def test_channel_as_channel_success(self):
  instance_a = types.Artifact('MyTypeName')
  instance_b = types.Artifact('MyTypeName')
  chnl_original = channel.Channel(
      'MyTypeName', artifacts=[instance_a, instance_b])
  chnl_result = channel.as_channel(chnl_original)
  self.assertEqual(chnl_original, chnl_result)
def test_valid_channel(self):
  instance_a = types.Artifact('MyTypeName')
  instance_b = types.Artifact('MyTypeName')
  chnl = channel.Channel('MyTypeName', artifacts=[instance_a, instance_b])
  self.assertEqual(chnl.type_name, 'MyTypeName')
  self.assertItemsEqual(chnl.get(), [instance_a, instance_b])
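# A minimal sketch, not part of the original suite, of the other behavior of
# channel.as_channel exercised by the construction tests below: wrapping a
# plain list of artifacts into a new Channel, versus the pass-through of an
# existing Channel asserted in test_channel_as_channel_success above.
def test_as_channel_wraps_artifact_list(self):
  instance_a = types.Artifact('MyTypeName')
  instance_b = types.Artifact('MyTypeName')
  chnl = channel.as_channel([instance_a, instance_b])
  self.assertEqual(chnl.type_name, 'MyTypeName')
  self.assertItemsEqual(chnl.get(), [instance_a, instance_b])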
def testCsvExampleGenWrapper(self):
  input_base = types.Artifact(type_name='ExternalPath', split='')
  input_base.uri = '/path/to/dataset'

  with patch.object(executor, 'Executor', autospec=True) as _:
    wrapper = executor_wrappers.CsvExampleGenWrapper(
        argparse.Namespace(
            exec_properties=json.dumps(self.exec_properties),
            outputs=artifact_utils.jsonify_artifact_dict(
                {'examples': self.examples}),
            executor_class_path=(
                'tfx.components.example_gen.csv_example_gen.executor'
                '.Executor'),
            input_base=json.dumps([input_base.json_dict()])))
    wrapper.run(output_basedir=self.output_basedir)

    # TODO(b/133011207): Validate arguments for executor and Do() method.
    metadata_file = os.path.join(self.output_basedir,
                                 'output/ml_metadata/examples')

    expected_output_examples = types.Artifact(
        type_name='ExamplesPath', split='dummy')
    # Expect that span and path are resolved.
    expected_output_examples.span = 1
    expected_output_examples.uri = (
        '/path/to/output/csv_example_gen/examples/mock_workflow_id/dummy/')

    with tf.gfile.GFile(metadata_file) as f:
      self.assertEqual([expected_output_examples.json_dict()],
                       json.loads(f.read()))
def test_do(self):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  tf.gfile.MakeDirs(output_data_dir)

  # Create input dict.
  train_examples = types.Artifact(type_name='ExamplesPath', split='train')
  train_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/train/')
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  eval_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
  train_stats = types.Artifact(type_name='ExampleStatisticsPath',
                               split='train')
  train_stats.uri = os.path.join(output_data_dir, 'train', '')
  eval_stats = types.Artifact(type_name='ExampleStatisticsPath', split='eval')
  eval_stats.uri = os.path.join(output_data_dir, 'eval', '')

  input_dict = {
      'input_data': [train_examples, eval_examples],
  }
  output_dict = {
      'output': [train_stats, eval_stats],
  }

  # Run executor.
  stats_gen_executor = executor.Executor()
  stats_gen_executor.Do(input_dict, output_dict, exec_properties={})

  # Check statistics_gen outputs.
  self._validate_stats_output(os.path.join(train_stats.uri, 'stats_tfrecord'))
  self._validate_stats_output(os.path.join(eval_stats.uri, 'stats_tfrecord'))
def setUp(self):
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  tf.gfile.MakeDirs(self._output_data_dir)

  self._model_export = types.Artifact(type_name='ModelExportPath')
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
  self._model_blessing = types.Artifact(type_name='ModelBlessingPath')
  self._input_dict = {
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }

  self._model_push = types.Artifact(type_name='ModelPushPath')
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.gfile.MakeDirs(self._model_push.uri)
  self._output_dict = {
      'model_push': [self._model_push],
  }

  self._serving_model_dir = os.path.join(self._output_data_dir,
                                         'serving_model_dir')
  tf.gfile.MakeDirs(self._serving_model_dir)
  self._exec_properties = {
      'push_destination':
          json_format.MessageToJson(
              pusher_pb2.PushDestination(
                  filesystem=pusher_pb2.PushDestination.Filesystem(
                      base_directory=self._serving_model_dir))),
  }
  self._executor = executor.Executor()
def testPipelineWithArtifactInfo(self):
  artifacts_collection = [types.Artifact('channel_one')]
  channel_one = types.Channel(
      type_name='channel_one', artifacts=artifacts_collection)
  component_a = _make_fake_component_instance(
      name='component_a', inputs={}, outputs={'one': channel_one})
  component_b = _make_fake_component_instance(
      name='component_b',
      inputs={'a': component_a.outputs.one},
      outputs={})
  my_pipeline = pipeline.Pipeline(
      pipeline_name='a',
      pipeline_root='b',
      components=[component_b, component_a],
      metadata_connection_config=self._metadata_connection_config)

  expected_artifact = types.Artifact('channel_one')
  expected_artifact.name = 'one'
  expected_artifact.pipeline_name = 'a'
  expected_artifact.pipeline_timestamp_ms = 0
  expected_artifact.producer_component = 'component_a'

  self.assertItemsEqual(my_pipeline.components, [component_a, component_b])
  self.assertEqual(component_a.outputs.one._artifacts[0].pipeline_name, 'a')
  self.assertEqual(component_a.outputs.one._artifacts[0].producer_component,
                   component_a.component_id)
  self.assertEqual(component_a.outputs.one._artifacts[0].name, 'one')
  self.assertEqual(component_b.inputs.a._artifacts[0].pipeline_name, 'a')
  self.assertEqual(component_b.inputs.a._artifacts[0].producer_component,
                   component_a.component_id)
  self.assertEqual(component_b.inputs.a._artifacts[0].name, 'one')
def test_fetch_previous_result(self):
  with metadata.Metadata(connection_config=self._connection_config) as m:
    # Create a 'previous' execution.
    exec_properties = {'log_root': 'path'}
    eid = m.register_execution(
        exec_properties=exec_properties,
        pipeline_info=self._pipeline_info,
        component_info=self._component_info)
    input_artifact = types.Artifact(type_name='ExamplesPath')
    m.publish_artifacts([input_artifact])
    output_artifact = types.Artifact(type_name='ExamplesPath')
    input_artifacts = {'input': [input_artifact]}
    output_artifacts = {'output': [output_artifact]}
    m.publish_execution(eid, input_artifacts, output_artifacts)

    # Test previous_execution.
    self.assertIsNone(
        m.previous_execution(
            input_artifacts=input_artifacts,
            exec_properties={},
            pipeline_info=self._pipeline_info,
            component_info=self._component_info))
    self.assertIsNone(
        m.previous_execution(
            input_artifacts={},
            exec_properties=exec_properties,
            pipeline_info=self._pipeline_info,
            component_info=self._component_info))
    self.assertIsNone(
        m.previous_execution(
            input_artifacts=input_artifacts,
            exec_properties=exec_properties,
            pipeline_info=self._pipeline_info,
            component_info=data_types.ComponentInfo(
                component_id='unique', component_type='a.b.c')))
    self.assertEqual(
        eid,
        m.previous_execution(
            input_artifacts=input_artifacts,
            exec_properties=exec_properties,
            pipeline_info=self._pipeline_info,
            component_info=self._component_info))

    # Test fetch_previous_result_artifacts.
    new_output_artifact = types.Artifact(type_name='ExamplesPath')
    self.assertNotEqual(ArtifactState.PUBLISHED, new_output_artifact.state)
    new_output_dict = {'output': [new_output_artifact]}
    updated_output_dict = m.fetch_previous_result_artifacts(
        new_output_dict, eid)
    previous_artifact = output_artifacts['output'][-1].artifact
    current_artifact = updated_output_dict['output'][-1].artifact
    self.assertEqual(ArtifactState.PUBLISHED,
                     current_artifact.properties['state'].string_value)
    self.assertEqual(previous_artifact.id, current_artifact.id)
    self.assertEqual(previous_artifact.type_id, current_artifact.type_id)
def test_do(self):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  train_stats_artifact = types.Artifact('ExampleStatsPath', split='train')
  train_stats_artifact.uri = os.path.join(source_data_dir,
                                          'statistics_gen/train/')

  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  schema_output = types.Artifact('SchemaPath')
  schema_output.uri = os.path.join(output_data_dir, 'schema_output')

  input_dict = {
      'stats': [train_stats_artifact],
  }
  output_dict = {
      'output': [schema_output],
  }
  exec_properties = {}

  schema_gen_executor = executor.Executor()
  schema_gen_executor.Do(input_dict, output_dict, exec_properties)
  self.assertNotEqual(0, len(tf.gfile.ListDirectory(schema_output.uri)))
def setUp(self):
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self.component_name = 'test_component'

  # Create input dict.
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  eval_examples.uri = os.path.join(self._source_data_dir,
                                   'csv_example_gen/eval/')
  model = types.Artifact(type_name='ModelExportPath')
  model.uri = os.path.join(self._source_data_dir, 'trainer/current/')
  self._input_dict = {
      'examples': [eval_examples],
      'model': [model],
  }

  # Create output dict.
  self._blessing = types.Artifact('ModelBlessingPath')
  self._blessing.uri = os.path.join(output_data_dir, 'blessing')
  self._output_dict = {'blessing': [self._blessing]}

  # Create context.
  self._tmp_dir = os.path.join(output_data_dir, '.temp')
  self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')
def setUp(self):
  self._mock_metadata = tf.test.mock.Mock()
  self._input_dict = {
      'input_data':
          channel.Channel(
              type_name='input_data',
              artifacts=[types.Artifact(type_name='input_data')]),
  }
  input_dir = os.path.join(
      os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
      self._testMethodName, 'input_dir')
  # Valid input artifacts must have a uri pointing to an existing directory.
  for key, input_channel in self._input_dict.items():
    for index, artifact in enumerate(input_channel.get()):
      artifact.id = index + 1
      uri = os.path.join(input_dir, key, str(artifact.id), '')
      artifact.uri = uri
      tf.gfile.MakeDirs(uri)
  self._output_dict = {
      'output_data':
          channel.Channel(
              type_name='output_data',
              artifacts=[
                  types.Artifact(type_name='output_data', split='split')
              ]),
  }
  self._input_artifacts = channel.unwrap_channel_dict(self._input_dict)
  self._output_artifacts = {
      'output_data': [types.Artifact(type_name='OutputType')],
  }
  self._exec_properties = {
      'key': 'value',
  }
  self._execution_id = 100
def test_construct(self):
  examples = types.Artifact(type_name='ExamplesPath')
  model_exports = types.Artifact(type_name='ModelExportPath')
  evaluator = component.Evaluator(
      examples=channel.as_channel([examples]),
      model_exports=channel.as_channel([model_exports]))
  self.assertEqual('ModelEvalPath', evaluator.outputs.output.type_name)
def fakeUpstreamOutputs(mlmd_connection: metadata.Metadata,
                        example_gen: pipeline_pb2.PipelineNode,
                        transform: pipeline_pb2.PipelineNode):
  with mlmd_connection as m:
    if example_gen:
      # Publishes ExampleGen output.
      output_example = types.Artifact(
          example_gen.outputs.outputs['output_examples'].artifact_spec.type)
      output_example.uri = 'my_examples_uri'
      contexts = context_lib.register_contexts_if_not_exists(
          m, example_gen.contexts)
      execution = execution_publish_utils.register_execution(
          m, example_gen.node_info.type, contexts)
      execution_publish_utils.publish_succeeded_execution(
          m, execution.id, contexts, {
              'output_examples': [output_example],
          })

    if transform:
      # Publishes Transform output.
      output_transform_graph = types.Artifact(
          transform.outputs.outputs['transform_graph'].artifact_spec.type)
      output_transform_graph.uri = 'my_transform_graph_uri'
      contexts = context_lib.register_contexts_if_not_exists(
          m, transform.contexts)
      execution = execution_publish_utils.register_execution(
          m, transform.node_info.type, contexts)
      execution_publish_utils.publish_succeeded_execution(
          m, execution.id, contexts, {
              'transform_graph': [output_transform_graph],
          })
def setUp(self):
  self._source_data_dir = os.path.join(
      os.path.dirname(
          os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
      'components', 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  tf.gfile.MakeDirs(self._output_data_dir)

  self._model_export = types.Artifact(type_name='ModelExportPath')
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
  self._model_blessing = types.Artifact(type_name='ModelBlessingPath')
  self._input_dict = {
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }

  self._model_push = types.Artifact(type_name='ModelPushPath')
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.gfile.MakeDirs(self._model_push.uri)
  self._output_dict = {
      'model_push': [self._model_push],
  }

  self._exec_properties = {
      'custom_config': {
          'ai_platform_serving_args': {
              'model_name': 'model_name',
              'project_id': 'project_id',
          },
      },
  }
  self._executor = Executor()
def testMainEmptyInputs(self):
  """Test executor class import under empty inputs/outputs."""
  inputs = {
      'x': [types.Artifact(type_name='X'),
            types.Artifact(type_name='X')],
  }
  outputs = {'y': [types.Artifact(type_name='Y')]}
  exec_properties = {'a': 'b'}
  args = [
      '--executor_class_path=%s.%s' %
      (FakeExecutor.__module__, FakeExecutor.__name__),
      '--inputs=%s' % artifact_utils.jsonify_artifact_dict(inputs),
      '--outputs=%s' % artifact_utils.jsonify_artifact_dict(outputs),
      '--exec-properties=%s' % json.dumps(exec_properties),
  ]
  with ArgsCapture() as args_capture:
    run_executor.main(args)
    # TODO(b/131417512): Add equal comparison to types.Artifact class so we
    # can use asserters.
    self.assertSetEqual(
        set(args_capture.input_dict.keys()), set(inputs.keys()))
    self.assertSetEqual(
        set(args_capture.output_dict.keys()), set(outputs.keys()))
    self.assertDictEqual(args_capture.exec_properties, exec_properties)
def testDoWithCache(self):
  # First run that creates cache.
  output_cache_artifact = types.Artifact('OutputCache')
  output_cache_artifact.uri = os.path.join(self._output_data_dir, 'CACHE/')
  self._output_dict['cache_output_path'] = [output_cache_artifact]
  self._exec_properties['module_file'] = self._module_file
  self._transform_executor.Do(self._input_dict, self._output_dict,
                              self._exec_properties)
  self._verify_transform_outputs()
  self.assertNotEqual(
      0, len(tf.gfile.ListDirectory(output_cache_artifact.uri)))

  # Second run from cache.
  self._output_data_dir = self._get_output_data_dir('2nd_run')
  input_cache_artifact = types.Artifact('InputCache')
  input_cache_artifact.uri = output_cache_artifact.uri
  output_cache_artifact = types.Artifact('OutputCache')
  output_cache_artifact.uri = os.path.join(self._output_data_dir, 'CACHE/')
  self._make_base_do_params(self._source_data_dir, self._output_data_dir)
  self._input_dict['cache_input_path'] = [input_cache_artifact]
  self._output_dict['cache_output_path'] = [output_cache_artifact]
  self._exec_properties['module_file'] = self._module_file
  self._transform_executor.Do(self._input_dict, self._output_dict,
                              self._exec_properties)
  self._verify_transform_outputs()
  self.assertNotEqual(
      0, len(tf.gfile.ListDirectory(output_cache_artifact.uri)))
def test_construct(self):
  examples = types.Artifact(type_name='ExamplesPath')
  model = types.Artifact(type_name='ModelExportPath')
  model_validator = component.ModelValidator(
      examples=channel.as_channel([examples]),
      model=channel.as_channel([model]))
  self.assertEqual('ModelBlessingPath',
                   model_validator.outputs.blessing.type_name)
def test_construct(self):
  train_examples = types.Artifact(type_name='ExamplesPath', split='train')
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  statistics_gen = component.StatisticsGen(
      input_data=channel.as_channel([train_examples, eval_examples]))
  self.assertEqual('ExampleStatisticsPath',
                   statistics_gen.outputs.output.type_name)
def test_do(self):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  train_examples = types.Artifact(type_name='ExamplesPath', split='train')
  train_examples.uri = os.path.join(
      source_data_dir, 'transform/transformed_examples/train/')
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  eval_examples.uri = os.path.join(
      source_data_dir, 'transform/transformed_examples/eval/')
  transform_output = types.Artifact(type_name='TransformPath')
  transform_output.uri = os.path.join(source_data_dir,
                                      'transform/transform_output/')
  schema = types.Artifact(type_name='SchemaPath')
  schema.uri = os.path.join(source_data_dir, 'schema_gen/')

  input_dict = {
      'examples': [train_examples, eval_examples],
      'transform_output': [transform_output],
      'schema': [schema],
  }

  # Create output dict.
  model_exports = types.Artifact(type_name='ModelExportPath')
  model_exports.uri = os.path.join(output_data_dir, 'model_export_path')
  output_dict = {'output': [model_exports]}

  # Create exec properties.
  module_file_path = os.path.join(source_data_dir, 'module_file',
                                  'trainer_module.py')
  exec_properties = {
      'train_args':
          json_format.MessageToJson(trainer_pb2.TrainArgs(num_steps=1000)),
      'eval_args':
          json_format.MessageToJson(trainer_pb2.EvalArgs(num_steps=500)),
      'module_file': module_file_path,
      'warm_starting': False,
  }

  trainer_executor = executor.Executor()
  trainer_executor.Do(
      input_dict=input_dict,
      output_dict=output_dict,
      exec_properties=exec_properties)

  # Check outputs.
  self.assertTrue(
      tf.gfile.Exists(os.path.join(model_exports.uri, 'eval_model_dir')))
  self.assertTrue(
      tf.gfile.Exists(os.path.join(model_exports.uri, 'serving_model_dir')))
def test_unwrap_channel_dict(self):
  instance_a = types.Artifact('MyTypeName')
  instance_b = types.Artifact('MyTypeName')
  channel_dict = {
      'id': channel.Channel('MyTypeName', artifacts=[instance_a, instance_b]),
  }
  result = channel.unwrap_channel_dict(channel_dict)
  self.assertDictEqual(result, {'id': [instance_a, instance_b]})
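# A hedged round-trip sketch, not in the original tests: the artifact lists
# returned by unwrap_channel_dict can be re-wrapped with channel.as_channel,
# the inverse direction of the test above.
def test_unwrap_then_rewrap(self):
  instance_a = types.Artifact('MyTypeName')
  instance_b = types.Artifact('MyTypeName')
  channel_dict = {
      'id': channel.Channel('MyTypeName', artifacts=[instance_a, instance_b]),
  }
  unwrapped = channel.unwrap_channel_dict(channel_dict)
  rewrapped = {key: channel.as_channel(artifacts)
               for key, artifacts in unwrapped.items()}
  self.assertItemsEqual(rewrapped['id'].get(), [instance_a, instance_b])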
def test_run(self, mock_publisher):
  mock_publisher.return_value.publish_execution.return_value = {}

  example_gen = FileBasedExampleGen(
      executor_class=parquet_executor.Executor,
      input_base=external_input(self.parquet_dir_path),
      input_config=self.input_config,
      output_config=self.output_config,
      name='parquetExampleGenComponent')

  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  pipeline_root = os.path.join(output_data_dir, 'Test')
  tf.gfile.MakeDirs(pipeline_root)
  pipeline_info = data_types.PipelineInfo(
      pipeline_name='Test', pipeline_root=pipeline_root, run_id='123')
  driver_args = data_types.DriverArgs(enable_cache=True)
  connection_config = metadata_store_pb2.ConnectionConfig()
  connection_config.sqlite.SetInParent()

  launcher = component_launcher.ComponentLauncher(
      component=example_gen,
      pipeline_info=pipeline_info,
      driver_args=driver_args,
      metadata_connection_config=connection_config,
      additional_pipeline_args={})
  self.assertEqual(
      launcher._component_info.component_type,
      '.'.join([FileBasedExampleGen.__module__,
                FileBasedExampleGen.__name__]))

  launcher.launch()
  mock_publisher.return_value.publish_execution.assert_called_once()

  # Get output paths.
  component_id = '.'.join([example_gen.component_name, example_gen.name])
  output_path = os.path.join(pipeline_root, component_id, 'examples/1')
  train_examples = types.Artifact(type_name='ExamplesPath', split='train')
  train_examples.uri = os.path.join(output_path, 'train')
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  eval_examples.uri = os.path.join(output_path, 'eval')

  # Check parquet example gen outputs.
  train_output_file = os.path.join(train_examples.uri,
                                   'data_tfrecord-00000-of-00001.gz')
  eval_output_file = os.path.join(eval_examples.uri,
                                  'data_tfrecord-00000-of-00001.gz')
  self.assertTrue(tf.gfile.Exists(train_output_file))
  self.assertTrue(tf.gfile.Exists(eval_output_file))
  self.assertGreater(
      tf.gfile.GFile(train_output_file).size(),
      tf.gfile.GFile(eval_output_file).size())
def setUp(self):
  self._mock_metadata = tf.test.mock.Mock()
  self._mock_metadata.publish_execution = tf.test.mock.Mock()
  self._input_dict = {
      'input_data': [types.Artifact(type_name='InputType')],
  }
  self._output_dict = {
      'output_data': [types.Artifact(type_name='OutputType')],
  }
  self._execution_id = 100
def test_construct_without_transform_output(self):
  transformed_examples = types.Artifact(type_name='ExamplesPath')
  schema = types.Artifact(type_name='SchemaPath')
  trainer = component.Trainer(
      module_file='/path/to/module/file',
      examples=channel.as_channel([transformed_examples]),
      schema=channel.as_channel([schema]),
      train_args=trainer_pb2.TrainArgs(num_steps=100),
      eval_args=trainer_pb2.EvalArgs(num_steps=50))
  self.assertEqual('ModelExportPath', trainer.outputs.output.type_name)
def test_construct_with_slice_spec(self):
  examples = types.Artifact(type_name='ExamplesPath')
  model_exports = types.Artifact(type_name='ModelExportPath')
  evaluator = component.Evaluator(
      examples=channel.as_channel([examples]),
      model_exports=channel.as_channel([model_exports]),
      feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
          evaluator_pb2.SingleSlicingSpec(
              column_for_slicing=['trip_start_hour'])
      ]))
  self.assertEqual('ModelEvalPath', evaluator.outputs.output.type_name)
def testDo(self, mock_client):
  # Mock query result schema for _BigQueryConverter.
  mock_client.return_value.query.return_value.result.return_value.schema = (
      self._schema)

  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create output dict.
  train_examples = types.Artifact(type_name='ExamplesPath', split='train')
  train_examples.uri = os.path.join(output_data_dir, 'train')
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  eval_examples.uri = os.path.join(output_data_dir, 'eval')
  output_dict = {'examples': [train_examples, eval_examples]}

  # Create exec properties.
  exec_properties = {
      'input_config':
          json_format.MessageToJson(
              example_gen_pb2.Input(splits=[
                  example_gen_pb2.Input.Split(
                      name='bq', pattern='SELECT i, f, s FROM `fake`'),
              ])),
      'output_config':
          json_format.MessageToJson(
              example_gen_pb2.Output(
                  split_config=example_gen_pb2.SplitConfig(splits=[
                      example_gen_pb2.SplitConfig.Split(
                          name='train', hash_buckets=2),
                      example_gen_pb2.SplitConfig.Split(
                          name='eval', hash_buckets=1),
                  ]))),
  }

  # Run executor.
  big_query_example_gen = executor.Executor()
  big_query_example_gen.Do({}, output_dict, exec_properties)

  # Check BigQuery example gen outputs.
  train_output_file = os.path.join(train_examples.uri,
                                   'data_tfrecord-00000-of-00001.gz')
  eval_output_file = os.path.join(eval_examples.uri,
                                  'data_tfrecord-00000-of-00001.gz')
  self.assertTrue(tf.gfile.Exists(train_output_file))
  self.assertTrue(tf.gfile.Exists(eval_output_file))
  self.assertGreater(
      tf.gfile.GFile(train_output_file).size(),
      tf.gfile.GFile(eval_output_file).size())
def testPreExecutionNewExecution(self, mock_verify_input_artifacts_fn):
  input_dict = {
      'input_a':
          types.Channel(
              type_name='input_a',
              artifacts=[types.Artifact(type_name='input_a')]),
  }
  output_dict = {
      'output_a':
          types.Channel(
              type_name='output_a',
              artifacts=[
                  types.Artifact(type_name='output_a', split='split')
              ]),
  }
  execution_id = 1
  context_id = 123
  exec_properties = copy.deepcopy(self._exec_properties)
  driver_args = data_types.DriverArgs(enable_cache=True)
  pipeline_info = data_types.PipelineInfo(
      pipeline_name='my_pipeline_name',
      pipeline_root=os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
      run_id='my_run_id')
  component_info = data_types.ComponentInfo(
      component_type='a.b.c', component_id='my_component_id')

  self._mock_metadata.get_artifacts_by_info.side_effect = list(
      input_dict['input_a'].get())
  self._mock_metadata.register_execution.side_effect = [execution_id]
  self._mock_metadata.previous_execution.side_effect = [None]
  self._mock_metadata.register_run_context_if_not_exists.side_effect = [
      context_id
  ]

  driver = base_driver.BaseDriver(metadata_handler=self._mock_metadata)
  execution_decision = driver.pre_execution(
      input_dict=input_dict,
      output_dict=output_dict,
      exec_properties=exec_properties,
      driver_args=driver_args,
      pipeline_info=pipeline_info,
      component_info=component_info)

  self.assertFalse(execution_decision.use_cached_results)
  self.assertEqual(execution_decision.execution_id, 1)
  self.assertItemsEqual(execution_decision.exec_properties, exec_properties)
  self.assertEqual(
      execution_decision.output_dict['output_a'][0].uri,
      os.path.join(pipeline_info.pipeline_root, component_info.component_id,
                   'output_a', str(execution_id), 'split', ''))
def test_do(self):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  train_examples = types.Artifact(type_name='ExamplesPath', split='train')
  train_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/train/')
  eval_examples = types.Artifact(type_name='ExamplesPath', split='eval')
  eval_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
  model_exports = types.Artifact(type_name='ModelExportPath')
  model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
  input_dict = {
      'examples': [train_examples, eval_examples],
      'model_exports': [model_exports],
  }

  # Create output dict.
  eval_output = types.Artifact('ModelEvalPath')
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  output_dict = {'output': [eval_output]}

  # Create exec properties.
  exec_properties = {
      'feature_slicing_spec':
          json_format.MessageToJson(
              evaluator_pb2.FeatureSlicingSpec(specs=[
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_hour']),
                  evaluator_pb2.SingleSlicingSpec(
                      column_for_slicing=['trip_start_day', 'trip_miles']),
              ])),
  }

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      tf.gfile.Exists(os.path.join(eval_output.uri, 'eval_config')))
  self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(tf.gfile.Exists(os.path.join(eval_output.uri, 'plots')))
def _create_launcher_context(self, component_config=None):
  test_dir = self.get_temp_dir()
  connection_config = metadata_store_pb2.ConnectionConfig()
  connection_config.sqlite.SetInParent()
  pipeline_root = os.path.join(test_dir, 'Test')
  input_artifact = types.Artifact(type_name='InputPath')
  input_artifact.uri = os.path.join(test_dir, 'input')
  component = test_utils._FakeComponent(
      name='FakeComponent',
      input_channel=channel_utils.as_channel([input_artifact]),
      custom_executor_spec=executor_spec.ExecutorContainerSpec(
          image='gcr://test', args=['{{input_dict["input"][0].uri}}']))
  pipeline_info = data_types.PipelineInfo(
      pipeline_name='Test', pipeline_root=pipeline_root, run_id='123')
  driver_args = data_types.DriverArgs(enable_cache=True)
  launcher = kubernetes_component_launcher.KubernetesComponentLauncher.create(
      component=component,
      pipeline_info=pipeline_info,
      driver_args=driver_args,
      metadata_connection_config=connection_config,
      beam_pipeline_args=[],
      additional_pipeline_args={},
      component_config=component_config)
  return {'launcher': launcher, 'input_artifact': input_artifact}
def testUnresolvedChannel(self):

  class _FakeComponentSpec(types.ComponentSpec):
    PARAMETERS = {}
    INPUTS = {'input': component_spec.ChannelParameter(type_name='Foo')}
    OUTPUTS = {}

  class _FakeExecutor(base_executor.BaseExecutor):
    CALLED = False

    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
      _FakeExecutor.CALLED = True

  class _FakeComponent(base_component.BaseComponent):
    SPEC_CLASS = _FakeComponentSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(_FakeExecutor)

    def __init__(self, spec: types.ComponentSpec):
      super(_FakeComponent, self).__init__(spec=spec)

  c = interactive_context.InteractiveContext()
  foo = types.Channel(type_name='Foo', artifacts=[types.Artifact('Foo')])
  component = _FakeComponent(_FakeComponentSpec(input=foo))
  with self.assertRaisesRegexp(ValueError, 'Unresolved input channel'):
    c.run(component)
def __init__(self,
             examples: channel.Channel,
             model: channel.Channel,
             blessing: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
  """Construct a ModelValidator component.

  Args:
    examples: A Channel of 'ExamplesPath' type, usually produced by an
      ExampleGen component.
    model: A Channel of 'ModelExportPath' type, usually produced by a
      Trainer component.
    blessing: Optional output channel of 'ModelBlessingPath' type for the
      result of blessing.
    name: Optional unique name. Necessary if multiple ModelValidator
      components are declared in the same pipeline.
  """
  blessing = blessing or channel.Channel(
      type_name='ModelBlessingPath',
      artifacts=[types.Artifact('ModelBlessingPath')])
  name = name or ''
  spec = ModelValidatorSpec(
      examples=channel.as_channel(examples),
      model=channel.as_channel(model),
      component_unique_name=name,
      blessing=blessing)
  super(ModelValidator, self).__init__(spec=spec, name=name)
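# A hedged usage sketch (upstream component names and output attributes are
# assumed, not taken from this module): wiring ModelValidator into a pipeline
# from channels produced by ExampleGen and Trainer, mirroring the
# construction test elsewhere in this suite.
#
#   model_validator = ModelValidator(
#       examples=example_gen.outputs.examples,
#       model=trainer.outputs.output)
#   blessing = model_validator.outputs.blessing  # 'ModelBlessingPath' channel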
def schedule(self) -> task_scheduler.TaskSchedulerResult:

  def _as_dict(proto_map) -> Dict[str, types.Property]:
    return {k: data_types_utils.get_value(v) for k, v in proto_map.items()}

  pipeline_node = self.task.get_pipeline_node()
  output_spec = pipeline_node.outputs.outputs[importer.IMPORT_RESULT_KEY]
  properties = _as_dict(output_spec.artifact_spec.additional_properties)
  custom_properties = _as_dict(
      output_spec.artifact_spec.additional_custom_properties)

  output_artifacts = importer.generate_output_dict(
      metadata_handler=self.mlmd_handle,
      uri=str(self.task.exec_properties[importer.SOURCE_URI_KEY]),
      properties=properties,
      custom_properties=custom_properties,
      reimport=bool(self.task.exec_properties[importer.REIMPORT_OPTION_KEY]),
      output_artifact_class=types.Artifact(
          output_spec.artifact_spec.type).type,
      mlmd_artifact_type=output_spec.artifact_spec.type)

  return task_scheduler.TaskSchedulerResult(
      status=status_lib.Status(code=status_lib.Code.OK),
      output=task_scheduler.ImporterNodeOutput(
          output_artifacts=output_artifacts))