def test_construct(self):
    """Check that an Evaluator's output channel has type 'ModelEvalPath'."""
    examples_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ExamplesPath')])
    model_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ModelExportPath')])

    evaluator = component.Evaluator(
        examples=examples_channel, model_exports=model_channel)

    self.assertEqual('ModelEvalPath', evaluator.outputs.output.type_name)
def setUp(self):
    """Build the input/output artifacts, exec properties and executor.

    Source data is read from 'components/testdata' four directory levels
    above this file; outputs go under TEST_UNDECLARED_OUTPUTS_DIR (or the
    test temp dir) in a sub-directory named after the running test method.
    """
    # Locate the shared test data and a per-test output directory.
    base_dir = os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
    self._source_data_dir = os.path.join(base_dir, 'components', 'testdata')
    output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                 self.get_temp_dir())
    self._output_data_dir = os.path.join(output_root, self._testMethodName)
    tf.gfile.MakeDirs(self._output_data_dir)

    # Input artifacts.
    self._model_export = types.TfxArtifact(type_name='ModelExportPath')
    self._model_export.uri = os.path.join(self._source_data_dir,
                                          'trainer/current/')
    self._model_blessing = types.TfxArtifact(type_name='ModelBlessingPath')
    self._input_dict = {
        'model_export': [self._model_export],
        'model_blessing': [self._model_blessing],
    }

    # Output artifact; its directory is created up front.
    self._model_push = types.TfxArtifact(type_name='ModelPushPath')
    self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
    tf.gfile.MakeDirs(self._model_push.uri)
    self._output_dict = {'model_push': [self._model_push]}

    # Execution properties carrying the serving arguments.
    serving_args = {'model_name': 'model_name', 'project_id': 'project_id'}
    self._exec_properties = {
        'custom_config': {
            'ai_platform_serving_args': serving_args,
        },
    }
    self._executor = Executor()
def testCsvExampleGenWrapper(self):
    """Run CsvExampleGenWrapper with a patched Executor and check metadata.

    The wrapper is expected to write the resolved output examples artifact
    (span and uri filled in) as JSON to 'output/ml_metadata/examples' under
    the test's output base directory.
    """
    input_base = types.TfxArtifact(type_name='ExternalPath', split='')
    input_base.uri = '/path/to/dataset'

    with patch.object(executor, 'Executor', autospec=True):
        wrapper_args = argparse.Namespace(
            exec_properties=json.dumps(self.exec_properties),
            outputs=types.jsonify_tfx_type_dict({'examples': self.examples}),
            executor_class_path=(
                'tfx.components.example_gen.csv_example_gen.executor.Executor'
            ),
            input_base=json.dumps([input_base.json_dict()]))
        wrapper = executor_wrappers.CsvExampleGenWrapper(wrapper_args)
        wrapper.run(output_basedir=self.output_basedir)
        # TODO(b/133011207): Validate arguments for executor and Do() method.

        metadata_file = os.path.join(self.output_basedir,
                                     'output/ml_metadata/examples')

        # Expect that span and path are resolved.
        expected_output_examples = types.TfxArtifact(
            type_name='ExamplesPath', split='dummy')
        expected_output_examples.span = 1
        expected_output_examples.uri = (
            '/path/to/output/csv_example_gen/examples/mock_workflow_id/dummy/'
        )

        with tf.gfile.GFile(metadata_file) as f:
            written_artifacts = json.loads(f.read())
            self.assertEqual([expected_output_examples.json_dict()],
                             written_artifacts)
def setUp(self):
    """Prepare input/output artifact dicts and an executor context.

    Inputs point at 'testdata' one package level above this file; the
    blessing output and the temp dir live under a per-test output directory.
    """
    self._source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                 self.get_temp_dir())
    output_data_dir = os.path.join(output_root, self._testMethodName)
    self.component_name = 'test_component'

    # Create input dict.
    eval_examples = types.TfxArtifact(type_name='ExamplesPath', split='eval')
    eval_examples.uri = os.path.join(self._source_data_dir,
                                     'csv_example_gen/eval/')
    model = types.TfxArtifact(type_name='ModelExportPath')
    model.uri = os.path.join(self._source_data_dir, 'trainer/current/')
    self._input_dict = {'examples': [eval_examples], 'model': [model]}

    # Create output dict.
    self._blessing = types.TfxArtifact('ModelBlessingPath')
    self._blessing.uri = os.path.join(output_data_dir, 'blessing')
    self._output_dict = {'blessing': [self._blessing]}

    # Create context.
    self._tmp_dir = os.path.join(output_data_dir, '.temp')
    self._context = executor.Executor.Context(
        tmp_dir=self._tmp_dir, unique_id='2')
def test_fetch_previous_result(self):
    """Exercise previous_run lookup and fetch_previous_result_artifacts.

    A single execution is published, then:
      * previous_run must return None unless the type name, inputs and exec
        properties all match the published execution;
      * fetch_previous_result_artifacts must hand back the artifact that was
        published for that execution (same id and type id, state published).
    """
    with metadata.Metadata(connection_config=self._connection_config) as m:
        # Create a 'previous' execution.
        exec_properties = {'log_root': 'path'}
        eid = m.prepare_execution('Test', exec_properties)
        input_artifact = types.TfxArtifact(type_name='ExamplesPath')
        m.publish_artifacts([input_artifact])
        output_artifact = types.TfxArtifact(type_name='ExamplesPath')
        input_dict = {'input': [input_artifact]}
        output_dict = {'output': [output_artifact]}
        m.publish_execution(eid, input_dict, output_dict)

        # Test previous_run: any mismatch in exec properties, inputs or type
        # name must yield no previous run.
        self.assertIsNone(m.previous_run('Test', input_dict, {}))
        self.assertIsNone(m.previous_run('Test', {}, exec_properties))
        self.assertIsNone(
            m.previous_run('Test2', input_dict, exec_properties))
        self.assertEqual(eid,
                         m.previous_run('Test', input_dict, exec_properties))

        # Test fetch_previous_result_artifacts.
        new_output_artifact = types.TfxArtifact(type_name='ExamplesPath')
        self.assertNotEqual(types.ARTIFACT_STATE_PUBLISHED,
                            new_output_artifact.state)
        new_output_dict = {'output': [new_output_artifact]}
        updated_output_dict = m.fetch_previous_result_artifacts(
            new_output_dict, eid)
        previous_artifact = output_dict['output'][-1].artifact
        current_artifact = updated_output_dict['output'][-1].artifact
        self.assertEqual(types.ARTIFACT_STATE_PUBLISHED,
                         current_artifact.properties['state'].string_value)
        self.assertEqual(previous_artifact.id, current_artifact.id)
        self.assertEqual(previous_artifact.type_id, current_artifact.type_id)
def setUp(self):
    """Create artifacts, a serving directory and exec properties.

    The push destination is a filesystem directory ('serving_model_dir')
    inside the per-test output directory, serialized to JSON in the exec
    properties.
    """
    self._source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                 self.get_temp_dir())
    self._output_data_dir = os.path.join(output_root, self._testMethodName)
    tf.gfile.MakeDirs(self._output_data_dir)

    # Input artifacts.
    self._model_export = types.TfxArtifact(type_name='ModelExportPath')
    self._model_export.uri = os.path.join(self._source_data_dir,
                                          'trainer/current/')
    self._model_blessing = types.TfxArtifact(type_name='ModelBlessingPath')
    self._input_dict = {
        'model_export': [self._model_export],
        'model_blessing': [self._model_blessing],
    }

    # Output artifact; its directory is created up front.
    self._model_push = types.TfxArtifact(type_name='ModelPushPath')
    self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
    tf.gfile.MakeDirs(self._model_push.uri)
    self._output_dict = {'model_push': [self._model_push]}

    # Filesystem push destination.
    self._serving_model_dir = os.path.join(self._output_data_dir,
                                           'serving_model_dir')
    tf.gfile.MakeDirs(self._serving_model_dir)
    push_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=self._serving_model_dir))
    self._exec_properties = {
        'push_destination': json_format.MessageToJson(push_destination),
    }
    self._executor = executor.Executor()
def setUp(self):
    """Set up mock metadata, artifact dicts and driver args.

    Every input artifact gets a 1-based id and a uri pointing at a freshly
    created directory under the test temp area.
    """
    self._mock_metadata = tf.test.mock.Mock()
    self._input_dict = {
        'input_data': [types.TfxArtifact(type_name='InputType')],
    }
    input_dir = os.path.join(
        os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
        self._testMethodName, 'input_dir')

    # Valid input artifacts must have a uri pointing to an existing directory.
    for key, input_list in self._input_dict.items():
        for artifact_id, artifact in enumerate(input_list, 1):
            artifact.id = artifact_id
            artifact_dir = os.path.join(input_dir, key, str(artifact.id), '')
            artifact.uri = artifact_dir
            tf.gfile.MakeDirs(artifact_dir)

    self._output_dict = {
        'output_data': [types.TfxArtifact(type_name='OutputType')],
    }
    self._exec_properties = {'key': 'value'}
    self._base_output_dir = os.path.join(
        os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
        self._testMethodName, 'base_output_dir')
    self._driver_args = data_types.DriverArgs(
        worker_name='worker_name',
        base_output_dir=self._base_output_dir,
        enable_cache=True)
    self._execution_id = 100
def test_do(self):
    """Run the schema-gen executor on train statistics and expect output.

    The executor is fed the statistics under 'testdata/statistics_gen/train/'
    and must leave at least one entry in the schema output directory.
    """
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')

    # Use the same statistics type name as the other tests in this source
    # ('ExampleStatisticsPath'); the previous 'ExampleStatsPath' spelling
    # matched no other statistics artifact here.
    train_stats_artifact = types.TfxArtifact(
        'ExampleStatisticsPath', split='train')
    train_stats_artifact.uri = os.path.join(source_data_dir,
                                            'statistics_gen/train/')

    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)
    schema_output = types.TfxArtifact('SchemaPath')
    schema_output.uri = os.path.join(output_data_dir, 'schema_output')

    input_dict = {'stats': [train_stats_artifact]}
    output_dict = {'output': [schema_output]}
    exec_properties = {}

    schema_gen_executor = executor.Executor()
    schema_gen_executor.Do(input_dict, output_dict, exec_properties)

    # The executor must have written something into the output directory.
    self.assertNotEqual(0, len(tf.gfile.ListDirectory(schema_output.uri)))
def test_valid_channel(self):
    """A Channel keeps its type name and the artifacts it was built with."""
    artifacts = [
        types.TfxArtifact('MyTypeName'),
        types.TfxArtifact('MyTypeName'),
    ]
    chnl = channel.Channel('MyTypeName', artifacts=list(artifacts))

    self.assertEqual(chnl.type_name, 'MyTypeName')
    self.assertItemsEqual(chnl.get(), artifacts)
def test_channel_as_channel_success(self):
    """as_channel on an existing Channel yields an equal Channel."""
    first = types.TfxArtifact('MyTypeName')
    second = types.TfxArtifact('MyTypeName')
    chnl_original = channel.Channel('MyTypeName', artifacts=[first, second])

    chnl_result = channel.as_channel(chnl_original)

    self.assertEqual(chnl_original, chnl_result)
def setUp(self):
    """Set up mock metadata plus channel-based and unwrapped artifact dicts.

    Each artifact in the input channels gets a 1-based id and a uri that
    points at a directory created under the test temp area.
    """
    self._mock_metadata = tf.test.mock.Mock()

    input_channel = channel.Channel(
        type_name='input_data',
        artifacts=[types.TfxArtifact(type_name='input_data')])
    self._input_dict = {'input_data': input_channel}
    input_dir = os.path.join(
        os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
        self._testMethodName, 'input_dir')

    # Valid input artifacts must have a uri pointing to an existing directory.
    for key, chnl in self._input_dict.items():
        for artifact_id, artifact in enumerate(chnl.get(), 1):
            artifact.id = artifact_id
            artifact_dir = os.path.join(input_dir, key, str(artifact.id), '')
            artifact.uri = artifact_dir
            tf.gfile.MakeDirs(artifact_dir)

    output_channel = channel.Channel(
        type_name='output_data',
        artifacts=[types.TfxArtifact(type_name='output_data', split='split')])
    self._output_dict = {'output_data': output_channel}

    self._input_artifacts = channel.unwrap_channel_dict(self._input_dict)
    self._output_artifacts = {
        'output_data': [types.TfxArtifact(type_name='OutputType')],
    }
    self._exec_properties = {'key': 'value'}
    self._execution_id = 100
def test_invalid_channel_type(self):
    """A Channel whose artifacts have another type name raises ValueError."""
    mismatched_artifacts = [
        types.TfxArtifact('MyTypeName'),
        types.TfxArtifact('MyTypeName'),
    ]
    with self.assertRaises(ValueError):
        channel.Channel(
            'AnotherTypeName',
            static_artifact_collection=mismatched_artifacts)
def test_pipeline_with_artifact_info(self):
    """Pipeline construction stamps producer info onto channel artifacts.

    component_a produces channel 'one', which component_b consumes as 'a'.
    After the Pipeline is built, the artifact seen through either side of
    the channel must carry the pipeline name, the producing component's id
    and the output key name.
    """
    artifacts_collection = [types.TfxArtifact('channel_one')]
    channel_one = channel.Channel(type_name='channel_one',
                                  artifacts=artifacts_collection)
    component_a = _make_fake_component_instance(
        name='component_a', inputs={}, outputs={'one': channel_one})
    component_b = _make_fake_component_instance(
        name='component_b',
        inputs={'a': component_a.outputs.one},
        outputs={})

    # Components are deliberately passed out of dependency order.
    my_pipeline = pipeline.Pipeline(
        pipeline_name='a',
        pipeline_root='b',
        components=[component_b, component_a],
        metadata_connection_config=self._metadata_connection_config)

    self.assertItemsEqual(my_pipeline.components, [component_a, component_b])

    # Artifact as seen from the producer's output channel.
    produced = component_a.outputs.one._artifacts[0]
    self.assertEqual(produced.pipeline_name, 'a')
    self.assertEqual(produced.producer_component, component_a.component_id)
    self.assertEqual(produced.name, 'one')

    # Artifact as seen from the consumer's input channel.
    consumed = component_b.inputs.a._artifacts[0]
    self.assertEqual(consumed.pipeline_name, 'a')
    self.assertEqual(consumed.producer_component, component_a.component_id)
    self.assertEqual(consumed.name, 'one')
def test_do(self):
    """Run the trainer executor and check that both model dirs are exported.

    Inputs are the transformed train/eval examples, the transform output and
    the schema from 'testdata'; the executor must create 'eval_model_dir' and
    'serving_model_dir' under the model export uri.
    """
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)

    # Create input dict.
    train_examples = types.TfxArtifact(type_name='ExamplesPath', split='train')
    train_examples.uri = os.path.join(
        source_data_dir, 'transform/transformed_examples/train/')
    eval_examples = types.TfxArtifact(type_name='ExamplesPath', split='eval')
    eval_examples.uri = os.path.join(
        source_data_dir, 'transform/transformed_examples/eval/')
    transform_output = types.TfxArtifact(type_name='TransformPath')
    transform_output.uri = os.path.join(source_data_dir,
                                        'transform/transform_output/')
    # The schema artifact uses the 'SchemaPath' type name, like the other
    # schema artifacts in this source (it was 'ExamplesPath', a copy-paste
    # slip from the examples artifacts above).
    schema = types.TfxArtifact(type_name='SchemaPath')
    schema.uri = os.path.join(source_data_dir, 'schema_gen/')

    input_dict = {
        'transformed_examples': [train_examples, eval_examples],
        'transform_output': [transform_output],
        'schema': [schema],
    }

    # Create output dict.
    model_exports = types.TfxArtifact(type_name='ModelExportPath')
    model_exports.uri = os.path.join(output_data_dir, 'model_export_path')
    output_dict = {'output': [model_exports]}

    # Create exec properties.
    module_file_path = os.path.join(source_data_dir, 'module_file',
                                    'trainer_module.py')
    exec_properties = {
        'train_args':
            json_format.MessageToJson(trainer_pb2.TrainArgs(num_steps=1000)),
        'eval_args':
            json_format.MessageToJson(trainer_pb2.EvalArgs(num_steps=500)),
        'module_file': module_file_path,
        'warm_starting': False,
    }

    trainer_executor = executor.Executor()
    trainer_executor.Do(input_dict=input_dict,
                        output_dict=output_dict,
                        exec_properties=exec_properties)

    # Check outputs.
    self.assertTrue(
        tf.gfile.Exists(os.path.join(model_exports.uri, 'eval_model_dir')))
    self.assertTrue(
        tf.gfile.Exists(os.path.join(model_exports.uri, 'serving_model_dir')))
def test_construct(self):
    """Check that ModelValidator's blessing output is 'ModelBlessingPath'."""
    examples_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ExamplesPath')])
    model_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ModelExportPath')])

    model_validator = component.ModelValidator(
        examples=examples_channel, model=model_channel)

    self.assertEqual('ModelBlessingPath',
                     model_validator.outputs.blessing.type_name)
def test_execution(self):
    """Walk an execution through prepare and publish and inspect the store.

    Verifies the freshly prepared execution proto, the published state of
    the output artifact, the final execution state, and the two events
    (declared input, declared output) recorded for the execution.
    """
    with Metadata(connection_config=self._connection_config,
                  logger=self._logger) as m:
        # Test prepare_execution.
        exec_properties = {}
        eid = m.prepare_execution('Test', exec_properties)
        [execution] = m.store.get_executions()
        self.assertProtoEquals(
            """ id: 1 type_id: 1 properties { key: "state" value { string_value: "new" } }""",
            execution)

        # Test publish_execution.
        input_artifact = types.TfxArtifact(type_name='ExamplesPath')
        m.publish_artifacts([input_artifact])
        output_artifact = types.TfxArtifact(type_name='ExamplesPath')
        input_dict = {'input': [input_artifact]}
        output_dict = {'output': [output_artifact]}
        m.publish_execution(eid, input_dict, output_dict)

        # Make sure artifacts in output_dict are published.
        self.assertEqual(types.ARTIFACT_STATE_PUBLISHED, output_artifact.state)

        # Make sure the execution state is changed.
        [execution] = m.store.get_executions_by_id([eid])
        self.assertEqual('complete',
                         execution.properties['state'].string_value)

        # Make sure events are published.
        events = m.store.get_events_by_execution_ids([eid])
        self.assertEqual(2, len(events))

        input_event = events[0]
        self.assertEqual(input_artifact.id, input_event.artifact_id)
        self.assertEqual(metadata_store_pb2.Event.DECLARED_INPUT,
                         input_event.type)
        self.assertProtoEquals(
            """ steps { key: "input" } steps { index: 0 }""",
            input_event.path)

        output_event = events[1]
        self.assertEqual(output_artifact.id, output_event.artifact_id)
        self.assertEqual(metadata_store_pb2.Event.DECLARED_OUTPUT,
                         output_event.type)
        self.assertProtoEquals(
            """ steps { key: "output" } steps { index: 0 }""",
            output_event.path)
def test_run(self, mock_publisher):
    """Launch a FileBasedExampleGen with the Avro executor end to end.

    The component is launched against an SQLite metadata connection with a
    mocked publisher; afterwards the train and eval TFRecord files must
    exist and the train file must be larger than the eval file.
    """
    mock_publisher.return_value.publish_execution.return_value = {}

    example_gen = FileBasedExampleGen(
        executor_class=avro_executor.Executor,
        input_base=external_input(self.avro_dir_path),
        input_config=self.input_config,
        output_config=self.output_config,
        name='AvroExampleGenComponent')

    output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                 self.get_temp_dir())
    output_data_dir = os.path.join(output_root, self._testMethodName)
    pipeline_root = os.path.join(output_data_dir, 'Test')
    tf.gfile.MakeDirs(pipeline_root)

    pipeline_info = data_types.PipelineInfo(
        pipeline_name='Test', pipeline_root=pipeline_root, run_id='123')
    driver_args = data_types.DriverArgs(enable_cache=True)

    connection_config = metadata_store_pb2.ConnectionConfig()
    connection_config.sqlite.SetInParent()

    launcher = component_launcher.ComponentLauncher(
        component=example_gen,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection_config=connection_config,
        additional_pipeline_args={})
    expected_component_type = '.'.join(
        [FileBasedExampleGen.__module__, FileBasedExampleGen.__name__])
    self.assertEqual(launcher._component_info.component_type,
                     expected_component_type)

    launcher.launch()
    mock_publisher.return_value.publish_execution.assert_called_once()

    # Get output paths.
    component_id = '.'.join([example_gen.component_name, example_gen.name])
    output_path = os.path.join(pipeline_root, component_id, 'examples/1')
    train_examples = types.TfxArtifact(type_name='ExamplesPath', split='train')
    train_examples.uri = os.path.join(output_path, 'train')
    eval_examples = types.TfxArtifact(type_name='ExamplesPath', split='eval')
    eval_examples.uri = os.path.join(output_path, 'eval')

    # Check Avro example gen outputs.
    train_output_file = os.path.join(train_examples.uri,
                                     'data_tfrecord-00000-of-00001.gz')
    eval_output_file = os.path.join(eval_examples.uri,
                                    'data_tfrecord-00000-of-00001.gz')
    self.assertTrue(tf.gfile.Exists(train_output_file))
    self.assertTrue(tf.gfile.Exists(eval_output_file))
    self.assertGreater(
        tf.gfile.GFile(train_output_file).size(),
        tf.gfile.GFile(eval_output_file).size())
def test_unwrap_channel_dict(self):
    """unwrap_channel_dict maps each key to its channel's artifact list."""
    first = types.TfxArtifact('MyTypeName')
    second = types.TfxArtifact('MyTypeName')
    wrapped = {
        'id': channel.Channel('MyTypeName', artifacts=[first, second]),
    }

    unwrapped = channel.unwrap_channel_dict(wrapped)

    self.assertDictEqual(unwrapped, {'id': [first, second]})
def test_fetch_previous_result(self):
    """Exercise previous_execution and fetch_previous_result_artifacts.

    A single execution is registered and published, then:
      * previous_execution must return None when the exec properties, the
        input artifacts or the component info differ from that execution,
        and the execution id when everything matches;
      * fetch_previous_result_artifacts must return the artifact that was
        published for that execution (same id and type id, state published).
    """
    with metadata.Metadata(connection_config=self._connection_config) as m:
        # Create a 'previous' execution.
        exec_properties = {'log_root': 'path'}
        eid = m.register_execution(exec_properties=exec_properties,
                                   pipeline_info=self._pipeline_info,
                                   component_info=self._component_info)
        input_artifact = types.TfxArtifact(type_name='ExamplesPath')
        m.publish_artifacts([input_artifact])
        output_artifact = types.TfxArtifact(type_name='ExamplesPath')
        input_artifacts = {'input': [input_artifact]}
        output_artifacts = {'output': [output_artifact]}
        m.publish_execution(eid, input_artifacts, output_artifacts)

        # Test previous_execution.
        # Different exec properties: no match.
        self.assertIsNone(
            m.previous_execution(input_artifacts=input_artifacts,
                                 exec_properties={},
                                 pipeline_info=self._pipeline_info,
                                 component_info=self._component_info))
        # Different input artifacts: no match.
        self.assertIsNone(
            m.previous_execution(input_artifacts={},
                                 exec_properties=exec_properties,
                                 pipeline_info=self._pipeline_info,
                                 component_info=self._component_info))
        # Different component info: no match.
        self.assertIsNone(
            m.previous_execution(input_artifacts=input_artifacts,
                                 exec_properties=exec_properties,
                                 pipeline_info=self._pipeline_info,
                                 component_info=data_types.ComponentInfo(
                                     component_id='unique',
                                     component_type='a.b.c')))
        # Everything identical: the registered execution is found.
        self.assertEqual(
            eid,
            m.previous_execution(input_artifacts=input_artifacts,
                                 exec_properties=exec_properties,
                                 pipeline_info=self._pipeline_info,
                                 component_info=self._component_info))

        # Test fetch_previous_result_artifacts.
        new_output_artifact = types.TfxArtifact(type_name='ExamplesPath')
        self.assertNotEqual(types.ARTIFACT_STATE_PUBLISHED,
                            new_output_artifact.state)
        new_output_dict = {'output': [new_output_artifact]}
        updated_output_dict = m.fetch_previous_result_artifacts(
            new_output_dict, eid)
        previous_artifact = output_artifacts['output'][-1].artifact
        current_artifact = updated_output_dict['output'][-1].artifact
        self.assertEqual(types.ARTIFACT_STATE_PUBLISHED,
                         current_artifact.properties['state'].string_value)
        self.assertEqual(previous_artifact.id, current_artifact.id)
        self.assertEqual(previous_artifact.type_id, current_artifact.type_id)
def test_construct(self):
    """Check that StatisticsGen's output is 'ExampleStatisticsPath'."""
    split_examples = [
        types.TfxArtifact(type_name='ExamplesPath', split='train'),
        types.TfxArtifact(type_name='ExamplesPath', split='eval'),
    ]

    statistics_gen = component.StatisticsGen(
        input_data=channel.as_channel(split_examples))

    self.assertEqual('ExampleStatisticsPath',
                     statistics_gen.outputs.output.type_name)
def setUp(self):
    """Create one input and one output artifact plus their JSON forms.

    Each artifact is tagged with an orchestration source (key, component id)
    before being serialized, so the JSON strings include that source.
    """
    # Input artifact.
    self.input_one = types.TfxArtifact('INPUT_ONE')
    self.input_one.source = airflow_component._OrchestrationSource(
        'input_one_key', 'input_one_component_id')

    # Output artifact.
    self.output_one = types.TfxArtifact('OUTPUT_ONE')
    self.output_one.source = airflow_component._OrchestrationSource(
        'output_one_key', 'output_one_component_id')

    # JSON-encoded single-element artifact lists.
    input_payload = [self.input_one.json_dict()]
    self.input_one_json = json.dumps(input_payload)
    output_payload = [self.output_one.json_dict()]
    self.output_one_json = json.dumps(output_payload)

    self._logger_config = logging_utils.LoggerConfig()
def setUp(self):
    """Create a mock metadata handler and minimal artifact dicts."""
    metadata_mock = tf.test.mock.Mock()
    self._mock_metadata = metadata_mock
    # Explicit mock so calls to publish_execution can be inspected.
    metadata_mock.publish_execution = tf.test.mock.Mock()

    input_artifact = types.TfxArtifact(type_name='InputType')
    self._input_dict = {'input_data': [input_artifact]}
    output_artifact = types.TfxArtifact(type_name='OutputType')
    self._output_dict = {'output_data': [output_artifact]}
    self._execution_id = 100
def test_construct_without_transform_output(self):
    """Trainer built without a transform output emits 'ModelExportPath'."""
    examples_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ExamplesPath')])
    schema_channel = channel.as_channel(
        [types.TfxArtifact(type_name='SchemaPath')])

    trainer = component.Trainer(
        module_file='/path/to/module/file',
        examples=examples_channel,
        schema=schema_channel,
        train_args=trainer_pb2.TrainArgs(num_steps=100),
        eval_args=trainer_pb2.EvalArgs(num_steps=50))

    self.assertEqual('ModelExportPath', trainer.outputs.output.type_name)
def test_construct(self):
    """Check that Pusher's model_push output is 'ModelPushPath'."""
    model_export_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ModelExportPath')])
    model_blessing_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ModelBlessingPath')])
    destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory='push_destination'))

    pusher = component.Pusher(
        model_export=model_export_channel,
        model_blessing=model_blessing_channel,
        push_destination=destination)

    self.assertEqual('ModelPushPath', pusher.outputs.model_push.type_name)
def test_construct(self):
    """Check that ExampleValidator's output is 'ExampleValidationPath'."""
    stats_channel = channel.as_channel([
        types.TfxArtifact(type_name='ExampleStatisticsPath', split='eval'),
    ])
    schema_channel = channel.as_channel(
        [types.TfxArtifact(type_name='SchemaPath')])

    example_validator = component.ExampleValidator(
        stats=stats_channel, schema=schema_channel)

    self.assertEqual('ExampleValidationPath',
                     example_validator.outputs.output.type_name)
def test_construct_with_slice_spec(self):
    """Evaluator given a feature slicing spec still emits 'ModelEvalPath'."""
    examples_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ExamplesPath')])
    model_channel = channel.as_channel(
        [types.TfxArtifact(type_name='ModelExportPath')])
    slicing_spec = evaluator_pb2.FeatureSlicingSpec(specs=[
        evaluator_pb2.SingleSlicingSpec(
            column_for_slicing=['trip_start_hour']),
    ])

    evaluator = component.Evaluator(
        examples=examples_channel,
        model_exports=model_channel,
        feature_slicing_spec=slicing_spec)

    self.assertEqual('ModelEvalPath', evaluator.outputs.output.type_name)
def testDo(self, mock_client):
    """Run the BigQuery example-gen executor against a mocked client.

    The output config hashes rows into a 'train' split (2 buckets) and an
    'eval' split (1 bucket); both TFRecord files must exist afterwards and
    the train file must be larger than the eval file.
    """
    # Mock query result schema for _BigQueryConverter.
    query_result = mock_client.return_value.query.return_value.result
    query_result.return_value.schema = self._schema

    output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                 self.get_temp_dir())
    output_data_dir = os.path.join(output_root, self._testMethodName)

    # Create output dict.
    train_examples = types.TfxArtifact(type_name='ExamplesPath', split='train')
    train_examples.uri = os.path.join(output_data_dir, 'train')
    eval_examples = types.TfxArtifact(type_name='ExamplesPath', split='eval')
    eval_examples.uri = os.path.join(output_data_dir, 'eval')
    output_dict = {'examples': [train_examples, eval_examples]}

    # Create exec properties.
    input_config = example_gen_pb2.Input(splits=[
        example_gen_pb2.Input.Split(
            name='bq', pattern='SELECT i, f, s FROM `fake`'),
    ])
    output_config = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(splits=[
            example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=2),
            example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1),
        ]))
    exec_properties = {
        'input_config': json_format.MessageToJson(input_config),
        'output_config': json_format.MessageToJson(output_config),
    }

    # Run executor.
    big_query_example_gen = executor.Executor()
    big_query_example_gen.Do({}, output_dict, exec_properties)

    # Check BigQuery example gen outputs.
    train_output_file = os.path.join(train_examples.uri,
                                     'data_tfrecord-00000-of-00001.gz')
    eval_output_file = os.path.join(eval_examples.uri,
                                    'data_tfrecord-00000-of-00001.gz')
    self.assertTrue(tf.gfile.Exists(train_output_file))
    self.assertTrue(tf.gfile.Exists(eval_output_file))
    self.assertGreater(
        tf.gfile.GFile(train_output_file).size(),
        tf.gfile.GFile(eval_output_file).size())
def test_do(self):
    """Run the evaluator executor and check its three output locations.

    After Do() the eval output uri must contain 'eval_config', 'metrics'
    and 'plots'.
    """
    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')
    output_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                 self.get_temp_dir())
    output_data_dir = os.path.join(output_root, self._testMethodName)

    # Create input dict. Only the eval split is given a uri here.
    train_examples = types.TfxArtifact(type_name='ExamplesPath', split='train')
    eval_examples = types.TfxArtifact(type_name='ExamplesPath', split='eval')
    eval_examples.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
    model_exports = types.TfxArtifact(type_name='ModelExportPath')
    model_exports.uri = os.path.join(source_data_dir, 'trainer/current/')
    input_dict = {
        'examples': [train_examples, eval_examples],
        'model_exports': [model_exports],
    }

    # Create output dict.
    eval_output = types.TfxArtifact('ModelEvalPath')
    eval_output.uri = os.path.join(output_data_dir, 'eval_output')
    output_dict = {'output': [eval_output]}

    # Create exec properties.
    slicing_spec = evaluator_pb2.FeatureSlicingSpec(specs=[
        evaluator_pb2.SingleSlicingSpec(
            column_for_slicing=['trip_start_hour']),
        evaluator_pb2.SingleSlicingSpec(
            column_for_slicing=['trip_start_day', 'trip_miles']),
    ])
    exec_properties = {
        'feature_slicing_spec': json_format.MessageToJson(slicing_spec),
    }

    # Run executor.
    evaluator = executor.Executor()
    evaluator.Do(input_dict, output_dict, exec_properties)

    # Check evaluator outputs.
    self.assertTrue(
        tf.gfile.Exists(os.path.join(eval_output.uri, 'eval_config')))
    self.assertTrue(
        tf.gfile.Exists(os.path.join(eval_output.uri, 'metrics')))
    self.assertTrue(
        tf.gfile.Exists(os.path.join(eval_output.uri, 'plots')))
def __init__(self,
             input_config: example_gen_pb2.Input,
             output_config: Optional[example_gen_pb2.Output] = None,
             component_name: Optional[Text] = 'ExampleGen',
             example_artifacts: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
    """Construct a QueryBasedExampleGen component.

    Args:
      input_config: An example_gen_pb2.Input instance, providing input
        configuration.
      output_config: An example_gen_pb2.Output instance, providing output
        configuration. If unset, default splits will be 'train' and 'eval'
        with size 2:1.
      component_name: Name of the component, should be unique per component
        class. Defaults to 'ExampleGen'; can be overwritten by sub-classes.
      example_artifacts: Optional channel of 'ExamplesPath' for output train
        and eval examples. When not given, one 'ExamplesPath' artifact is
        created per generated output split name.
      name: Unique name for every component class instance.
    """
    # Configure outputs: fall back to the default output config when the
    # caller did not supply one.
    if not output_config:
        output_config = utils.make_default_output_config(input_config)

    # Build the default output channel only when none was supplied.
    if not example_artifacts:
        split_names = utils.generate_output_split_names(
            input_config, output_config)
        example_artifacts = channel.as_channel([
            types.TfxArtifact('ExamplesPath', split=split_name)
            for split_name in split_names
        ])

    spec = QueryBasedExampleGenSpec(
        component_name=component_name,
        input_config=input_config,
        output_config=output_config,
        examples=example_artifacts)
    super(_QueryBasedExampleGen, self).__init__(spec=spec, name=name)
def __init__(self,
             model_export: channel.Channel,
             model_blessing: channel.Channel,
             slack_token: Text,
             channel_id: Text,
             timeout_sec: int,
             slack_blessing: Optional[channel.Channel] = None,
             name: Optional[Text] = None):
    """Construct a SlackComponent.

    Args:
      model_export: A Channel of 'ModelExportPath' type, usually produced by
        Trainer component.
      model_blessing: A Channel of 'ModelBlessingPath' type, usually produced
        by ModelValidator component.
      slack_token: A token used for setting up connection with Slack server.
      channel_id: Slack channel id to communicate on.
      timeout_sec: Seconds to wait for a response before defaulting to
        reject.
      slack_blessing: Optional output channel of 'ModelBlessingPath' with
        result of blessing; will be created for you if not specified.
      name: Optional unique name. Necessary if multiple SlackComponent
        instances are declared in the same pipeline.
    """
    # Create the default blessing output channel when none was supplied.
    if not slack_blessing:
        slack_blessing = channel.Channel(
            type_name='ModelBlessingPath',
            artifacts=[types.TfxArtifact('ModelBlessingPath')])

    spec = SlackComponentSpec(
        slack_token=slack_token,
        channel_id=channel_id,
        timeout_sec=timeout_sec,
        model_export=model_export,
        model_blessing=model_blessing,
        slack_blessing=slack_blessing)
    super(SlackComponent, self).__init__(spec=spec, name=name)