def testStrategy_IrMode(self):
  # Model with id 1, will be blessed.
  model_one = standard_artifacts.Model()
  model_one.uri = 'model_one'
  model_one.id = 1
  # Model with id 2, will be blessed.
  model_two = standard_artifacts.Model()
  model_two.uri = 'model_two'
  model_two.id = 2
  # Model with id 3, will not be blessed.
  model_three = standard_artifacts.Model()
  model_three.uri = 'model_three'
  model_three.id = 3

  model_blessing_one = standard_artifacts.ModelBlessing()
  self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
  model_blessing_two = standard_artifacts.ModelBlessing()
  self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)

  strategy = latest_blessed_model_strategy.LatestBlessedModelStrategy()
  result = strategy.resolve_artifacts(
      self._store, {
          'model': [model_one, model_two, model_three],
          'model_blessing': [model_blessing_one, model_blessing_two]
      })
  self.assertIsNotNone(result)
  self.assertEqual([a.uri for a in result['model']], ['model_two'])

def testGetLatestBlessedModelArtifact_IrMode(self):
  with metadata.Metadata(connection_config=self._connection_config) as m:
    # Model with id 1, will be blessed.
    model_one = standard_artifacts.Model()
    model_one.uri = 'model_one'
    model_one.id = 1
    # Model with id 2, will be blessed.
    model_two = standard_artifacts.Model()
    model_two.uri = 'model_two'
    model_two.id = 2
    # Model with id 3, will not be blessed.
    model_three = standard_artifacts.Model()
    model_three.uri = 'model_three'
    model_three.id = 3

    model_blessing_one = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
    model_blessing_two = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)

    resolver = latest_blessed_model_resolver.LatestBlessedModelResolver()
    result = resolver.resolve_artifacts(
        m, {
            'model': [model_one, model_two, model_three],
            'model_blessing': [model_blessing_one, model_blessing_two]
        })
    self.assertIsNotNone(result)
    self.assertEqual([a.uri for a in result['model']], ['model_two'])

def setUp(self):
  super(ExecutorTest, self).setUp()
  self._testdata_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._module_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'example')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self._context = executor.Executor.Context(
      tmp_dir=self._output_data_dir, unique_id='1')

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(self._testdata_dir, 'data')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(self._testdata_dir, 'schema')
  self._input_dict = {
      'examples': [examples],
      'schema': [schema],
  }

  # Create output dict.
  model = standard_artifacts.Model()
  model.uri = os.path.join(self._output_data_dir, 'model')
  self._best_hparams = standard_artifacts.Model()
  self._best_hparams.uri = os.path.join(self._output_data_dir, 'best_hparams')
  self._output_dict = {
      'model': [model],
      'best_hyperparameters': [self._best_hparams],
  }

def testDoValidation(self, exec_properties, blessed, has_baseline):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  model = standard_artifacts.Model()
  baseline_model = standard_artifacts.Model()
  model.uri = os.path.join(source_data_dir, 'trainer/current')
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(source_data_dir, 'schema_gen')
  input_dict = {
      EXAMPLES_KEY: [examples],
      MODEL_KEY: [model],
      SCHEMA_KEY: [schema],
  }
  if has_baseline:
    input_dict[BASELINE_MODEL_KEY] = [baseline_model]

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      EVALUATION_KEY: [eval_output],
      BLESSING_KEY: [blessing_output],
  }

  # List needs to be serialized before being passed into Do function.
  exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(None)

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'validations')))
  if blessed:
    self.assertTrue(
        fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))
  else:
    self.assertTrue(
        fileio.exists(os.path.join(blessing_output.uri, 'NOT_BLESSED')))

def setUp(self):
  super(KubeflowGCPIntegrationTest, self).setUp()

  # Example artifacts for testing.
  raw_train_examples = standard_artifacts.Examples(split='train')
  raw_train_examples.uri = os.path.join(
      self._intermediate_data_root,
      'csv_example_gen/examples/test-pipeline/train/')
  raw_eval_examples = standard_artifacts.Examples(split='eval')
  raw_eval_examples.uri = os.path.join(
      self._intermediate_data_root,
      'csv_example_gen/examples/test-pipeline/eval/')
  self._test_raw_examples = [raw_train_examples, raw_eval_examples]

  # Transformed Example artifacts for testing.
  transformed_train_examples = standard_artifacts.Examples(split='train')
  transformed_train_examples.uri = os.path.join(
      self._intermediate_data_root,
      'transform/transformed_examples/test-pipeline/train/')
  transformed_eval_examples = standard_artifacts.Examples(split='eval')
  transformed_eval_examples.uri = os.path.join(
      self._intermediate_data_root,
      'transform/transformed_examples/test-pipeline/eval/')
  self._test_transformed_examples = [
      transformed_train_examples, transformed_eval_examples
  ]

  # Schema artifact for testing.
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(self._intermediate_data_root,
                            'schema_gen/output/test-pipeline/')
  self._test_schema = [schema]

  # TransformGraph artifact for testing.
  transform_graph = standard_artifacts.TransformGraph()
  transform_graph.uri = os.path.join(
      self._intermediate_data_root, 'transform/transform_output/test-pipeline/')
  self._test_transform_graph = [transform_graph]

  # Model artifacts for testing.
  model_1 = standard_artifacts.Model()
  model_1.uri = os.path.join(self._intermediate_data_root,
                             'trainer/output/test-pipeline/1/')
  self._test_model_1 = [model_1]
  model_2 = standard_artifacts.Model()
  model_2.uri = os.path.join(self._intermediate_data_root,
                             'trainer/output/test-pipeline/2/')
  self._test_model_2 = [model_2]

  # ModelBlessing artifact for testing.
  model_blessing = standard_artifacts.ModelBlessing()
  model_blessing.uri = os.path.join(
      self._intermediate_data_root, 'model_validator/blessing/test-pipeline/')
  self._test_model_blessing = [model_blessing]

def testStrategy(self):
  contexts = self._metadata.register_pipeline_contexts_if_not_exists(
      self._pipeline_info)
  # Model with id 1, will be blessed.
  model_one = standard_artifacts.Model()
  model_one.uri = 'model_one'
  self._metadata.publish_artifacts([model_one])
  # Model with id 2, will be blessed.
  model_two = standard_artifacts.Model()
  model_two.uri = 'model_two'
  self._metadata.publish_artifacts([model_two])
  # Model with id 3, will not be blessed.
  model_three = standard_artifacts.Model()
  model_three.uri = 'model_three'
  self._metadata.publish_artifacts([model_three])

  model_blessing_one = standard_artifacts.ModelBlessing()
  self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
  model_blessing_two = standard_artifacts.ModelBlessing()
  self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)
  self._metadata.publish_artifacts([model_blessing_one, model_blessing_two])

  self._metadata.register_execution(
      exec_properties={},
      pipeline_info=self._pipeline_info,
      component_info=self._component_info,
      contexts=contexts)
  self._metadata.publish_execution(
      component_info=self._component_info,
      output_artifacts={
          'a': [model_one, model_two, model_three],
          'b': [model_blessing_one, model_blessing_two]
      })

  strategy = latest_blessed_model_strategy.LatestBlessedModelStrategy()
  resolve_result = strategy.resolve(
      pipeline_info=self._pipeline_info,
      metadata_handler=self._metadata,
      source_channels={
          'model':
              types.Channel(
                  type=standard_artifacts.Model,
                  producer_component_id=self._component_info.component_id,
                  output_key='a'),
          'model_blessing':
              types.Channel(
                  type=standard_artifacts.ModelBlessing,
                  producer_component_id=self._component_info.component_id,
                  output_key='b')
      })

  self.assertTrue(resolve_result.has_complete_result)
  self.assertEqual(
      [a.uri for a in resolve_result.per_key_resolve_result['model']],
      ['model_two'])
  self.assertTrue(resolve_result.per_key_resolve_state['model'])

def setUp(self):
  super(ExecutorTest, self).setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(self._source_data_dir,
                              'transform/transformed_examples')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  transform_output = standard_artifacts.TransformGraph()
  transform_output.uri = os.path.join(self._source_data_dir,
                                      'transform/transform_output')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(self._source_data_dir, 'schema_gen')
  previous_model = standard_artifacts.Model()
  previous_model.uri = os.path.join(self._source_data_dir, 'trainer/previous')
  self._input_dict = {
      executor.EXAMPLES_KEY: [examples],
      executor.TRANSFORM_GRAPH_KEY: [transform_output],
      executor.SCHEMA_KEY: [schema],
      executor.BASE_MODEL_KEY: [previous_model]
  }

  # Create output dict.
  self._model_exports = standard_artifacts.Model()
  self._model_exports.uri = os.path.join(self._output_data_dir,
                                         'model_export_path')
  self._output_dict = {executor.OUTPUT_MODEL_KEY: [self._model_exports]}

  # Create exec properties skeleton.
  self._exec_properties = {
      'train_args':
          json_format.MessageToJson(
              trainer_pb2.TrainArgs(num_steps=1000),
              preserving_proto_field_name=True),
      'eval_args':
          json_format.MessageToJson(
              trainer_pb2.EvalArgs(num_steps=500),
              preserving_proto_field_name=True),
      'warm_starting':
          False,
  }

  self._module_file = os.path.join(self._source_data_dir, 'module_file',
                                   'trainer_module.py')
  self._trainer_fn = '%s.%s' % (trainer_module.trainer_fn.__module__,
                                trainer_module.trainer_fn.__name__)

  # Executors for test.
  self._trainer_executor = executor.Executor()
  self._generic_trainer_executor = executor.GenericExecutor()

def setUp(self):
  super(ExecutorTest, self).setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  train_examples = standard_artifacts.Examples(split='train')
  train_examples.uri = os.path.join(self._source_data_dir,
                                    'transform/transformed_examples/train/')
  eval_examples = standard_artifacts.Examples(split='eval')
  eval_examples.uri = os.path.join(self._source_data_dir,
                                   'transform/transformed_examples/eval/')
  transform_output = standard_artifacts.TransformGraph()
  transform_output.uri = os.path.join(self._source_data_dir,
                                      'transform/transform_output/')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(self._source_data_dir, 'schema_gen/')
  previous_model = standard_artifacts.Model()
  previous_model.uri = os.path.join(self._source_data_dir, 'trainer/previous/')
  self._input_dict = {
      'examples': [train_examples, eval_examples],
      'transform_output': [transform_output],
      'schema': [schema],
      'base_model': [previous_model]
  }

  # Create output dict.
  self._model_exports = standard_artifacts.Model()
  self._model_exports.uri = os.path.join(self._output_data_dir,
                                         'model_export_path')
  self._output_dict = {'output': [self._model_exports]}

  # Create exec properties skeleton.
  self._exec_properties = {
      'train_args':
          json_format.MessageToJson(
              trainer_pb2.TrainArgs(num_steps=1000),
              preserving_proto_field_name=True),
      'eval_args':
          json_format.MessageToJson(
              trainer_pb2.EvalArgs(num_steps=500),
              preserving_proto_field_name=True),
      'warm_starting':
          False,
  }

  self._module_file = os.path.join(self._source_data_dir, 'module_file',
                                   'trainer_module.py')
  self._trainer_fn = '%s.%s' % (trainer_module.trainer_fn.__module__,
                                trainer_module.trainer_fn.__name__)

  # Executor for test.
  self._trainer_executor = executor.Executor()

def testConstructWithBaselineModel(self):
  examples = standard_artifacts.Examples()
  model_exports = standard_artifacts.Model()
  baseline_model = standard_artifacts.Model()
  evaluator = component.Evaluator(
      examples=channel_utils.as_channel([examples]),
      model=channel_utils.as_channel([model_exports]),
      baseline_model=channel_utils.as_channel([baseline_model]))
  self.assertEqual(standard_artifacts.ModelEvaluation.TYPE_NAME,
                   evaluator.outputs['evaluation'].type_name)

def testEvalution(self, exec_properties, model_agnostic=False):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  baseline_model = standard_artifacts.Model()
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(source_data_dir, 'schema_gen')
  input_dict = {
      EXAMPLES_KEY: [examples],
      SCHEMA_KEY: [schema],
  }
  if not model_agnostic:
    model = standard_artifacts.Model()
    model.uri = os.path.join(source_data_dir, 'trainer/current')
    input_dict[MODEL_KEY] = [model]

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      EVALUATION_KEY: [eval_output],
      BLESSING_KEY: [blessing_output],
  }

  # Test multiple splits.
  exec_properties[EXAMPLE_SPLITS_KEY] = json_utils.dumps(['train', 'eval'])

  if MODULE_FILE_KEY in exec_properties:
    exec_properties[MODULE_FILE_KEY] = os.path.join(source_data_dir,
                                                    'module_file',
                                                    'evaluator_module.py')

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      fileio.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(fileio.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertFalse(
      fileio.exists(os.path.join(blessing_output.uri, 'BLESSED')))

def testGetQualifiedArtifacts(self):
  with self.metadata() as m:
    contexts_one = m.register_pipeline_contexts_if_not_exists(
        self._pipeline_info)
    contexts_two = m.register_pipeline_contexts_if_not_exists(
        self._pipeline_info3)

    # The first execution, with matched:
    # - pipeline context
    # - producer component id
    m.register_execution(
        exec_properties={},
        pipeline_info=self._pipeline_info,
        component_info=self._component_info,
        contexts=list(contexts_one))
    # artifact_one will be output with matched artifact type and output key.
    artifact_one = standard_artifacts.Model()
    # artifact_two will be output with matched artifact type only.
    artifact_two = standard_artifacts.Model()
    m.publish_execution(
        component_info=self._component_info,
        output_artifacts={
            'k1': [artifact_one],
            'k2': [artifact_two]
        })

    # The second execution, with matched pipeline context only.
    m.register_execution(
        exec_properties={},
        pipeline_info=self._pipeline_info,
        component_info=self._component_info2,
        contexts=list(contexts_one))
    # artifact_three will be output with matched artifact type and output key.
    artifact_three = standard_artifacts.Model()
    m.publish_execution(
        component_info=self._component_info2,
        output_artifacts={'k1': [artifact_three]})

    # The third execution, with matched producer component id only.
    m.register_execution(
        exec_properties={},
        pipeline_info=self._pipeline_info3,
        component_info=self._component_info3,
        contexts=list(contexts_two))
    # artifact_four will be output with matched artifact type and output key.
    artifact_four = standard_artifacts.Model()
    m.publish_execution(
        component_info=self._component_info3,
        output_artifacts={'k1': [artifact_four]})

    result = m.get_qualified_artifacts(
        contexts=contexts_one,
        type_name=standard_artifacts.Model().type_name,
        producer_component_id=self._component_info.component_id,
        output_key='k1')
    self.assertEqual(len(result), 1)
    self.assertEqual(result[0].artifact.id, artifact_one.id)

def testConstruct(self):
  input_model = standard_artifacts.Model()
  output_model = standard_artifacts.Model()
  this_component = component.Transform(
      function_name='component_test.pouet',
      input_model=channel_utils.as_channel([input_model]),
      output_model=channel_utils.as_channel([output_model])).with_id(
          u'Testing123')
  self.assertEqual(standard_artifacts.Model.TYPE_NAME,
                   this_component.outputs[OUTPUT_MODEL_KEY].type_name)
  artifact_collection = this_component.outputs[OUTPUT_MODEL_KEY].get()
  self.assertIsNotNone(artifact_collection)

def testDoLegacySingleEvalSavedModelWFairness(self, exec_properties):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  model = standard_artifacts.Model()
  baseline_model = standard_artifacts.Model()
  model.uri = os.path.join(source_data_dir, 'trainer/current')
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  input_dict = {
      executor.EXAMPLES_KEY: [examples],
      executor.MODEL_KEY: [model],
  }

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  output_dict = {executor.EVALUATION_KEY: [eval_output]}

  try:
    # Need to import the following module so that the fairness indicator
    # post-export metric is registered. This may raise an ImportError if the
    # currently-installed version of TFMA does not support fairness
    # indicators.
    import tensorflow_model_analysis.addons.fairness.post_export_metrics.fairness_indicators  # pylint: disable=g-import-not-at-top, unused-variable
    exec_properties['fairness_indicator_thresholds'] = [
        0.1, 0.3, 0.5, 0.7, 0.9
    ]
  except ImportError:
    absl.logging.warning(
        'Not testing fairness indicators because a compatible TFMA version '
        'is not installed.')

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))

def testBuildDummyConsumerWithCondition(self):
  producer_task_1 = test_utils.dummy_producer_component(
      output1=channel_utils.as_channel([standard_artifacts.Model()]),
      param1='value1',
  ).with_id('producer_task_1')
  producer_task_2 = test_utils.dummy_producer_component_2(
      output1=channel_utils.as_channel([standard_artifacts.Model()]),
      param1='value2',
  ).with_id('producer_task_2')
  # This test tests two things:
  # 1. Nested conditions. The condition string of consumer_task should contain
  #    both predicates.
  # 2. Implicit channels. consumer_task only takes producer_task_1's output.
  #    But producer_task_2 is used in the condition, hence producer_task_2
  #    should be added to the dependencies of consumer_task.
  # See testdata for details.
  with conditional.Cond(
      producer_task_1.outputs['output1'].future()[0].uri != 'uri'):
    with conditional.Cond(producer_task_2.outputs['output1'].future()
                          [0].property('property') == 'value1'):
      consumer_task = test_utils.dummy_consumer_component(
          input1=producer_task_1.outputs['output1'],
          param1=1,
      )
  # Need to construct a pipeline to set producer_component_id.
  unused_pipeline = tfx.dsl.Pipeline(
      pipeline_name='pipeline-with-condition',
      pipeline_root='',
      components=[producer_task_1, producer_task_2, consumer_task],
  )
  deployment_config = pipeline_pb2.PipelineDeploymentConfig()
  component_defs = {}
  my_builder = step_builder.StepBuilder(
      node=consumer_task,
      image='gcr.io/tensorflow/tfx:latest',
      deployment_config=deployment_config,
      component_defs=component_defs)
  actual_step_spec = self._sole(my_builder.build())
  actual_component_def = self._sole(component_defs)

  self.assertProtoEquals(
      test_utils.get_proto_from_test_data(
          'expected_dummy_consumer_with_condition_component.pbtxt',
          pipeline_pb2.ComponentSpec()), actual_component_def)
  self.assertProtoEquals(
      test_utils.get_proto_from_test_data(
          'expected_dummy_consumer_with_condition_task.pbtxt',
          pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
  self.assertProtoEquals(
      test_utils.get_proto_from_test_data(
          'expected_dummy_consumer_with_condition_executor.pbtxt',
          pipeline_pb2.PipelineDeploymentConfig()), deployment_config)

def setUp(self):
  super(ExecutorTest, self).setUp()
  self.component_id = 'test_component'
  self._model_data_dir = tempfile.mkdtemp()

  # Build and save a small Keras model to serve as the input model artifact.
  num_classes = 10
  input_shape = (28, 28, 1)
  model = keras.Sequential([
      keras.Input(shape=input_shape),
      keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
      keras.layers.MaxPooling2D(pool_size=(2, 2)),
      keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
      keras.layers.MaxPooling2D(pool_size=(2, 2)),
      keras.layers.Flatten(),
      keras.layers.Dropout(0.5),
      keras.layers.Dense(num_classes, activation='softmax'),
  ])
  model.compile(
      loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  model.save(os.path.join(self._model_data_dir, 'Format-Serving'))
  del model

  # Create input dict.
  self._model = standard_artifacts.Model()
  self._model.uri = self._model_data_dir
  self._input_dict = {
      INPUT_MODEL_KEY: [self._model],
  }

  # Create output dict.
  self._output_model = standard_artifacts.Model()
  self._output_model_dir = os.path.join(tempfile.mkdtemp())
  self._output_model.uri = self._output_model_dir
  self._output_dict_sr = {
      OUTPUT_MODEL_KEY: [self._output_model],
  }

  # Create exec properties.
  self._exec_properties = {
      FUNCTION_NAME_KEY:
          'tfx_x.components.model.transform.executor.identity',
  }

  # Create context.
  self._tmp_dir = os.path.join(self._output_model_dir, '.temp')
  self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')

def setUp(self):
  super().setUp()
  self._testdata_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self._context = executor.Executor.Context(
      tmp_dir=self._output_data_dir, unique_id='1')

  # Create input dict.
  e1 = standard_artifacts.Examples()
  e1.uri = os.path.join(self._testdata_dir, 'penguin', 'data')
  e1.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  e2 = copy.deepcopy(e1)
  self._single_artifact = [e1]
  self._multiple_artifacts = [e1, e2]

  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(self._testdata_dir, 'penguin', 'schema')

  base_model = standard_artifacts.Model()
  base_model.uri = os.path.join(self._testdata_dir, 'trainer/previous')

  self._input_dict = {
      standard_component_specs.EXAMPLES_KEY: self._single_artifact,
      standard_component_specs.SCHEMA_KEY: [schema],
      standard_component_specs.BASE_MODEL_KEY: [base_model]
  }

  # Create output dict.
  self._best_hparams = standard_artifacts.Model()
  self._best_hparams.uri = os.path.join(self._output_data_dir, 'best_hparams')
  self._output_dict = {
      standard_component_specs.BEST_HYPERPARAMETERS_KEY: [self._best_hparams],
  }

  # Create exec properties.
  self._exec_properties = {
      standard_component_specs.TRAIN_ARGS_KEY:
          proto_utils.proto_to_json(trainer_pb2.TrainArgs(num_steps=100)),
      standard_component_specs.EVAL_ARGS_KEY:
          proto_utils.proto_to_json(trainer_pb2.EvalArgs(num_steps=50)),
  }

def testEvalution(self, exec_properties):
  source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)

  # Create input dict.
  examples = standard_artifacts.Examples()
  examples.uri = os.path.join(source_data_dir, 'csv_example_gen')
  examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])
  model = standard_artifacts.Model()
  baseline_model = standard_artifacts.Model()
  model.uri = os.path.join(source_data_dir, 'trainer/current')
  baseline_model.uri = os.path.join(source_data_dir, 'trainer/previous/')
  schema = standard_artifacts.Schema()
  schema.uri = os.path.join(source_data_dir, 'schema_gen')
  input_dict = {
      constants.EXAMPLES_KEY: [examples],
      constants.MODEL_KEY: [model],
      constants.SCHEMA_KEY: [schema],
  }

  # Create output dict.
  eval_output = standard_artifacts.ModelEvaluation()
  eval_output.uri = os.path.join(output_data_dir, 'eval_output')
  blessing_output = standard_artifacts.ModelBlessing()
  blessing_output.uri = os.path.join(output_data_dir, 'blessing_output')
  output_dict = {
      constants.EVALUATION_KEY: [eval_output],
      constants.BLESSING_KEY: [blessing_output],
  }

  # Run executor.
  evaluator = executor.Executor()
  evaluator.Do(input_dict, output_dict, exec_properties)

  # Check evaluator outputs.
  self.assertTrue(
      tf.io.gfile.exists(os.path.join(eval_output.uri, 'eval_config.json')))
  self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'metrics')))
  self.assertTrue(tf.io.gfile.exists(os.path.join(eval_output.uri, 'plots')))
  self.assertFalse(
      tf.io.gfile.exists(os.path.join(blessing_output.uri, 'BLESSED')))

def testGetLatestBlessedModelArtifact(self):
  with metadata.Metadata(connection_config=self._connection_config) as m:
    contexts = m.register_pipeline_contexts_if_not_exists(self._pipeline_info)
    # Model with id 1, will be blessed.
    model_one = standard_artifacts.Model()
    model_one.uri = 'model_one'
    m.publish_artifacts([model_one])
    # Model with id 2, will be blessed.
    model_two = standard_artifacts.Model()
    model_two.uri = 'model_two'
    m.publish_artifacts([model_two])
    # Model with id 3, will not be blessed.
    model_three = standard_artifacts.Model()
    model_three.uri = 'model_three'
    m.publish_artifacts([model_three])

    model_blessing_one = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_one, model_one.id, 1)
    model_blessing_two = standard_artifacts.ModelBlessing()
    self._set_model_blessing_bit(model_blessing_two, model_two.id, 1)
    m.publish_artifacts([model_blessing_one, model_blessing_two])

    m.register_execution(
        input_artifacts={
            'a': [model_one, model_two, model_three],
            'b': [model_blessing_one, model_blessing_two]
        },
        exec_properties={},
        pipeline_info=self._pipeline_info,
        component_info=self._component_info,
        contexts=contexts)

    resolver = latest_blessed_model_resolver.LatestBlessedModelResolver()
    resolve_result = resolver.resolve(
        pipeline_info=self._pipeline_info,
        metadata_handler=m,
        source_channels={
            'model':
                types.Channel(type=standard_artifacts.Model),
            'model_blessing':
                types.Channel(type=standard_artifacts.ModelBlessing)
        })

    self.assertTrue(resolve_result.has_complete_result)
    self.assertEqual([
        artifact.uri
        for artifact in resolve_result.per_key_resolve_result['model']
    ], ['model_two'])
    self.assertTrue(resolve_result.per_key_resolve_state['model'])

def setUp(self):
  super(ExecutorTest, self).setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self.component_id = 'test_component'

  # Create input dict.
  self._examples = standard_artifacts.Examples(split='unlabelled')
  self._examples.uri = os.path.join(self._source_data_dir,
                                    'csv_example_gen/unlabelled/')
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._model_blessing.uri = os.path.join(self._source_data_dir,
                                          'model_validator/blessed')
  self._model_blessing.set_int_custom_property('blessed', 1)
  self._inference_result = standard_artifacts.InferenceResult()
  self._prediction_log_dir = os.path.join(self._output_data_dir,
                                          'prediction_logs')
  self._inference_result.uri = self._prediction_log_dir

  # Create context.
  self._tmp_dir = os.path.join(self._output_data_dir, '.temp')
  self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')

def setUp(self):
  super(LocalDockerRunnerTest, self).setUp()

  base_dir = os.path.join(
      os.path.dirname(  # components/
          os.path.dirname(  # infra_validator/
              os.path.dirname(__file__))),  # model_server_runners/
      'testdata')
  self.model = standard_artifacts.Model()
  self.model.uri = os.path.join(base_dir, 'trainer', 'current')

  # Mock LocalDockerModelServerRunner._FindAvailablePort.
  self.find_available_port_patcher = mock.patch.object(
      LocalDockerModelServerRunner, '_FindAvailablePort')
  self.find_available_port = self.find_available_port_patcher.start()
  self.find_available_port.return_value = 1234

  # Mock docker.DockerClient.
  self.docker_client_patcher = mock.patch('docker.DockerClient')
  self.docker_client_cls = self.docker_client_patcher.start()
  self.docker_client = self.docker_client_cls.return_value

  # Mock client factory.
  self.client_factory = mock.Mock()
  self.client = self.client_factory.return_value

def testRunExecutor_with_InplaceUpdateExecutor(self):
  executor_spec = text_format.Parse(
      """
      class_path: "tfx.orchestration.portable.python_executor_operator_test.InplaceUpdateExecutor"
      """, executable_spec_pb2.PythonClassExecutableSpec())
  operator = python_executor_operator.PythonExecutorOperator(executor_spec)
  input_dict = {'input_key': [standard_artifacts.Examples()]}
  output_dict = {'output_key': [standard_artifacts.Model()]}
  exec_properties = {
      'string': 'value',
      'int': 1,
      'float': 0.0,
      # This should not happen on production and will be dropped.
      'proto': execution_result_pb2.ExecutorOutput()
  }
  executor_output = operator.run_executor(
      self._get_execution_info(input_dict, output_dict, exec_properties))
  self.assertProtoPartiallyEquals(
      """
      output_artifacts {
        key: "output_key"
        value {
          artifacts {
            custom_properties {
              key: "name"
              value {
                string_value: "MyPipeline.MyPythonNode.my_model"
              }
            }
          }
        }
      }""", executor_output)

def setUp(self):
  super(LocalDockerRunnerTest, self).setUp()

  base_dir = os.path.join(
      os.path.dirname(  # components/
          os.path.dirname(  # infra_validator/
              os.path.dirname(__file__))),  # model_server_runners/
      'testdata')
  self._model = standard_artifacts.Model()
  self._model.uri = os.path.join(base_dir, 'trainer', 'current')
  self._model_name = 'chicago-taxi'
  self._model_path = path_utils.serving_model_path(self._model.uri)

  # Mock docker.DockerClient.
  patcher = mock.patch('docker.DockerClient')
  self._docker_client = patcher.start().return_value
  self.addCleanup(patcher.stop)

  self._serving_spec = _create_serving_spec({
      'tensorflow_serving': {
          'tags': ['1.15.0']
      },
      'local_docker': {},
      'model_name': self._model_name,
  })
  self._serving_binary = serving_bins.parse_serving_binaries(
      self._serving_spec)[0]
  patcher = mock.patch.object(self._serving_binary, 'MakeClient')
  self._model_server_client = patcher.start().return_value
  self.addCleanup(patcher.stop)

def testRunExecutor_with_InprocessExecutor(self):
  executor_spec = text_format.Parse(
      """
      class_path: "tfx.orchestration.portable.python_executor_operator_test.InprocessExecutor"
      """, executable_spec_pb2.PythonClassExecutableSpec())
  operator = python_executor_operator.PythonExecutorOperator(executor_spec)
  input_dict = {'input_key': [standard_artifacts.Examples()]}
  output_dict = {'output_key': [standard_artifacts.Model()]}
  exec_properties = {'key': 'value'}
  stateful_working_dir = os.path.join(self.tmp_dir, 'stateful_working_dir')
  executor_output_uri = os.path.join(self.tmp_dir, 'executor_output')
  executor_output = operator.run_executor(
      base_executor_operator.ExecutionInfo(
          input_dict=input_dict,
          output_dict=output_dict,
          exec_properties=exec_properties,
          stateful_working_dir=stateful_working_dir,
          executor_output_uri=executor_output_uri))
  self.assertProtoPartiallyEquals(
      """
      execution_properties {
        key: "key"
        value {
          string_value: "value"
        }
      }
      output_artifacts {
        key: "output_key"
        value {
          artifacts {
          }
        }
      }""", executor_output)

def testBuildContainerTask2(self):
  task = test_utils.dummy_producer_component(
      output1=channel_utils.as_channel([standard_artifacts.Model()]),
      param1='value1',
  )
  deployment_config = pipeline_pb2.PipelineDeploymentConfig()
  component_defs = {}
  my_builder = step_builder.StepBuilder(
      node=task,
      image='gcr.io/tensorflow/tfx:latest',
      deployment_config=deployment_config,
      component_defs=component_defs)
  actual_step_spec = self._sole(my_builder.build())
  actual_component_def = self._sole(component_defs)

  # Same as in testBuildContainerTask.
  self.assertProtoEquals(
      test_utils.get_proto_from_test_data(
          'expected_dummy_container_spec_component.pbtxt',
          pipeline_pb2.ComponentSpec()), actual_component_def)
  self.assertProtoEquals(
      test_utils.get_proto_from_test_data(
          'expected_dummy_container_spec_task.pbtxt',
          pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
  self.assertProtoEquals(
      test_utils.get_proto_from_test_data(
          'expected_dummy_container_spec_executor.pbtxt',
          pipeline_pb2.PipelineDeploymentConfig()), deployment_config)

def testGetCacheContextTwiceDifferentOutputs(self):
  with metadata.Metadata(connection_config=self._connection_config) as m:
    self._get_cache_context(m)
    self._get_cache_context(
        m, custom_output_artifacts={'k': [standard_artifacts.Model()]})
    # Different output skeleton will result in a new cache context.
    self.assertLen(m.store.get_contexts(), 2)

def testRunExecutor_with_InprocessExecutor(self):
  executor_spec = text_format.Parse(
      """
      class_path: "tfx.orchestration.portable.python_executor_operator_test.InprocessExecutor"
      """, executable_spec_pb2.PythonClassExecutableSpec())
  operator = python_executor_operator.PythonExecutorOperator(executor_spec)
  input_dict = {'input_key': [standard_artifacts.Examples()]}
  output_dict = {'output_key': [standard_artifacts.Model()]}
  exec_properties = {'key': 'value'}
  executor_output = operator.run_executor(
      self._get_execution_info(input_dict, output_dict, exec_properties))
  self.assertProtoPartiallyEquals(
      """
      execution_properties {
        key: "key"
        value {
          string_value: "value"
        }
      }
      output_artifacts {
        key: "output_key"
        value {
          artifacts {
          }
        }
      }""", executor_output)

def setUp(self):
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self.component_id = 'test_component'

  # Create input dict.
  eval_examples = standard_artifacts.Examples(split='eval')
  eval_examples.uri = os.path.join(self._source_data_dir,
                                   'csv_example_gen/eval/')
  model = standard_artifacts.Model()
  model.uri = os.path.join(self._source_data_dir, 'trainer/current/')
  self._input_dict = {
      'examples': [eval_examples],
      'model': [model],
  }

  # Create output dict.
  self._blessing = standard_artifacts.ModelBlessing()
  self._blessing.uri = os.path.join(output_data_dir, 'blessing')
  self._output_dict = {'blessing': [self._blessing]}

  # Create context.
  self._tmp_dir = os.path.join(output_data_dir, '.temp')
  self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')

def setUp(self):
  super(ComponentTest, self).setUp()
  self._examples = channel_utils.as_channel([standard_artifacts.Examples()])
  self._model = channel_utils.as_channel([standard_artifacts.Model()])
  self._model_blessing = channel_utils.as_channel(
      [standard_artifacts.ModelBlessing()])

def setUp(self):
  super(ExecutorTest, self).setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  tf.io.gfile.makedirs(self._output_data_dir)
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      executor.MODEL_KEY: [self._model_export],
      executor.MODEL_BLESSING_KEY: [self._model_blessing],
  }

  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  tf.io.gfile.makedirs(self._model_push.uri)
  self._output_dict = {
      executor.PUSHED_MODEL_KEY: [self._model_push],
  }
  self._serving_model_dir = os.path.join(self._output_data_dir,
                                         'serving_model_dir')
  tf.io.gfile.makedirs(self._serving_model_dir)
  self._exec_properties = {
      'push_destination':
          json_format.MessageToJson(
              pusher_pb2.PushDestination(
                  filesystem=pusher_pb2.PushDestination.Filesystem(
                      base_directory=self._serving_model_dir)),
              preserving_proto_field_name=True),
  }
  self._executor = executor.Executor()

def setUp(self):
  super(ExecutorTest, self).setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  fileio.makedirs(self._output_data_dir)
  self._model_export = standard_artifacts.Model()
  self._model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._input_dict = {
      MODEL_KEY: [self._model_export],
      MODEL_BLESSING_KEY: [self._model_blessing],
  }

  self._model_push = standard_artifacts.PushedModel()
  self._model_push.uri = os.path.join(self._output_data_dir, 'model_push')
  fileio.makedirs(self._model_push.uri)
  self._output_dict = {
      PUSHED_MODEL_KEY: [self._model_push],
  }
  self._serving_model_dir = os.path.join(self._output_data_dir,
                                         'serving_model_dir')
  fileio.makedirs(self._serving_model_dir)
  self._exec_properties = self._MakeExecProperties()
  self._executor = executor.Executor()