def testDoWithBlessedModel(self):
  input_dict = {
      'examples': [self._examples],
      'model_export': [self._model_export],
      'model_blessing': [self._model_blessing],
  }
  output_dict = {
      'output': [self._inference_result],
  }
  # Create exec properties.
  exec_properties = {
      'data_spec': json_format.MessageToJson(bulk_inferrer_pb2.DataSpec()),
      'model_spec': json_format.MessageToJson(bulk_inferrer_pb2.ModelSpec()),
      'component_id': self.component_id,
  }
  # Run executor.
  bulk_inferrer = executor.Executor(self._context)
  bulk_inferrer.Do(input_dict, output_dict, exec_properties)
  # Check outputs.
  self.assertTrue(tf.io.gfile.exists(self._prediction_log_dir))
  results = self._get_results(self._prediction_log_dir)
  self.assertTrue(results)
  self.assertEqual(
      len(results[0].classify_log.response.result.classifications), 1)
  self.assertEqual(
      len(results[0].classify_log.response.result.classifications[0].classes),
      2)
def __init__(self,
             examples: types.Channel = None,
             model: Optional[types.Channel] = None,
             model_blessing: Optional[types.Channel] = None,
             data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                       Dict[Text, Any]]] = None,
             model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                        Dict[Text, Any]]] = None,
             output_example_spec: Optional[Union[
                 bulk_inferrer_pb2.OutputExampleSpec, Dict[Text, Any]]] = None):
  """Construct a BulkInferrer component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`, usually produced
      by an ExampleGen component. _required_
    model: A Channel of type `standard_artifacts.Model`, usually produced by a
      Trainer component.
    model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
      usually produced by a ModelValidator component.
    data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
      selection. If any field is provided as a RuntimeParameter, data_spec
      should be constructed as a dict with the same field names as the DataSpec
      proto message.
    model_spec: bulk_inferrer_pb2.ModelSpec instance that describes the model
      specification. If any field is provided as a RuntimeParameter, model_spec
      should be constructed as a dict with the same field names as the
      ModelSpec proto message.
    output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance; specify
      it if you want BulkInferrer to output examples instead of an inference
      result. If any field is provided as a RuntimeParameter,
      output_example_spec should be constructed as a dict with the same field
      names as the OutputExampleSpec proto message.
  """
  if output_example_spec:
    output_examples = types.Channel(type=standard_artifacts.Examples)
    inference_result = None
  else:
    inference_result = types.Channel(type=standard_artifacts.InferenceResult)
    output_examples = None
  spec = BulkInferrerSpec(
      examples=examples,
      model=model,
      model_blessing=model_blessing,
      data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
      model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
      output_example_spec=output_example_spec,
      inference_result=inference_result,
      output_examples=output_examples)
  super(BulkInferrer, self).__init__(spec=spec)
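# Note: a minimal, hypothetical sketch of the dict/RuntimeParameter pattern the
# docstring above describes, not code from this repository. The parameter name
# 'model-signature-name' and its default are made-up values; the dict key
# mirrors bulk_inferrer_pb2.ModelSpec.model_signature_name, and data_types is
# assumed to be tfx.orchestration.data_types.
signature_name = data_types.RuntimeParameter(
    name='model-signature-name', ptype=Text, default='serving_default')
bulk_inferrer = BulkInferrer(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    model_spec={'model_signature_name': signature_name})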
def _get_inference_spec(
    self, model_path: Text,
    exec_properties: Dict[Text, Any]) -> model_spec_pb2.InferenceSpecType:
  model_spec = bulk_inferrer_pb2.ModelSpec()
  proto_utils.json_to_proto(exec_properties['model_spec'], model_spec)
  saved_model_spec = model_spec_pb2.SavedModelSpec(
      model_path=model_path,
      tag=model_spec.tag,
      signature_name=model_spec.model_signature_name)
  result = model_spec_pb2.InferenceSpecType()
  result.saved_model_spec.CopyFrom(saved_model_spec)
  return result
def _get_inference_spec(
    self, model_path: str,
    exec_properties: Dict[str, Any]) -> model_spec_pb2.InferenceSpecType:
  model_spec = bulk_inferrer_pb2.ModelSpec()
  proto_utils.json_to_proto(
      exec_properties[standard_component_specs.MODEL_SPEC_KEY], model_spec)
  saved_model_spec = model_spec_pb2.SavedModelSpec(
      model_path=model_path,
      tag=model_spec.tag,
      signature_name=model_spec.model_signature_name)
  result = model_spec_pb2.InferenceSpecType()
  result.saved_model_spec.CopyFrom(saved_model_spec)
  return result
def __init__(self,
             examples: types.Channel = None,
             model: Optional[types.Channel] = None,
             model_blessing: Optional[types.Channel] = None,
             data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                       Dict[Text, Any]]] = None,
             model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                        Dict[Text, Any]]] = None,
             inference_result: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None,
             enable_cache: Optional[bool] = None):
  """Construct a BulkInferrer component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`, usually produced
      by an ExampleGen component. _required_
    model: A Channel of type `standard_artifacts.Model`, usually produced by a
      Trainer component.
    model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
      usually produced by a ModelValidator component.
    data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
      selection. If any field is provided as a RuntimeParameter, data_spec
      should be constructed as a dict with the same field names as the DataSpec
      proto message.
    model_spec: bulk_inferrer_pb2.ModelSpec instance that describes the model
      specification. If any field is provided as a RuntimeParameter, model_spec
      should be constructed as a dict with the same field names as the
      ModelSpec proto message.
    inference_result: Channel of type `standard_artifacts.InferenceResult` to
      store the inference results.
    instance_name: Optional name assigned to this specific instance of
      BulkInferrer. Required only if multiple BulkInferrer components are
      declared in the same pipeline.
    enable_cache: Optional boolean to indicate whether caching is enabled for
      the BulkInferrer component. If not specified, defaults to the value
      specified for the pipeline's enable_cache parameter.
  """
  inference_result = inference_result or types.Channel(
      type=standard_artifacts.InferenceResult,
      artifacts=[standard_artifacts.InferenceResult()])
  spec = BulkInferrerSpec(
      examples=examples,
      model=model,
      model_blessing=model_blessing,
      data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
      model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
      inference_result=inference_result)
  super(BulkInferrer, self).__init__(
      spec=spec, instance_name=instance_name, enable_cache=enable_cache)
def __init__(
    self,
    examples: types.BaseChannel,
    model: Optional[types.BaseChannel] = None,
    model_blessing: Optional[types.BaseChannel] = None,
    data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                              data_types.RuntimeParameter]] = None,
    model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                               data_types.RuntimeParameter]] = None,
    output_example_spec: Optional[Union[bulk_inferrer_pb2.OutputExampleSpec,
                                        data_types.RuntimeParameter]] = None):
  """Construct a BulkInferrer component.

  Args:
    examples: A BaseChannel of type `standard_artifacts.Examples`, usually
      produced by an ExampleGen component. _required_
    model: A BaseChannel of type `standard_artifacts.Model`, usually produced
      by a Trainer component.
    model_blessing: A BaseChannel of type `standard_artifacts.ModelBlessing`,
      usually produced by a ModelValidator component.
    data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
      selection.
    model_spec: bulk_inferrer_pb2.ModelSpec instance that describes the model
      specification.
    output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance; specify
      it if you want BulkInferrer to output examples instead of an inference
      result.
  """
  if output_example_spec:
    output_examples = types.Channel(type=standard_artifacts.Examples)
    inference_result = None
  else:
    inference_result = types.Channel(type=standard_artifacts.InferenceResult)
    output_examples = None
  spec = standard_component_specs.BulkInferrerSpec(
      examples=examples,
      model=model,
      model_blessing=model_blessing,
      data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
      model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
      output_example_spec=output_example_spec,
      inference_result=inference_result,
      output_examples=output_examples)
  super().__init__(spec=spec)
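# Note: a minimal sketch of wiring this constructor so BulkInferrer emits
# annotated examples instead of an InferenceResult. The OutputExampleSpec
# message layout matches the one used elsewhere in this code; the
# output_key/output_column values and the upstream component names are
# illustrative assumptions, not values from this repository.
output_example_spec = bulk_inferrer_pb2.OutputExampleSpec(
    output_columns_spec=[
        bulk_inferrer_pb2.OutputColumnsSpec(
            predict_output=bulk_inferrer_pb2.PredictOutput(output_columns=[
                bulk_inferrer_pb2.PredictOutputCol(
                    output_key='scores', output_column='predicted_score')
            ]))
    ])
bulk_inferrer = BulkInferrer(
    examples=inference_example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    output_example_spec=output_example_spec)
# The annotated examples would then be read from
# bulk_inferrer.outputs['output_examples'] rather than 'inference_result'.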
def __init__(self,
             examples: types.Channel = None,
             model_export: Optional[types.Channel] = None,
             model_blessing: Optional[types.Channel] = None,
             model_push: Optional[types.Channel] = None,
             data_spec: Optional[bulk_inferrer_pb2.DataSpec] = None,
             model_spec: Optional[bulk_inferrer_pb2.ModelSpec] = None,
             output: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
  """Construct a BulkInferrer component.

  Args:
    examples: A Channel of 'ExamplesPath' type, usually produced by an
      ExampleGen component. _required_
    model_export: A Channel of 'ModelExportPath' type, usually produced by a
      Trainer component.
    model_blessing: A Channel of 'ModelBlessingPath' type, usually produced by
      a ModelValidator component.
    model_push: A Channel of 'PushedModel' type, usually produced by a Pusher
      component.
    data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
      selection.
    model_spec: bulk_inferrer_pb2.ModelSpec instance that describes the model
      specification.
    output: Channel of `InferenceResult` to store the inference results.
    instance_name: Optional name assigned to this specific instance of
      BulkInferrer. Required only if multiple BulkInferrer components are
      declared in the same pipeline.
  """
  output = output or types.Channel(
      type=standard_artifacts.InferenceResult,
      artifacts=[standard_artifacts.InferenceResult()])
  spec = BulkInferrerSpec(
      examples=examples,
      model_export=model_export,
      model_blessing=model_blessing,
      model_push=model_push,
      data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
      model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
      output=output)
  super(BulkInferrer, self).__init__(spec=spec, instance_name=instance_name)
def generate_pipeline(pipeline_name, pipeline_root, train_data, test_data,
                      train_steps, eval_steps, pusher_target, runner):
  module_file = 'util.py'  # util.py is a file in the same folder.

  # RuntimeParameter is currently only supported on KubeflowDagRunner.
  if runner == 'kubeflow':
    pipeline_root_param = os.path.join('gs://{{kfp-default-bucket}}',
                                       pipeline_name, '{{workflow.uid}}')
    train_data_param = data_types.RuntimeParameter(
        name='train-data',
        default='gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/train',
        ptype=Text)
    test_data_param = data_types.RuntimeParameter(
        name='test-data',
        default='gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/test',
        ptype=Text)
    pusher_target_param = data_types.RuntimeParameter(
        name='pusher-destination',
        default='gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/serving',
        ptype=Text)
  else:
    pipeline_root_param = pipeline_root
    train_data_param = train_data
    test_data_param = test_data
    pusher_target_param = pusher_target

  examples = external_input(train_data_param)
  example_gen = CsvExampleGen(input=examples, instance_name='train')

  test_examples = external_input(test_data_param)
  test_example_gen = CsvExampleGen(
      input=test_examples,
      output_config={
          'split_config': {
              'splits': [{
                  'name': 'test',
                  'hash_buckets': 1
              }]
          }
      },
      instance_name='test')

  statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
  # infer_feature_shape controls sparse vs. dense features.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=True)

  # Transform is too slow on my side.
  transform = Transform(
      examples=example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  trainer = Trainer(
      custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
      examples=transform.outputs['transformed_examples'],
      transform_graph=transform.outputs['transform_graph'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file,
      train_args=trainer_pb2.TrainArgs(num_steps=train_steps),
      eval_args=trainer_pb2.EvalArgs(num_steps=eval_steps),
      instance_name='train',
      enable_cache=False)

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(label_key='target')],
      # Adding more slices, e.g. tfma.SlicingSpec(feature_keys=['var_0',
      # 'var_1']), prevents Evaluator from outputting a BLESSED status.
      # It appears to be a bug in TFMA.
      slicing_specs=[tfma.SlicingSpec()],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'binary_accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.4}),
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      # baseline_model=model_resolver.outputs['model'],
      # The change threshold is ignored if there is no baseline (first run).
      eval_config=eval_config,
      instance_name='eval5')

  # Checks whether the model passed the validation steps and pushes the model
  # to a file destination if the check passed.
  pusher = Pusher(
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      push_destination={'filesystem': {'base_directory': pusher_target_param}})

  bulk_inferrer = BulkInferrer(
      examples=test_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      # model_blessing=evaluator.outputs['blessing'],
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec(),
      instance_name='bulkInferrer')

  hello = component.HelloComponent(
      input_data=bulk_inferrer.outputs['inference_result'],
      instance_name='csvGen')

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root_param,
      components=[
          example_gen, statistics_gen, schema_gen, transform, trainer,
          model_resolver, evaluator, pusher, hello, test_example_gen,
          bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          os.path.join(pipeline_root, 'metadata.sqlite')),
      beam_pipeline_args=['--direct_num_workers=0'])
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text, metadata_path: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  # Brings training data into the pipeline or otherwise joins/converts
  # training data.
  training_examples = external_input(training_data_root)
  training_example_gen = CsvExampleGen(
      input_base=training_examples, instance_name='training_example_gen')

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(
      input_data=training_example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=training_example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=infer_schema.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Uses TFMA to compute evaluation statistics over features of a model.
  model_analyzer = Evaluator(
      examples=training_example_gen.outputs['examples'],
      model_exports=trainer.outputs['model'],
      feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
          evaluator_pb2.SingleSlicingSpec(
              column_for_slicing=['trip_start_hour'])
      ]))

  # Performs quality validation of a candidate model (compared to a baseline).
  model_validator = ModelValidator(
      examples=training_example_gen.outputs['examples'],
      model=trainer.outputs['model'])

  # Brings inference data into the pipeline.
  inference_examples = external_input(inference_data_root)
  inference_example_gen = CsvExampleGen(
      input_base=inference_examples,
      output_config=example_gen_pb2.Output(
          split_config=example_gen_pb2.SplitConfig(splits=[
              example_gen_pb2.SplitConfig.Split(
                  name='unlabelled', hash_buckets=100)
          ])),
      instance_name='inference_example_gen')

  # Performs offline batch inference over inference examples.
  bulk_inferrer = BulkInferrer(
      examples=inference_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      model_blessing=model_validator.outputs['blessing'],
      # Empty data_spec.example_splits will result in using all splits.
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec())

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          training_example_gen, inference_example_gen, statistics_gen,
          infer_schema, validate_stats, transform, trainer, model_analyzer,
          model_validator, bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      # TODO(b/141578059): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text, metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  # Brings training data into the pipeline or otherwise joins/converts
  # training data.
  training_example_gen = CsvExampleGen(
      input_base=training_data_root, instance_name='training_example_gen')

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(
      input_data=training_example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  schema_gen = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  example_validator = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=schema_gen.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=training_example_gen.outputs['examples'],
      schema=schema_gen.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=schema_gen.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Get the latest blessed model for model validation.
  model_resolver = ResolverNode(
      instance_name='latest_blessed_model_resolver',
      resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
      model=Channel(type=Model),
      model_blessing=Channel(type=ModelBlessing))

  # Uses TFMA to compute evaluation statistics over features of a model and
  # perform quality validation of a candidate model (compared to a baseline).
  eval_config = tfma.EvalConfig(
      model_specs=[tfma.ModelSpec(signature_name='eval')],
      slicing_specs=[
          tfma.SlicingSpec(),
          tfma.SlicingSpec(feature_keys=['trip_start_hour'])
      ],
      metrics_specs=[
          tfma.MetricsSpec(
              thresholds={
                  'accuracy':
                      tfma.config.MetricThreshold(
                          value_threshold=tfma.GenericValueThreshold(
                              lower_bound={'value': 0.6}),
                          # Change threshold will be ignored if there is no
                          # baseline model resolved from MLMD (first run).
                          change_threshold=tfma.GenericChangeThreshold(
                              direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                              absolute={'value': -1e-10}))
              })
      ])
  evaluator = Evaluator(
      examples=training_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      baseline_model=model_resolver.outputs['model'],
      eval_config=eval_config)

  # Brings inference data into the pipeline.
  inference_example_gen = CsvExampleGen(
      input_base=inference_data_root,
      output_config=example_gen_pb2.Output(
          split_config=example_gen_pb2.SplitConfig(splits=[
              example_gen_pb2.SplitConfig.Split(
                  name='unlabelled', hash_buckets=100)
          ])),
      instance_name='inference_example_gen')

  # Performs offline batch inference over inference examples.
  bulk_inferrer = BulkInferrer(
      examples=inference_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      model_blessing=evaluator.outputs['blessing'],
      # Empty data_spec.example_splits will result in using all splits.
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec())

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          training_example_gen, inference_example_gen, statistics_gen,
          schema_gen, example_validator, transform, trainer, model_resolver,
          evaluator, bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      beam_pipeline_args=beam_pipeline_args)
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Runs batch inference on a given model with given input examples.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for inference.
      - model: exported model.
      - model_blessing: model blessing result.
    output_dict: Output dict from output key to a list of Artifacts.
      - inference_result: bulk inference results.
    exec_properties: A dict of execution properties.
      - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
      - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)
  if 'examples' not in input_dict:
    raise ValueError('\'examples\' is missing in input dict.')
  if 'inference_result' not in output_dict:
    raise ValueError('\'inference_result\' is missing in output dict.')
  output = artifact_utils.get_single_instance(output_dict['inference_result'])
  if 'model' not in input_dict:
    raise ValueError('Input models are not valid; a model needs to be '
                     'specified.')
  if 'model_blessing' in input_dict:
    model_blessing = artifact_utils.get_single_instance(
        input_dict['model_blessing'])
    if not model_utils.is_model_blessed(model_blessing):
      output.set_int_custom_property('inferred', 0)
      logging.info('Model on %s was not blessed', model_blessing.uri)
      return
  else:
    logging.info('Model blessing is not provided; the exported model will be '
                 'used.')

  model = artifact_utils.get_single_instance(input_dict['model'])
  model_path = path_utils.serving_model_path(model.uri)
  logging.info('Use exported model from %s.', model_path)

  data_spec = bulk_inferrer_pb2.DataSpec()
  json_format.Parse(exec_properties['data_spec'], data_spec)
  example_uris = {}
  if data_spec.example_splits:
    for example in input_dict['examples']:
      for split in artifact_utils.decode_split_names(example.split_names):
        if split in data_spec.example_splits:
          example_uris[split] = os.path.join(example.uri, split)
  else:
    for example in input_dict['examples']:
      for split in artifact_utils.decode_split_names(example.split_names):
        example_uris[split] = os.path.join(example.uri, split)

  model_spec = bulk_inferrer_pb2.ModelSpec()
  json_format.Parse(exec_properties['model_spec'], model_spec)
  output_path = os.path.join(output.uri, _PREDICTION_LOGS_DIR_NAME)
  self._run_model_inference(model_path, example_uris, output_path, model_spec)
  logging.info('BulkInferrer generates prediction log to %s', output_path)
  output.set_int_custom_property('inferred', 1)
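# Note: a minimal sketch of the DataSpec that the split-selection logic above
# consumes; the split name 'unlabelled' is just an example. Leaving
# example_splits empty makes the executor use every split, as in the else
# branch above.
data_spec = bulk_inferrer_pb2.DataSpec(example_splits=['unlabelled'])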
def setUp(self):
  super(ExecutorTest, self).setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.dirname(__file__)), 'testdata')
  self._output_data_dir = os.path.join(
      os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
      self._testMethodName)
  self.component_id = 'test_component'

  # Create input dict.
  self._examples = standard_artifacts.Examples()
  unlabelled_path = os.path.join(self._source_data_dir, 'csv_example_gen',
                                 'unlabelled')
  self._examples.uri = os.path.join(self._output_data_dir, 'csv_example_gen')
  io_utils.copy_dir(unlabelled_path,
                    os.path.join(self._examples.uri, 'unlabelled'))
  io_utils.copy_dir(unlabelled_path,
                    os.path.join(self._examples.uri, 'unlabelled2'))
  self._examples.split_names = artifact_utils.encode_split_names(
      ['unlabelled', 'unlabelled2'])
  self._model = standard_artifacts.Model()
  self._model.uri = os.path.join(self._source_data_dir, 'trainer/current')
  self._model_blessing = standard_artifacts.ModelBlessing()
  self._model_blessing.uri = os.path.join(self._source_data_dir,
                                          'model_validator/blessed')
  self._model_blessing.set_int_custom_property('blessed', 1)
  self._input_dict = {
      'examples': [self._examples],
      'model': [self._model],
      'model_blessing': [self._model_blessing],
  }

  # Create output dict.
  self._inference_result = standard_artifacts.InferenceResult()
  self._prediction_log_dir = os.path.join(self._output_data_dir,
                                          'prediction_logs')
  self._inference_result.uri = self._prediction_log_dir
  self._output_examples = standard_artifacts.Examples()
  self._output_examples_dir = os.path.join(self._output_data_dir,
                                           'output_examples')
  self._output_examples.uri = self._output_examples_dir
  self._output_dict_ir = {
      'inference_result': [self._inference_result],
  }
  self._output_dict_oe = {
      'output_examples': [self._output_examples],
  }

  # Create exec properties.
  self._exec_properties = {
      'data_spec': proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
      'model_spec': proto_utils.proto_to_json(bulk_inferrer_pb2.ModelSpec()),
      'component_id': self.component_id,
  }

  # Create context.
  self._tmp_dir = os.path.join(self._output_data_dir, '.temp')
  self._context = executor.Executor.Context(
      tmp_dir=self._tmp_dir, unique_id='2')
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Runs batch inference on a given model with given input examples.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for inference.
      - model: exported model.
      - model_blessing: model blessing result, optional.
    output_dict: Output dict from output key to a list of Artifacts.
      - output: bulk inference results.
    exec_properties: A dict of execution properties.
      - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
      - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)

  source = exec_properties[StepKeys.SOURCE]
  args = exec_properties[StepKeys.ARGS]
  c = source_utils.load_source_path_class(source)
  inferrer_step: BaseInferrer = c(**args)

  output_examples = artifact_utils.get_single_instance(
      output_dict[PREDICTIONS])

  if EXAMPLES not in input_dict:
    raise ValueError('\'examples\' is missing in input dict.')
  if MODEL not in input_dict:
    raise ValueError('Input models are not valid; a model needs to be '
                     'specified.')
  if MODEL_BLESSING in input_dict:
    model_blessing = artifact_utils.get_single_instance(
        input_dict['model_blessing'])
    if not model_utils.is_model_blessed(model_blessing):
      logging.info('Model on %s was not blessed', model_blessing.uri)
      return
  else:
    logging.info('Model blessing is not provided; the exported model will be '
                 'used.')

  model = artifact_utils.get_single_instance(input_dict[MODEL])
  model_path = path_utils.serving_model_path(model.uri)
  logging.info('Use exported model from %s.', model_path)

  output_example_spec = bulk_inferrer_pb2.OutputExampleSpec(
      output_columns_spec=[
          bulk_inferrer_pb2.OutputColumnsSpec(
              predict_output=bulk_inferrer_pb2.PredictOutput(output_columns=[
                  bulk_inferrer_pb2.PredictOutputCol(
                      output_key=x,
                      output_column=f'{x}_label',
                  ) for x in inferrer_step.get_labels()
              ]))
      ])

  model_spec = bulk_inferrer_pb2.ModelSpec()
  saved_model_spec = model_spec_pb2.SavedModelSpec(
      model_path=model_path,
      tag=model_spec.tag,
      signature_name=model_spec.model_signature_name)
  inference_spec = model_spec_pb2.InferenceSpecType()
  inference_spec.saved_model_spec.CopyFrom(saved_model_spec)

  self._run_model_inference(output_example_spec, input_dict[EXAMPLES],
                            output_examples, inference_spec, inferrer_step)
def __init__(self,
             examples: types.Channel = None,
             model: Optional[types.Channel] = None,
             model_blessing: Optional[types.Channel] = None,
             data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                       Dict[Text, Any]]] = None,
             model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                        Dict[Text, Any]]] = None,
             output_example_spec: Optional[Union[
                 bulk_inferrer_pb2.OutputExampleSpec, Dict[Text, Any]]] = None,
             inference_result: Optional[types.Channel] = None,
             output_examples: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
  """Construct a BulkInferrer component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`, usually produced
      by an ExampleGen component. _required_
    model: A Channel of type `standard_artifacts.Model`, usually produced by a
      Trainer component.
    model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
      usually produced by a ModelValidator component.
    data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
      selection. If any field is provided as a RuntimeParameter, data_spec
      should be constructed as a dict with the same field names as the DataSpec
      proto message.
    model_spec: bulk_inferrer_pb2.ModelSpec instance that describes the model
      specification. If any field is provided as a RuntimeParameter, model_spec
      should be constructed as a dict with the same field names as the
      ModelSpec proto message.
    output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance; specify
      it if you want BulkInferrer to output examples instead of an inference
      result. If any field is provided as a RuntimeParameter,
      output_example_spec should be constructed as a dict with the same field
      names as the OutputExampleSpec proto message.
    inference_result: Channel of type `standard_artifacts.InferenceResult` to
      store the inference results; must not be specified when
      output_example_spec is set.
    output_examples: Channel of type `standard_artifacts.Examples` to store the
      output examples; must not be specified when output_example_spec is unset.
      Check output_example_spec for details.
    instance_name: Optional name assigned to this specific instance of
      BulkInferrer. Required only if multiple BulkInferrer components are
      declared in the same pipeline.

  Raises:
    ValueError: If inference_result or output_examples is specified
      inconsistently with output_example_spec.
  """
  if output_example_spec:
    if inference_result:
      raise ValueError(
          'Must not specify inference_result when output_example_spec is set.')
    output_examples = output_examples or types.Channel(
        type=standard_artifacts.Examples)
  else:
    if output_examples:
      raise ValueError(
          'Must not specify output_examples when output_example_spec is '
          'unset.')
    inference_result = inference_result or types.Channel(
        type=standard_artifacts.InferenceResult)
  spec = BulkInferrerSpec(
      examples=examples,
      model=model,
      model_blessing=model_blessing,
      data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
      model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
      output_example_spec=output_example_spec,
      inference_result=inference_result,
      output_examples=output_examples)
  super(BulkInferrer, self).__init__(spec=spec, instance_name=instance_name)
def Do(self, input_dict: Dict[Text, List[types.Artifact]],
       output_dict: Dict[Text, List[types.Artifact]],
       exec_properties: Dict[Text, Any]) -> None:
  """Runs batch inference on a given model with given input examples.

  Args:
    input_dict: Input dict from input key to a list of Artifacts.
      - examples: examples for inference.
      - model_export: exported model.
      - model_blessing: model blessing result.
      - model_push: pushed model.
      Either model_push or (model_export and model_blessing) needs to be
      present.
    output_dict: Output dict from output key to a list of Artifacts.
      - output: bulk inference results.
    exec_properties: A dict of execution properties.
      - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
      - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

  Returns:
    None
  """
  self._log_startup(input_dict, output_dict, exec_properties)
  if 'examples' not in input_dict:
    raise ValueError('\'examples\' is missing in input dict.')
  if 'output' not in output_dict:
    raise ValueError('\'output\' is missing in output dict.')
  output = artifact_utils.get_single_instance(output_dict['output'])
  if 'model_push' in input_dict:
    model_push = artifact_utils.get_single_instance(input_dict['model_push'])
    model_path = io_utils.get_only_uri_in_dir(model_push.uri)
    logging.info('Use pushed model from %s.', model_path)
  elif 'model_blessing' in input_dict and 'model_export' in input_dict:
    model_blessing = artifact_utils.get_single_instance(
        input_dict['model_blessing'])
    if not model_utils.is_model_blessed(model_blessing):
      output.set_int_custom_property('inferred', 0)
      logging.info('Model on %s was not blessed', model_blessing.uri)
      return
    model_export = artifact_utils.get_single_instance(
        input_dict['model_export'])
    model_path = path_utils.serving_model_path(model_export.uri)
    logging.info('Use exported model from %s.', model_path)
  else:
    raise ValueError('Input models are not valid. Either model_push or '
                     '(model_blessing and model_export) needs to be '
                     'specified.')

  data_spec = bulk_inferrer_pb2.DataSpec()
  json_format.Parse(exec_properties['data_spec'], data_spec)
  example_uris = {}
  if data_spec.example_splits:
    for example in input_dict['examples']:
      if example.split in data_spec.example_splits:
        example_uris[example.split] = example.uri
  else:
    for example in input_dict['examples']:
      example_uris[example.split] = example.uri

  model_spec = bulk_inferrer_pb2.ModelSpec()
  json_format.Parse(exec_properties['model_spec'], model_spec)
  output_path = os.path.join(output.uri, _PREDICTION_LOGS_DIR_NAME)
  self._run_model_inference(model_path, example_uris, output_path, model_spec)
  logging.info('BulkInferrer generates prediction log to %s', output_path)
  output.set_int_custom_property('inferred', 1)