Exemple #1
0
    def testDoWithBlessedModel(self):
        input_dict = {
            'examples': [self._examples],
            'model_export': [self._model_export],
            'model_blessing': [self._model_blessing],
        }
        output_dict = {
            'output': [self._inference_result],
        }
        # Create exe properties.
        exec_properties = {
            'data_spec':
            json_format.MessageToJson(bulk_inferrer_pb2.DataSpec()),
            'model_spec':
            json_format.MessageToJson(bulk_inferrer_pb2.ModelSpec()),
            'component_id': self.component_id,
        }

        # Run executor.
        bulk_inferrer = executor.Executor(self._context)
        bulk_inferrer.Do(input_dict, output_dict, exec_properties)

        # Check outputs.
        self.assertTrue(tf.io.gfile.exists(self._prediction_log_dir))
        results = self._get_results(self._prediction_log_dir)
        self.assertTrue(results)
        self.assertEqual(
            len(results[0].classify_log.response.result.classifications), 1)
        self.assertEqual(
            len(results[0].classify_log.response.result.classifications[0].
                classes), 2)
Exemple #2
0
    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                            Dict[Text, Any]]] = None,
                 output_example_spec: Optional[Union[
                     bulk_inferrer_pb2.OutputExampleSpec, Dict[Text,
                                                               Any]]] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification. If any field is provided as a RuntimeParameter,
        model_spec should be constructed as a dict with the same field names as
        ModelSpec proto message.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
        If any field is provided as a RuntimeParameter, output_example_spec
        should be constructed as a dict with the same field names as
        OutputExampleSpec proto message.
    """
        if output_example_spec:
            output_examples = types.Channel(type=standard_artifacts.Examples)
            inference_result = None
        else:
            inference_result = types.Channel(
                type=standard_artifacts.InferenceResult)
            output_examples = None

        spec = BulkInferrerSpec(examples=examples,
                                model=model,
                                model_blessing=model_blessing,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                output_example_spec=output_example_spec,
                                inference_result=inference_result,
                                output_examples=output_examples)
        super(BulkInferrer, self).__init__(spec=spec)
Exemple #3
0
 def _get_inference_spec(
     self, model_path: Text,
     exec_properties: Dict[Text, Any]) -> model_spec_pb2.InferenceSpecType:
   model_spec = bulk_inferrer_pb2.ModelSpec()
   proto_utils.json_to_proto(exec_properties['model_spec'], model_spec)
   saved_model_spec = model_spec_pb2.SavedModelSpec(
       model_path=model_path,
       tag=model_spec.tag,
       signature_name=model_spec.model_signature_name)
   result = model_spec_pb2.InferenceSpecType()
   result.saved_model_spec.CopyFrom(saved_model_spec)
   return result
Exemple #4
0
 def _get_inference_spec(
     self, model_path: str,
     exec_properties: Dict[str, Any]) -> model_spec_pb2.InferenceSpecType:
   model_spec = bulk_inferrer_pb2.ModelSpec()
   proto_utils.json_to_proto(
       exec_properties[standard_component_specs.MODEL_SPEC_KEY], model_spec)
   saved_model_spec = model_spec_pb2.SavedModelSpec(
       model_path=model_path,
       tag=model_spec.tag,
       signature_name=model_spec.model_signature_name)
   result = model_spec_pb2.InferenceSpecType()
   result.saved_model_spec.CopyFrom(saved_model_spec)
   return result
Exemple #5
0
    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                            Dict[Text, Any]]] = None,
                 inference_result: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None,
                 enable_cache: Optional[bool] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification. If any field is provided as a RuntimeParameter,
        model_spec should be constructed as a dict with the same field names as
        ModelSpec proto message.
      inference_result: Channel of type `standard_artifacts.InferenceResult`
        to store the inference results.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.
      enable_cache: Optional boolean to indicate if cache is enabled for the
        BulkInferrer component. If not specified, defaults to the value
        specified for pipeline's enable_cache parameter.
    """
        inference_result = inference_result or types.Channel(
            type=standard_artifacts.InferenceResult,
            artifacts=[standard_artifacts.InferenceResult()])
        spec = BulkInferrerSpec(examples=examples,
                                model=model,
                                model_blessing=model_blessing,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                inference_result=inference_result)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name,
                                           enable_cache=enable_cache)
Exemple #6
0
    def __init__(
        self,
        examples: types.BaseChannel,
        model: Optional[types.BaseChannel] = None,
        model_blessing: Optional[types.BaseChannel] = None,
        data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                  data_types.RuntimeParameter]] = None,
        model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                   data_types.RuntimeParameter]] = None,
        output_example_spec: Optional[
            Union[bulk_inferrer_pb2.OutputExampleSpec,
                  data_types.RuntimeParameter]] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A BaseChannel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A BaseChannel of type `standard_artifacts.Model`, usually produced
        by a Trainer component.
      model_blessing: A BaseChannel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
    """
        if output_example_spec:
            output_examples = types.Channel(type=standard_artifacts.Examples)
            inference_result = None
        else:
            inference_result = types.Channel(
                type=standard_artifacts.InferenceResult)
            output_examples = None

        spec = standard_component_specs.BulkInferrerSpec(
            examples=examples,
            model=model,
            model_blessing=model_blessing,
            data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
            model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
            output_example_spec=output_example_spec,
            inference_result=inference_result,
            output_examples=output_examples)
        super().__init__(spec=spec)
Exemple #7
0
    def __init__(self,
                 examples: types.Channel = None,
                 model_export: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 model_push: Optional[types.Channel] = None,
                 data_spec: Optional[bulk_inferrer_pb2.DataSpec] = None,
                 model_spec: Optional[bulk_inferrer_pb2.ModelSpec] = None,
                 output: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of 'ExamplesPath' type, usually produced by ExampleGen
        component. _required_
      model_export: A Channel of 'ModelExportPath' type, usually produced by
        Trainer component.
      model_blessing: A Channel of 'ModelBlessingPath' type, usually produced by
        Model Validator component.
      model_push: A Channel of 'PushedModel' type, usually produced by Pusher
        component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification.
      output: Channel of `InferenceResult` to store the inference results.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.
    """
        output = output or types.Channel(
            type=standard_artifacts.InferenceResult,
            artifacts=[standard_artifacts.InferenceResult()])
        spec = BulkInferrerSpec(examples=examples,
                                model_export=model_export,
                                model_blessing=model_blessing,
                                model_push=model_push,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                output=output)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name)
Exemple #8
0
def generate_pipeline(pipeline_name, pipeline_root, train_data, test_data,
                      train_steps, eval_steps, pusher_target, runner):
    module_file = 'util.py'  # util.py is a file in the same folder

    # RuntimeParameter is only supported on KubeflowDagRunner currently
    if runner == 'kubeflow':
        pipeline_root_param = os.path.join('gs://{{kfp-default-bucket}}',
                                           pipeline_name, '{{workflow.uid}}')
        train_data_param = data_types.RuntimeParameter(
            name='train-data',
            default=
            'gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/train',
            ptype=Text)
        test_data_param = data_types.RuntimeParameter(
            name='test-data',
            default=
            'gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/test',
            ptype=Text)
        pusher_target_param = data_types.RuntimeParameter(
            name='pusher-destination',
            default=
            'gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/serving',
            ptype=Text)
    else:
        pipeline_root_param = pipeline_root
        train_data_param = train_data
        test_data_param = test_data
        pusher_target_param = pusher_target

    examples = external_input(train_data_param)
    example_gen = CsvExampleGen(input=examples, instance_name="train")

    test_examples = external_input(test_data_param)
    test_example_gen = CsvExampleGen(input=test_examples,
                                     output_config={
                                         'split_config': {
                                             'splits': [{
                                                 'name': 'test',
                                                 'hash_buckets': 1
                                             }]
                                         }
                                     },
                                     instance_name="test")

    statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
    schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                           infer_feature_shape=True
                           )  # infer_feature_shape controls sparse or dense

    # Transform is too slow in my side.
    transform = Transform(examples=example_gen.outputs['examples'],
                          schema=schema_gen.outputs['schema'],
                          module_file=module_file)

    trainer = Trainer(
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema_gen.outputs['schema'],
        module_file=module_file,
        train_args=trainer_pb2.TrainArgs(num_steps=train_steps),
        eval_args=trainer_pb2.EvalArgs(num_steps=eval_steps),
        instance_name="train",
        enable_cache=False)

    # Get the latest blessed model for model validation.
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.
        LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Uses TFMA to compute a evaluation statistics over features of a model and
    # perform quality validation of a candidate model (compared to a baseline).
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='target')],
        # tfma.SlicingSpec(feature_keys=['var_0', 'var_1']) when add more, Evaluator can't ouptput BLESSED status. It should be a bug in TFMA.
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    'binary_accuracy':
                    tfma.config.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': 0.4}),
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={'value': -1e-10}))
                })
        ])
    evaluator = Evaluator(
        examples=example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        # baseline_model=model_resolver.outputs['model'],
        # Change threshold will be ignored if there is no baseline (first run).
        eval_config=eval_config,
        instance_name="eval5")

    # Checks whether the model passed the validation steps and pushes the model
    # to a file destination if check passed.
    pusher = Pusher(model=trainer.outputs['model'],
                    model_blessing=evaluator.outputs['blessing'],
                    push_destination={
                        'filesystem': {
                            'base_directory': pusher_target_param
                        }
                    })

    bulk_inferrer = BulkInferrer(
        examples=test_example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        # model_blessing=evaluator.outputs['blessing'],
        data_spec=bulk_inferrer_pb2.DataSpec(),
        model_spec=bulk_inferrer_pb2.ModelSpec(),
        instance_name="bulkInferrer")

    hello = component.HelloComponent(
        input_data=bulk_inferrer.outputs['inference_result'],
        instance_name='csvGen')

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root_param,
        components=[
            example_gen, statistics_gen, schema_gen, transform, trainer,
            model_resolver, evaluator, pusher, hello, test_example_gen,
            bulk_inferrer
        ],
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            os.path.join(pipeline_root, 'metadata.sqlite')),
        beam_pipeline_args=['--direct_num_workers=0'])
Exemple #9
0
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text,
                     metadata_path: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  training_examples = external_input(training_data_root)

  # Brings training data into the pipeline or otherwise joins/converts
  # training data.
  training_example_gen = CsvExampleGen(
      input_base=training_examples, instance_name='training_example_gen')

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(
      input_data=training_example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=training_example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)

  # Uses user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=infer_schema.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Uses TFMA to compute a evaluation statistics over features of a model.
  model_analyzer = Evaluator(
      examples=training_example_gen.outputs['examples'],
      model_exports=trainer.outputs['model'],
      feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
          evaluator_pb2.SingleSlicingSpec(
              column_for_slicing=['trip_start_hour'])
      ]))

  # Performs quality validation of a candidate model (compared to a baseline).
  model_validator = ModelValidator(
      examples=training_example_gen.outputs['examples'],
      model=trainer.outputs['model'])

  inference_examples = external_input(inference_data_root)

  # Brings inference data into the pipeline.
  inference_example_gen = CsvExampleGen(
      input_base=inference_examples,
      output_config=example_gen_pb2.Output(
          split_config=example_gen_pb2.SplitConfig(
              splits=[example_gen_pb2.SplitConfig.Split(
                  name='unlabelled', hash_buckets=100)])),
      instance_name='inference_example_gen')

  # Performs offline batch inference over inference examples.
  bulk_inferrer = BulkInferrer(
      examples=inference_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      model_blessing=model_validator.outputs['blessing'],
      # Empty data_spec.example_splits will result in using all splits.
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec())

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          training_example_gen, inference_example_gen, statistics_gen,
          infer_schema, validate_stats, transform, trainer, model_analyzer,
          model_validator, bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      # TODO(b/141578059): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
Exemple #10
0
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text, metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
    """Implements the chicago taxi pipeline with TFX."""
    # Brings training data into the pipeline or otherwise joins/converts
    # training data.
    training_example_gen = CsvExampleGen(input_base=training_data_root,
                                         instance_name='training_example_gen')

    # Computes statistics over data for visualization and example validation.
    statistics_gen = StatisticsGen(
        input_data=training_example_gen.outputs['examples'])

    # Generates schema based on statistics files.
    schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                           infer_feature_shape=False)

    # Performs anomaly detection based on statistics and data schema.
    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema'])

    # Performs transformations and feature engineering in training and serving.
    transform = Transform(examples=training_example_gen.outputs['examples'],
                          schema=schema_gen.outputs['schema'],
                          module_file=module_file)

    # Uses user-provided Python function that implements a model using TF-Learn.
    trainer = Trainer(
        module_file=module_file,
        transformed_examples=transform.outputs['transformed_examples'],
        schema=schema_gen.outputs['schema'],
        transform_graph=transform.outputs['transform_graph'],
        train_args=trainer_pb2.TrainArgs(num_steps=10000),
        eval_args=trainer_pb2.EvalArgs(num_steps=5000))

    # Get the latest blessed model for model validation.
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.
        LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Uses TFMA to compute a evaluation statistics over features of a model and
    # perform quality validation of a candidate model (compared to a baseline).
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(signature_name='eval')],
        slicing_specs=[
            tfma.SlicingSpec(),
            tfma.SlicingSpec(feature_keys=['trip_start_hour'])
        ],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    'accuracy':
                    tfma.config.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': 0.6}),
                        # Change threshold will be ignored if there is no
                        # baseline model resolved from MLMD (first run).
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={'value': -1e-10}))
                })
        ])
    evaluator = Evaluator(examples=training_example_gen.outputs['examples'],
                          model=trainer.outputs['model'],
                          baseline_model=model_resolver.outputs['model'],
                          eval_config=eval_config)

    # Brings inference data into the pipeline.
    inference_example_gen = CsvExampleGen(
        input_base=inference_data_root,
        output_config=example_gen_pb2.Output(
            split_config=example_gen_pb2.SplitConfig(splits=[
                example_gen_pb2.SplitConfig.Split(name='unlabelled',
                                                  hash_buckets=100)
            ])),
        instance_name='inference_example_gen')

    # Performs offline batch inference over inference examples.
    bulk_inferrer = BulkInferrer(
        examples=inference_example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        model_blessing=evaluator.outputs['blessing'],
        # Empty data_spec.example_splits will result in using all splits.
        data_spec=bulk_inferrer_pb2.DataSpec(),
        model_spec=bulk_inferrer_pb2.ModelSpec())

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            training_example_gen, inference_example_gen, statistics_gen,
            schema_gen, example_validator, transform, trainer, model_resolver,
            evaluator, bulk_inferrer
        ],
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        beam_pipeline_args=beam_pipeline_args)
Exemple #11
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'inference_result' not in output_dict:
            raise ValueError('\'inference_result\' is missing in output dict.')
        output = artifact_utils.get_single_instance(
            output_dict['inference_result'])
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'need to be specified.')
        if 'model_blessing' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict['model'])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        example_uris = {}
        if data_spec.example_splits:
            for example in input_dict['examples']:
                for split in artifact_utils.decode_split_names(
                        example.split_names):
                    if split in data_spec.example_splits:
                        example_uris[split] = os.path.join(example.uri, split)
        else:
            for example in input_dict['examples']:
                for split in artifact_utils.decode_split_names(
                        example.split_names):
                    example_uris[split] = os.path.join(example.uri, split)
        model_spec = bulk_inferrer_pb2.ModelSpec()
        json_format.Parse(exec_properties['model_spec'], model_spec)
        output_path = os.path.join(output.uri, _PREDICTION_LOGS_DIR_NAME)
        self._run_model_inference(model_path, example_uris, output_path,
                                  model_spec)
        logging.info('BulkInferrer generates prediction log to %s',
                     output_path)
        output.set_int_custom_property('inferred', 1)
Exemple #12
0
    def setUp(self):
        super(ExecutorTest, self).setUp()
        self._source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        self.component_id = 'test_component'

        # Create input dict.
        self._examples = standard_artifacts.Examples()
        unlabelled_path = os.path.join(self._source_data_dir,
                                       'csv_example_gen', 'unlabelled')
        self._examples.uri = os.path.join(self._output_data_dir,
                                          'csv_example_gen')
        io_utils.copy_dir(unlabelled_path,
                          os.path.join(self._examples.uri, 'unlabelled'))
        io_utils.copy_dir(unlabelled_path,
                          os.path.join(self._examples.uri, 'unlabelled2'))
        self._examples.split_names = artifact_utils.encode_split_names(
            ['unlabelled', 'unlabelled2'])
        self._model = standard_artifacts.Model()
        self._model.uri = os.path.join(self._source_data_dir,
                                       'trainer/current')

        self._model_blessing = standard_artifacts.ModelBlessing()
        self._model_blessing.uri = os.path.join(self._source_data_dir,
                                                'model_validator/blessed')
        self._model_blessing.set_int_custom_property('blessed', 1)

        self._input_dict = {
            'examples': [self._examples],
            'model': [self._model],
            'model_blessing': [self._model_blessing],
        }

        # Create output dict.
        self._inference_result = standard_artifacts.InferenceResult()
        self._prediction_log_dir = os.path.join(self._output_data_dir,
                                                'prediction_logs')
        self._inference_result.uri = self._prediction_log_dir

        self._output_examples = standard_artifacts.Examples()
        self._output_examples_dir = os.path.join(self._output_data_dir,
                                                 'output_examples')
        self._output_examples.uri = self._output_examples_dir

        self._output_dict_ir = {
            'inference_result': [self._inference_result],
        }
        self._output_dict_oe = {
            'output_examples': [self._output_examples],
        }

        # Create exe properties.
        self._exec_properties = {
            'data_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
            'model_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.ModelSpec()),
            'component_id': self.component_id,
        }

        # Create context
        self._tmp_dir = os.path.join(self._output_data_dir, '.temp')
        self._context = executor.Executor.Context(tmp_dir=self._tmp_dir,
                                                  unique_id='2')
Exemple #13
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

        Args:
          input_dict: Input dict from input key to a list of Artifacts.
            - examples: examples for inference.
            - model: exported model.
            - model_blessing: model blessing result, optional.
          output_dict: Output dict from output key to a list of Artifacts.
            - output: bulk inference results.
          exec_properties: A dict of execution properties.
            - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
            - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

        Returns:
          None
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        source = exec_properties[StepKeys.SOURCE]
        args = exec_properties[StepKeys.ARGS]
        c = source_utils.load_source_path_class(source)
        inferrer_step: BaseInferrer = c(**args)

        output_examples = artifact_utils.get_single_instance(
            output_dict[PREDICTIONS])

        if EXAMPLES not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if MODEL not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'need to be specified.')
        if MODEL_BLESSING in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict[MODEL])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        output_example_spec = bulk_inferrer_pb2.OutputExampleSpec(
            output_columns_spec=[
                bulk_inferrer_pb2.OutputColumnsSpec(
                    predict_output=bulk_inferrer_pb2.PredictOutput(
                        output_columns=[
                            bulk_inferrer_pb2.PredictOutputCol(
                                output_key=x,
                                output_column=f'{x}_label',
                            ) for x in inferrer_step.get_labels()
                        ]))
            ])

        model_spec = bulk_inferrer_pb2.ModelSpec()
        saved_model_spec = model_spec_pb2.SavedModelSpec(
            model_path=model_path,
            tag=model_spec.tag,
            signature_name=model_spec.model_signature_name)
        inference_spec = model_spec_pb2.InferenceSpecType()
        inference_spec.saved_model_spec.CopyFrom(saved_model_spec)

        self._run_model_inference(output_example_spec, input_dict[EXAMPLES],
                                  output_examples, inference_spec,
                                  inferrer_step)
Exemple #14
0
    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                            Dict[Text, Any]]] = None,
                 output_example_spec: Optional[Union[
                     bulk_inferrer_pb2.OutputExampleSpec, Dict[Text,
                                                               Any]]] = None,
                 inference_result: Optional[types.Channel] = None,
                 output_examples: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification. If any field is provided as a RuntimeParameter,
        model_spec should be constructed as a dict with the same field names as
        ModelSpec proto message.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
        If any field is provided as a RuntimeParameter, output_example_spec
        should be constructed as a dict with the same field names as
        OutputExampleSpec proto message.
      inference_result: Channel of type `standard_artifacts.InferenceResult`
        to store the inference results, must not be specified when
        output_example_spec is set.
      output_examples: Channel of type `standard_artifacts.Examples`
        to store the output examples, must not be specified when
        output_example_spec is unset. Check output_example_spec for details.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.

    Raises:
      ValueError: Must not specify inference_result or output_examples depends
        on whether output_example_spec is set or not.
    """
        if output_example_spec:
            if inference_result:
                raise ValueError(
                    'Must not specify inference_result when output_example_spec is set.'
                )
            output_examples = output_examples or types.Channel(
                type=standard_artifacts.Examples)
        else:
            if output_examples:
                raise ValueError(
                    'Must not specify output_examples when output_example_spec is unset.'
                )
            inference_result = inference_result or types.Channel(
                type=standard_artifacts.InferenceResult)

        spec = BulkInferrerSpec(examples=examples,
                                model=model,
                                model_blessing=model_blessing,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                output_example_spec=output_example_spec,
                                inference_result=inference_result,
                                output_examples=output_examples)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name)
Exemple #15
0
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model_export: exported model.
        - model_blessing: model blessing result
        - model_push: pushed model Either model_push or (model_export and
          model_blessing) need to present.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'output' not in output_dict:
            raise ValueError('\'output\' is missing in output dict.')
        output = artifact_utils.get_single_instance(output_dict['output'])
        if 'model_push' in input_dict:
            model_push = artifact_utils.get_single_instance(
                input_dict['model_push'])
            model_path = io_utils.get_only_uri_in_dir(model_push.uri)
            logging.info('Use pushed model from %s.', model_path)
        elif 'model_blessing' in input_dict and 'model_export' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
            model_export = artifact_utils.get_single_instance(
                input_dict['model_export'])
            model_path = path_utils.serving_model_path(model_export.uri)
            logging.info('Use exported model from %s.', model_path)
        else:
            raise ValueError(
                'Input models are not valid. Either model_push or '
                '(model_blessing and model_export) need to be '
                'specified.')
        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        example_uris = {}
        if data_spec.example_splits:
            for example in input_dict['examples']:
                if example.split in data_spec.example_splits:
                    example_uris[example.split] = example.uri
        else:
            for example in input_dict['examples']:
                example_uris[example.split] = example.uri
        model_spec = bulk_inferrer_pb2.ModelSpec()
        json_format.Parse(exec_properties['model_spec'], model_spec)
        output_path = os.path.join(output.uri, _PREDICTION_LOGS_DIR_NAME)
        self._run_model_inference(model_path, example_uris, output_path,
                                  model_spec)
        logging.info('BulkInferrer generates prediction log to %s',
                     output_path)
        output.set_int_custom_property('inferred', 1)