Example #1
  def testDoSkippedModelCreation(self, mock_runner, mock_run_model_inference,
                                 _):
    input_dict = {
        'examples': [self._examples],
        'model': [self._model],
        'model_blessing': [self._model_blessing],
    }
    output_dict = {
        'inference_result': [self._inference_result],
    }
    ai_platform_serving_args = {
        'model_name': 'model_name',
        'project_id': 'project_id'
    }
    # Create exec properties.
    exec_properties = {
        'data_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
        'custom_config':
            json_utils.dumps(
                {executor.SERVING_ARGS_KEY: ai_platform_serving_args}),
    }
    mock_runner.get_service_name_and_api_version.return_value = ('ml', 'v1')
    mock_runner.create_model_for_aip_prediction_if_not_exist.return_value = False

    # Run executor.
    bulk_inferrer = executor.Executor(self._context)
    bulk_inferrer.Do(input_dict, output_dict, exec_properties)

    ai_platform_prediction_model_spec = (
        model_spec_pb2.AIPlatformPredictionModelSpec(
            project_id='project_id',
            model_name='model_name',
            version_name=self._model_version))
    ai_platform_prediction_model_spec.use_serialization_config = True
    inference_endpoint = model_spec_pb2.InferenceSpecType()
    inference_endpoint.ai_platform_prediction_model_spec.CopyFrom(
        ai_platform_prediction_model_spec)
    mock_run_model_inference.assert_called_once_with(mock.ANY, mock.ANY,
                                                     mock.ANY, mock.ANY,
                                                     mock.ANY,
                                                     inference_endpoint)
    executor_class_path = '%s.%s' % (bulk_inferrer.__class__.__module__,
                                     bulk_inferrer.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.make_labels_dict()
    mock_runner.deploy_model_for_aip_prediction.assert_called_once_with(
        serving_path=path_utils.serving_model_path(self._model.uri),
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        labels=job_labels,
        api=mock.ANY,
        skip_model_endpoint_creation=True,
        set_default=False)
    mock_runner.delete_model_from_aip_if_exists.assert_called_once_with(
        model_version_name=mock.ANY,
        ai_platform_serving_args=ai_platform_serving_args,
        api=mock.ANY,
        delete_model_endpoint=False)
Example #2
    def testDoWithOutputExamplesSpecifiedSplits(self):
        self._exec_properties['data_spec'] = proto_utils.proto_to_json(
            text_format.Parse(
                """
                example_splits: 'unlabelled'
            """, bulk_inferrer_pb2.DataSpec()))
        self._exec_properties[
            'output_example_spec'] = proto_utils.proto_to_json(
                text_format.Parse(
                    """
                output_columns_spec {
                  classify_output {
                    label_column: 'classify_label'
                    score_column: 'classify_score'
                  }
                }
            """, bulk_inferrer_pb2.OutputExampleSpec()))

        # Run executor.
        bulk_inferrer = executor.Executor(self._context)
        bulk_inferrer.Do(self._input_dict, self._output_dict_oe,
                         self._exec_properties)

        # Check outputs.
        self.assertTrue(fileio.exists(self._output_examples_dir))
        self._verify_example_split('unlabelled')
        self.assertFalse(
            fileio.exists(
                os.path.join(self._output_examples_dir, 'unlabelled2')))
Example #3
    def testDoWithBlessedModel(self):
        input_dict = {
            'examples': [self._examples],
            'model_export': [self._model_export],
            'model_blessing': [self._model_blessing],
        }
        output_dict = {
            'output': [self._inference_result],
        }
        # Create exec properties.
        exec_properties = {
            'data_spec':
            json_format.MessageToJson(bulk_inferrer_pb2.DataSpec()),
            'model_spec':
            json_format.MessageToJson(bulk_inferrer_pb2.ModelSpec()),
            'component_id': self.component_id,
        }

        # Run executor.
        bulk_inferrer = executor.Executor(self._context)
        bulk_inferrer.Do(input_dict, output_dict, exec_properties)

        # Check outputs.
        self.assertTrue(tf.io.gfile.exists(self._prediction_log_dir))
        results = self._get_results(self._prediction_log_dir)
        self.assertTrue(results)
        self.assertEqual(
            len(results[0].classify_log.response.result.classifications), 1)
        self.assertEqual(
            len(results[0].classify_log.response.result.classifications[0].
                classes), 2)
Example #4
    def __init__(self,
                 examples: types.Channel,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 output_example_spec: Optional[Union[
                     bulk_inferrer_pb2.OutputExampleSpec, Dict[Text,
                                                               Any]]] = None,
                 custom_config: Optional[Dict[Text, Any]] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
        If any field is provided as a RuntimeParameter, output_example_spec
        should be constructed as a dict with the same field names as
        OutputExampleSpec proto message.
      custom_config: A dict which contains the deployment job parameters to be
        passed to Google Cloud AI Platform.
        custom_config.ai_platform_serving_args needs to contain the serving job
        parameters. For the full set of parameters, refer to
        https://cloud.google.com/ml-engine/reference/rest/v1/projects.models

    Raises:
      ValueError: If inference_result is specified when output_example_spec is
        set, or if output_examples is specified when output_example_spec is unset.
    """
        if output_example_spec:
            output_examples = types.Channel(type=standard_artifacts.Examples)
            inference_result = None
        else:
            inference_result = types.Channel(
                type=standard_artifacts.InferenceResult)
            output_examples = None

        spec = CloudAIBulkInferrerComponentSpec(
            examples=examples,
            model=model,
            model_blessing=model_blessing,
            data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
            output_example_spec=output_example_spec,
            custom_config=json_utils.dumps(custom_config),
            inference_result=inference_result,
            output_examples=output_examples)
        super(CloudAIBulkInferrerComponent, self).__init__(spec=spec)
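A note on custom_config (not part of the original snippet): the constructor above JSON-serializes the dict before it reaches the executor. Below is a minimal sketch of that serialization, assuming tfx.utils.json_utils and the 'ai_platform_serving_args' key used in the tests above; the model and project names are placeholders.

from tfx.utils import json_utils

# Serving arguments forwarded to Google Cloud AI Platform (placeholder values).
ai_platform_serving_args = {
    'model_name': 'model_name',
    'project_id': 'project_id',
}

# The component stores custom_config as a JSON string; this mirrors the
# 'custom_config' execution property built in the executor tests above.
custom_config_json = json_utils.dumps(
    {'ai_platform_serving_args': ai_platform_serving_args})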
Example #5
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result, optional.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'inference_result' not in output_dict:
            raise ValueError('\'inference_result\' is missing in output dict.')
        output = artifact_utils.get_single_instance(
            output_dict['inference_result'])
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'needs to be specified.')
        if 'model_blessing' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict['model'])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        if self._run_model_inference(
                data_spec, input_dict['examples'], output.uri,
                self._get_inference_spec(model_path, exec_properties)):
            output.set_int_custom_property('inferred', 1)
        else:
            output.set_int_custom_property('inferred', 0)
Example #6
    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                            Dict[Text, Any]]] = None,
                 output_example_spec: Optional[Union[
                     bulk_inferrer_pb2.OutputExampleSpec, Dict[Text,
                                                               Any]]] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification. If any field is provided as a RuntimeParameter,
        model_spec should be constructed as a dict with the same field names as
        ModelSpec proto message.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
        If any field is provided as a RuntimeParameter, output_example_spec
        should be constructed as a dict with the same field names as
        OutputExampleSpec proto message.
    """
        if output_example_spec:
            output_examples = types.Channel(type=standard_artifacts.Examples)
            inference_result = None
        else:
            inference_result = types.Channel(
                type=standard_artifacts.InferenceResult)
            output_examples = None

        spec = BulkInferrerSpec(examples=examples,
                                model=model,
                                model_blessing=model_blessing,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                output_example_spec=output_example_spec,
                                inference_result=inference_result,
                                output_examples=output_examples)
        super(BulkInferrer, self).__init__(spec=spec)
Example #7
    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                            Dict[Text, Any]]] = None,
                 inference_result: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None,
                 enable_cache: Optional[bool] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification. If any field is provided as a RuntimeParameter,
        model_spec should be constructed as a dict with the same field names as
        ModelSpec proto message.
      inference_result: Channel of type `standard_artifacts.InferenceResult`
        to store the inference results.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.
      enable_cache: Optional boolean to indicate if cache is enabled for the
        BulkInferrer component. If not specified, defaults to the value
        specified for pipeline's enable_cache parameter.
    """
        inference_result = inference_result or types.Channel(
            type=standard_artifacts.InferenceResult,
            artifacts=[standard_artifacts.InferenceResult()])
        spec = BulkInferrerSpec(examples=examples,
                                model=model,
                                model_blessing=model_blessing,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                inference_result=inference_result)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name,
                                           enable_cache=enable_cache)
Example #8
    def __init__(
        self,
        examples: types.BaseChannel,
        model: Optional[types.BaseChannel] = None,
        model_blessing: Optional[types.BaseChannel] = None,
        data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                  data_types.RuntimeParameter]] = None,
        model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                   data_types.RuntimeParameter]] = None,
        output_example_spec: Optional[
            Union[bulk_inferrer_pb2.OutputExampleSpec,
                  data_types.RuntimeParameter]] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A BaseChannel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A BaseChannel of type `standard_artifacts.Model`, usually produced
        by a Trainer component.
      model_blessing: A BaseChannel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
    """
        if output_example_spec:
            output_examples = types.Channel(type=standard_artifacts.Examples)
            inference_result = None
        else:
            inference_result = types.Channel(
                type=standard_artifacts.InferenceResult)
            output_examples = None

        spec = standard_component_specs.BulkInferrerSpec(
            examples=examples,
            model=model,
            model_blessing=model_blessing,
            data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
            model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
            output_example_spec=output_example_spec,
            inference_result=inference_result,
            output_examples=output_examples)
        super().__init__(spec=spec)
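For reference, a short sketch (not from the original source) of building an output_example_spec like the text proto parsed in Example #2, which could then be passed to the constructor above; the column names are placeholders.

from google.protobuf import text_format
from tfx.proto import bulk_inferrer_pb2

# Equivalent to the text proto used in Example #2: write classification
# results back into the output examples under these column names.
output_example_spec = text_format.Parse(
    """
    output_columns_spec {
      classify_output {
        label_column: 'classify_label'
        score_column: 'classify_score'
      }
    }
    """, bulk_inferrer_pb2.OutputExampleSpec())

When such a spec is supplied, the constructor above wires an output_examples channel instead of inference_result.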
Example #9
  def __init__(self,
               examples: types.Channel = None,
               model: Optional[types.Channel] = None,
               model_blessing: Optional[types.Channel] = None,
               data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                         Dict[Text, Any]]] = None,
               custom_config: Dict[Text, Any] = None,
               inference_result: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      custom_config: A dict which contains the deployment job parameters to be
        passed to Google Cloud AI Platform.
        custom_config.ai_platform_serving_args needs to contain the serving job
        parameters. For the full set of parameters, refer to
        https://cloud.google.com/ml-engine/reference/rest/v1/projects.models
      inference_result: Channel of type `standard_artifacts.InferenceResult`
        to store the inference results.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.
    """
    inference_result = inference_result or types.Channel(
        type=standard_artifacts.InferenceResult,
        artifacts=[standard_artifacts.InferenceResult()])
    spec = CloudAIBulkInferrerComponentSpec(
        examples=examples,
        model=model,
        model_blessing=model_blessing,
        data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
        custom_config=json_utils.dumps(custom_config),
        inference_result=inference_result)
    super(CloudAIBulkInferrerComponent, self).__init__(
        spec=spec, instance_name=instance_name)
Example #10
    def __init__(self,
                 examples: types.Channel = None,
                 model_export: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 model_push: Optional[types.Channel] = None,
                 data_spec: Optional[bulk_inferrer_pb2.DataSpec] = None,
                 model_spec: Optional[bulk_inferrer_pb2.ModelSpec] = None,
                 output: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of 'ExamplesPath' type, usually produced by ExampleGen
        component. _required_
      model_export: A Channel of 'ModelExportPath' type, usually produced by
        Trainer component.
      model_blessing: A Channel of 'ModelBlessingPath' type, usually produced by
        Model Validator component.
      model_push: A Channel of 'PushedModel' type, usually produced by Pusher
        component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification.
      output: Channel of `InferenceResult` to store the inference results.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.
    """
        output = output or types.Channel(
            type=standard_artifacts.InferenceResult,
            artifacts=[standard_artifacts.InferenceResult()])
        spec = BulkInferrerSpec(examples=examples,
                                model_export=model_export,
                                model_blessing=model_blessing,
                                model_push=model_push,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                output=output)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name)
Example #11
    def testDoFailedModelDeployment(self, mock_runner,
                                    mock_run_model_inference, _):
        input_dict = {
            'examples': [self._examples],
            'model': [self._model],
            'model_blessing': [self._model_blessing],
        }
        output_dict = {
            'inference_result': [self._inference_result],
        }
        ai_platform_serving_args = {
            'model_name': 'model_name',
            'project_id': 'project_id'
        }
        # Create exec properties.
        exec_properties = {
            'data_spec':
            json_format.MessageToJson(bulk_inferrer_pb2.DataSpec(),
                                      preserving_proto_field_name=True),
            'custom_config':
            json_utils.dumps(
                {executor.SERVING_ARGS_KEY: ai_platform_serving_args}),
        }
        mock_runner.deploy_model_for_aip_prediction.side_effect = (
            Exception('Deployment failed'))
        mock_runner.get_service_name_and_api_version.return_value = ('ml',
                                                                     'v1')
        mock_runner.create_model_for_aip_prediction_if_not_exist.return_value = True

        bulk_inferrer = executor.Executor(self._context)
        with self.assertRaises(Exception):
            bulk_inferrer.Do(input_dict, output_dict, exec_properties)

        mock_runner.delete_model_version_from_aip_if_exists.assert_called_once_with(
            mock.ANY, mock.ANY, ai_platform_serving_args)
        mock_runner.delete_model_from_aip_if_exists.assert_called_once_with(
            mock.ANY, ai_platform_serving_args)
Example #12
    def setUp(self):
        super(ExecutorTest, self).setUp()
        self._source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        self._output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)
        self.component_id = 'test_component'

        # Create input dict.
        self._examples = standard_artifacts.Examples()
        unlabelled_path = os.path.join(self._source_data_dir,
                                       'csv_example_gen', 'unlabelled')
        self._examples.uri = os.path.join(self._output_data_dir,
                                          'csv_example_gen')
        io_utils.copy_dir(unlabelled_path,
                          os.path.join(self._examples.uri, 'unlabelled'))
        io_utils.copy_dir(unlabelled_path,
                          os.path.join(self._examples.uri, 'unlabelled2'))
        self._examples.split_names = artifact_utils.encode_split_names(
            ['unlabelled', 'unlabelled2'])
        self._model = standard_artifacts.Model()
        self._model.uri = os.path.join(self._source_data_dir,
                                       'trainer/current')

        self._model_blessing = standard_artifacts.ModelBlessing()
        self._model_blessing.uri = os.path.join(self._source_data_dir,
                                                'model_validator/blessed')
        self._model_blessing.set_int_custom_property('blessed', 1)

        self._input_dict = {
            'examples': [self._examples],
            'model': [self._model],
            'model_blessing': [self._model_blessing],
        }

        # Create output dict.
        self._inference_result = standard_artifacts.InferenceResult()
        self._prediction_log_dir = os.path.join(self._output_data_dir,
                                                'prediction_logs')
        self._inference_result.uri = self._prediction_log_dir

        self._output_examples = standard_artifacts.Examples()
        self._output_examples_dir = os.path.join(self._output_data_dir,
                                                 'output_examples')
        self._output_examples.uri = self._output_examples_dir

        self._output_dict_ir = {
            'inference_result': [self._inference_result],
        }
        self._output_dict_oe = {
            'output_examples': [self._output_examples],
        }

        # Create exec properties.
        self._exec_properties = {
            'data_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
            'model_spec':
            proto_utils.proto_to_json(bulk_inferrer_pb2.ModelSpec()),
            'component_id': self.component_id,
        }

        # Create context
        self._tmp_dir = os.path.join(self._output_data_dir, '.temp')
        self._context = executor.Executor.Context(tmp_dir=self._tmp_dir,
                                                  unique_id='2')
Example #13
  def Do(self, input_dict: Dict[Text, List[types.Artifact]],
         output_dict: Dict[Text, List[types.Artifact]],
         exec_properties: Dict[Text, Any]) -> None:
    """Runs batch inference on a given model with given input examples.

    This function creates a new model (if necessary) and a new model version
    before inference, and cleans up resources after inference. It provides
    re-executability as it cleans up (only) the model resources that are created
    during the process, even if the inference job failed.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.
        - custom_config: custom_config.ai_platform_serving_args needs to contain
          the serving job parameters sent to Google Cloud AI Platform. For the
          full set of parameters, refer to
          https://cloud.google.com/ml-engine/reference/rest/v1/projects.models

    Returns:
      None
    """
    self._log_startup(input_dict, output_dict, exec_properties)
    if 'examples' not in input_dict:
      raise ValueError('\'examples\' is missing in input dict.')
    if 'inference_result' not in output_dict:
      raise ValueError('\'inference_result\' is missing in output dict.')
    output = artifact_utils.get_single_instance(output_dict['inference_result'])
    if 'model' not in input_dict:
      raise ValueError('Input models are not valid, model '
                       'needs to be specified.')
    if 'model_blessing' in input_dict:
      model_blessing = artifact_utils.get_single_instance(
          input_dict['model_blessing'])
      if not model_utils.is_model_blessed(model_blessing):
        output.set_int_custom_property('inferred', 0)
        logging.info('Model on %s was not blessed', model_blessing.uri)
        return
    else:
      logging.info('Model blessing is not provided, exported model will be '
                   'used.')
    if _CUSTOM_CONFIG_KEY not in exec_properties:
      raise ValueError('Input exec properties are not valid, {} '
                       'needs to be specified.'.format(_CUSTOM_CONFIG_KEY))

    custom_config = json_utils.loads(
        exec_properties.get(_CUSTOM_CONFIG_KEY, 'null'))
    if custom_config is not None and not isinstance(custom_config, Dict):
      raise ValueError('custom_config in execution properties needs to be a '
                       'dict.')
    ai_platform_serving_args = custom_config.get(SERVING_ARGS_KEY)
    if not ai_platform_serving_args:
      raise ValueError(
          '\'ai_platform_serving_args\' is missing in \'custom_config\'')
    service_name, api_version = runner.get_service_name_and_api_version(
        ai_platform_serving_args)
    executor_class_path = '%s.%s' % (self.__class__.__module__,
                                     self.__class__.__name__)
    with telemetry_utils.scoped_labels(
        {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
      job_labels = telemetry_utils.get_labels_dict()
    model = artifact_utils.get_single_instance(input_dict['model'])
    model_path = path_utils.serving_model_path(model.uri)
    logging.info('Use exported model from %s.', model_path)
    # Use model artifact uri to generate model version to guarantee the
    # 1:1 mapping from model version to model.
    model_version = 'version_' + hashlib.sha256(model.uri.encode()).hexdigest()
    inference_spec = self._get_inference_spec(model_path, model_version,
                                              ai_platform_serving_args)
    data_spec = bulk_inferrer_pb2.DataSpec()
    json_format.Parse(exec_properties['data_spec'], data_spec)
    api = discovery.build(service_name, api_version)
    new_model_created = False
    try:
      new_model_created = runner.create_model_for_aip_prediction_if_not_exist(
          api, job_labels, ai_platform_serving_args)
      runner.deploy_model_for_aip_prediction(
          api,
          model_path,
          model_version,
          ai_platform_serving_args,
          job_labels,
          skip_model_creation=True,
          set_default_version=False,
      )
      self._run_model_inference(data_spec, input_dict['examples'], output.uri,
                                inference_spec)
    except Exception as e:
      logging.error('Error in executing CloudAIBulkInferrerComponent: %s',
                    str(e))
      output.set_int_custom_property('inferred', 0)
      raise
    finally:
      # Guarantee newly created resources are cleaned up even if the inference
      # job failed.

      # Clean up the newly deployed model.
      runner.delete_model_version_from_aip_if_exists(api, model_version,
                                                     ai_platform_serving_args)
      if new_model_created:
        runner.delete_model_from_aip_if_exists(api, ai_platform_serving_args)
    # Mark the inference as successful after resources are cleaned up.
    output.set_int_custom_property('inferred', 1)
Example #14
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text,
                     metadata_path: Text,
                     direct_num_workers: int) -> pipeline.Pipeline:
  """Implements the chicago taxi pipeline with TFX."""
  training_examples = external_input(training_data_root)

  # Brings training data into the pipeline or otherwise joins/converts
  # training data.
  training_example_gen = CsvExampleGen(
      input_base=training_examples, instance_name='training_example_gen')

  # Computes statistics over data for visualization and example validation.
  statistics_gen = StatisticsGen(
      input_data=training_example_gen.outputs['examples'])

  # Generates schema based on statistics files.
  infer_schema = SchemaGen(
      statistics=statistics_gen.outputs['statistics'],
      infer_feature_shape=False)

  # Performs anomaly detection based on statistics and data schema.
  validate_stats = ExampleValidator(
      statistics=statistics_gen.outputs['statistics'],
      schema=infer_schema.outputs['schema'])

  # Performs transformations and feature engineering in training and serving.
  transform = Transform(
      examples=training_example_gen.outputs['examples'],
      schema=infer_schema.outputs['schema'],
      module_file=module_file)

  # Uses a user-provided Python function that implements a model using TF-Learn.
  trainer = Trainer(
      module_file=module_file,
      transformed_examples=transform.outputs['transformed_examples'],
      schema=infer_schema.outputs['schema'],
      transform_graph=transform.outputs['transform_graph'],
      train_args=trainer_pb2.TrainArgs(num_steps=10000),
      eval_args=trainer_pb2.EvalArgs(num_steps=5000))

  # Uses TFMA to compute evaluation statistics over features of a model.
  model_analyzer = Evaluator(
      examples=training_example_gen.outputs['examples'],
      model_exports=trainer.outputs['model'],
      feature_slicing_spec=evaluator_pb2.FeatureSlicingSpec(specs=[
          evaluator_pb2.SingleSlicingSpec(
              column_for_slicing=['trip_start_hour'])
      ]))

  # Performs quality validation of a candidate model (compared to a baseline).
  model_validator = ModelValidator(
      examples=training_example_gen.outputs['examples'],
      model=trainer.outputs['model'])

  inference_examples = external_input(inference_data_root)

  # Brings inference data into the pipeline.
  inference_example_gen = CsvExampleGen(
      input_base=inference_examples,
      output_config=example_gen_pb2.Output(
          split_config=example_gen_pb2.SplitConfig(
              splits=[example_gen_pb2.SplitConfig.Split(
                  name='unlabelled', hash_buckets=100)])),
      instance_name='inference_example_gen')

  # Performs offline batch inference over inference examples.
  bulk_inferrer = BulkInferrer(
      examples=inference_example_gen.outputs['examples'],
      model=trainer.outputs['model'],
      model_blessing=model_validator.outputs['blessing'],
      # Empty data_spec.example_splits will result in using all splits.
      data_spec=bulk_inferrer_pb2.DataSpec(),
      model_spec=bulk_inferrer_pb2.ModelSpec())

  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          training_example_gen, inference_example_gen, statistics_gen,
          infer_schema, validate_stats, transform, trainer, model_analyzer,
          model_validator, bulk_inferrer
      ],
      enable_cache=True,
      metadata_connection_config=metadata.sqlite_metadata_connection_config(
          metadata_path),
      # TODO(b/141578059): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers])
Example #15
    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 model_spec: Optional[Union[bulk_inferrer_pb2.ModelSpec,
                                            Dict[Text, Any]]] = None,
                 output_example_spec: Optional[Union[
                     bulk_inferrer_pb2.OutputExampleSpec, Dict[Text,
                                                               Any]]] = None,
                 inference_result: Optional[types.Channel] = None,
                 output_examples: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct an BulkInferrer component.

    Args:
      examples: A Channel of type `standard_artifacts.Examples`, usually
        produced by an ExampleGen component. _required_
      model: A Channel of type `standard_artifacts.Model`, usually produced by
        a Trainer component.
      model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
        usually produced by a ModelValidator component.
      data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
        selection. If any field is provided as a RuntimeParameter, data_spec
        should be constructed as a dict with the same field names as DataSpec
        proto message.
      model_spec: bulk_inferrer_pb2.ModelSpec instance that describes model
        specification. If any field is provided as a RuntimeParameter,
        model_spec should be constructed as a dict with the same field names as
        ModelSpec proto message.
      output_example_spec: bulk_inferrer_pb2.OutputExampleSpec instance, specify
        if you want BulkInferrer to output examples instead of inference result.
        If any field is provided as a RuntimeParameter, output_example_spec
        should be constructed as a dict with the same field names as
        OutputExampleSpec proto message.
      inference_result: Channel of type `standard_artifacts.InferenceResult`
        to store the inference results, must not be specified when
        output_example_spec is set.
      output_examples: Channel of type `standard_artifacts.Examples`
        to store the output examples, must not be specified when
        output_example_spec is unset. Check output_example_spec for details.
      instance_name: Optional name assigned to this specific instance of
        BulkInferrer. Required only if multiple BulkInferrer components are
        declared in the same pipeline.

    Raises:
      ValueError: If inference_result is specified when output_example_spec is
        set, or if output_examples is specified when output_example_spec is unset.
    """
        if output_example_spec:
            if inference_result:
                raise ValueError(
                    'Must not specify inference_result when output_example_spec is set.'
                )
            output_examples = output_examples or types.Channel(
                type=standard_artifacts.Examples)
        else:
            if output_examples:
                raise ValueError(
                    'Must not specify output_examples when output_example_spec is unset.'
                )
            inference_result = inference_result or types.Channel(
                type=standard_artifacts.InferenceResult)

        spec = BulkInferrerSpec(examples=examples,
                                model=model,
                                model_blessing=model_blessing,
                                data_spec=data_spec
                                or bulk_inferrer_pb2.DataSpec(),
                                model_spec=model_spec
                                or bulk_inferrer_pb2.ModelSpec(),
                                output_example_spec=output_example_spec,
                                inference_result=inference_result,
                                output_examples=output_examples)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name)
Example #16
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'inference_result' not in output_dict:
            raise ValueError('\'inference_result\' is missing in output dict.')
        output = artifact_utils.get_single_instance(
            output_dict['inference_result'])
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'needs to be specified.')
        if 'model_blessing' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict['model'])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        example_uris = {}
        if data_spec.example_splits:
            for example in input_dict['examples']:
                for split in artifact_utils.decode_split_names(
                        example.split_names):
                    if split in data_spec.example_splits:
                        example_uris[split] = os.path.join(example.uri, split)
        else:
            for example in input_dict['examples']:
                for split in artifact_utils.decode_split_names(
                        example.split_names):
                    example_uris[split] = os.path.join(example.uri, split)
        model_spec = bulk_inferrer_pb2.ModelSpec()
        json_format.Parse(exec_properties['model_spec'], model_spec)
        output_path = os.path.join(output.uri, _PREDICTION_LOGS_DIR_NAME)
        self._run_model_inference(model_path, example_uris, output_path,
                                  model_spec)
        logging.info('BulkInferrer generates prediction log to %s',
                     output_path)
        output.set_int_custom_property('inferred', 1)
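The data_spec execution property consumed above is just a JSON-serialized proto. Below is a minimal round-trip sketch, assuming the same bulk_inferrer_pb2 and json_format modules these executors already import; the split name is a placeholder.

from google.protobuf import json_format
from tfx.proto import bulk_inferrer_pb2

# Restrict inference to a single split, serialize it the way the tests build
# exec_properties['data_spec'], then parse it back as the executor does.
data_spec = bulk_inferrer_pb2.DataSpec(example_splits=['unlabelled'])
data_spec_json = json_format.MessageToJson(data_spec)

parsed = bulk_inferrer_pb2.DataSpec()
json_format.Parse(data_spec_json, parsed)
assert list(parsed.example_splits) == ['unlabelled']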
Example #17
def _create_pipeline(pipeline_name: Text, pipeline_root: Text,
                     training_data_root: Text, inference_data_root: Text,
                     module_file: Text, metadata_path: Text,
                     beam_pipeline_args: List[Text]) -> pipeline.Pipeline:
    """Implements the chicago taxi pipeline with TFX."""
    # Brings training data into the pipeline or otherwise joins/converts
    # training data.
    training_example_gen = CsvExampleGen(input_base=training_data_root,
                                         instance_name='training_example_gen')

    # Computes statistics over data for visualization and example validation.
    statistics_gen = StatisticsGen(
        input_data=training_example_gen.outputs['examples'])

    # Generates schema based on statistics files.
    schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                           infer_feature_shape=False)

    # Performs anomaly detection based on statistics and data schema.
    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs['statistics'],
        schema=schema_gen.outputs['schema'])

    # Performs transformations and feature engineering in training and serving.
    transform = Transform(examples=training_example_gen.outputs['examples'],
                          schema=schema_gen.outputs['schema'],
                          module_file=module_file)

    # Uses a user-provided Python function that implements a model using TF-Learn.
    trainer = Trainer(
        module_file=module_file,
        transformed_examples=transform.outputs['transformed_examples'],
        schema=schema_gen.outputs['schema'],
        transform_graph=transform.outputs['transform_graph'],
        train_args=trainer_pb2.TrainArgs(num_steps=10000),
        eval_args=trainer_pb2.EvalArgs(num_steps=5000))

    # Get the latest blessed model for model validation.
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.
        LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Uses TFMA to compute evaluation statistics over features of a model and
    # perform quality validation of a candidate model (compared to a baseline).
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(signature_name='eval')],
        slicing_specs=[
            tfma.SlicingSpec(),
            tfma.SlicingSpec(feature_keys=['trip_start_hour'])
        ],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    'accuracy':
                    tfma.config.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': 0.6}),
                        # Change threshold will be ignored if there is no
                        # baseline model resolved from MLMD (first run).
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={'value': -1e-10}))
                })
        ])
    evaluator = Evaluator(examples=training_example_gen.outputs['examples'],
                          model=trainer.outputs['model'],
                          baseline_model=model_resolver.outputs['model'],
                          eval_config=eval_config)

    # Brings inference data into the pipeline.
    inference_example_gen = CsvExampleGen(
        input_base=inference_data_root,
        output_config=example_gen_pb2.Output(
            split_config=example_gen_pb2.SplitConfig(splits=[
                example_gen_pb2.SplitConfig.Split(name='unlabelled',
                                                  hash_buckets=100)
            ])),
        instance_name='inference_example_gen')

    # Performs offline batch inference over inference examples.
    bulk_inferrer = BulkInferrer(
        examples=inference_example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        model_blessing=evaluator.outputs['blessing'],
        # Empty data_spec.example_splits will result in using all splits.
        data_spec=bulk_inferrer_pb2.DataSpec(),
        model_spec=bulk_inferrer_pb2.ModelSpec())

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=[
            training_example_gen, inference_example_gen, statistics_gen,
            schema_gen, example_validator, transform, trainer, model_resolver,
            evaluator, bulk_inferrer
        ],
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path),
        beam_pipeline_args=beam_pipeline_args)
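A pipeline definition like the one above is normally handed to an orchestrator. Below is a hedged sketch using TFX's Beam runner of that era; the import path and every argument value are assumptions, not part of the original example.

from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

# Placeholder paths and names; adjust to the target environment.
BeamDagRunner().run(
    _create_pipeline(
        pipeline_name='chicago_taxi_bulk_infer',
        pipeline_root='/tmp/pipeline_root',
        training_data_root='/tmp/data/train',
        inference_data_root='/tmp/data/inference',
        module_file='/tmp/taxi_utils.py',
        metadata_path='/tmp/metadata.sqlite',
        beam_pipeline_args=['--direct_num_workers=0']))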
Example #18
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result, optional.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if output_dict.get(standard_component_specs.INFERENCE_RESULT_KEY):
            inference_result = artifact_utils.get_single_instance(
                output_dict[standard_component_specs.INFERENCE_RESULT_KEY])
        else:
            inference_result = None
        if output_dict.get(standard_component_specs.OUTPUT_EXAMPLES_KEY):
            output_examples = artifact_utils.get_single_instance(
                output_dict[standard_component_specs.OUTPUT_EXAMPLES_KEY])
        else:
            output_examples = None

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'needs to be specified.')
        if standard_component_specs.MODEL_BLESSING_KEY in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict[standard_component_specs.MODEL_BLESSING_KEY])
            if not model_utils.is_model_blessed(model_blessing):
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(
            input_dict[standard_component_specs.MODEL_KEY])
        model_path = path_utils.serving_model_path(
            model.uri, path_utils.is_old_model_artifact(model))
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        proto_utils.json_to_proto(
            exec_properties[standard_component_specs.DATA_SPEC_KEY], data_spec)

        output_example_spec = bulk_inferrer_pb2.OutputExampleSpec()
        if exec_properties.get(
                standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY):
            proto_utils.json_to_proto(
                exec_properties[
                    standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY],
                output_example_spec)

        self._run_model_inference(
            data_spec, output_example_spec,
            input_dict[standard_component_specs.EXAMPLES_KEY], output_examples,
            inference_result,
            self._get_inference_spec(model_path, exec_properties))
Example #19
def generate_pipeline(pipeline_name, pipeline_root, train_data, test_data,
                      train_steps, eval_steps, pusher_target, runner):
    module_file = 'util.py'  # util.py is a file in the same folder

    # RuntimeParameter is only supported on KubeflowDagRunner currently
    if runner == 'kubeflow':
        pipeline_root_param = os.path.join('gs://{{kfp-default-bucket}}',
                                           pipeline_name, '{{workflow.uid}}')
        train_data_param = data_types.RuntimeParameter(
            name='train-data',
            default=
            'gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/train',
            ptype=Text)
        test_data_param = data_types.RuntimeParameter(
            name='test-data',
            default=
            'gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/test',
            ptype=Text)
        pusher_target_param = data_types.RuntimeParameter(
            name='pusher-destination',
            default=
            'gs://renming-mlpipeline-kubeflowpipelines-default/kaggle/santander/serving',
            ptype=Text)
    else:
        pipeline_root_param = pipeline_root
        train_data_param = train_data
        test_data_param = test_data
        pusher_target_param = pusher_target

    examples = external_input(train_data_param)
    example_gen = CsvExampleGen(input=examples, instance_name="train")

    test_examples = external_input(test_data_param)
    test_example_gen = CsvExampleGen(input=test_examples,
                                     output_config={
                                         'split_config': {
                                             'splits': [{
                                                 'name': 'test',
                                                 'hash_buckets': 1
                                             }]
                                         }
                                     },
                                     instance_name="test")

    statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
    schema_gen = SchemaGen(statistics=statistics_gen.outputs['statistics'],
                           infer_feature_shape=True
                           )  # infer_feature_shape controls sparse or dense

    # Transform is too slow on my side.
    transform = Transform(examples=example_gen.outputs['examples'],
                          schema=schema_gen.outputs['schema'],
                          module_file=module_file)

    trainer = Trainer(
        custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
        examples=transform.outputs['transformed_examples'],
        transform_graph=transform.outputs['transform_graph'],
        schema=schema_gen.outputs['schema'],
        module_file=module_file,
        train_args=trainer_pb2.TrainArgs(num_steps=train_steps),
        eval_args=trainer_pb2.EvalArgs(num_steps=eval_steps),
        instance_name="train",
        enable_cache=False)

    # Get the latest blessed model for model validation.
    model_resolver = ResolverNode(
        instance_name='latest_blessed_model_resolver',
        resolver_class=latest_blessed_model_resolver.
        LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing))

    # Uses TFMA to compute evaluation statistics over features of a model and
    # perform quality validation of a candidate model (compared to a baseline).
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key='target')],
        # Adding more slices, e.g. tfma.SlicingSpec(feature_keys=['var_0', 'var_1']), keeps Evaluator from outputting a BLESSED status; this looks like a TFMA bug.
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                thresholds={
                    'binary_accuracy':
                    tfma.config.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={'value': 0.4}),
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={'value': -1e-10}))
                })
        ])
    evaluator = Evaluator(
        examples=example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        # baseline_model=model_resolver.outputs['model'],
        # Change threshold will be ignored if there is no baseline (first run).
        eval_config=eval_config,
        instance_name="eval5")

    # Checks whether the model passed the validation steps and pushes the model
    # to a file destination if check passed.
    pusher = Pusher(model=trainer.outputs['model'],
                    model_blessing=evaluator.outputs['blessing'],
                    push_destination={
                        'filesystem': {
                            'base_directory': pusher_target_param
                        }
                    })

    bulk_inferrer = BulkInferrer(
        examples=test_example_gen.outputs['examples'],
        model=trainer.outputs['model'],
        # model_blessing=evaluator.outputs['blessing'],
        data_spec=bulk_inferrer_pb2.DataSpec(),
        model_spec=bulk_inferrer_pb2.ModelSpec(),
        instance_name="bulkInferrer")

    hello = component.HelloComponent(
        input_data=bulk_inferrer.outputs['inference_result'],
        instance_name='csvGen')

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root_param,
        components=[
            example_gen, statistics_gen, schema_gen, transform, trainer,
            model_resolver, evaluator, pusher, hello, test_example_gen,
            bulk_inferrer
        ],
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            os.path.join(pipeline_root, 'metadata.sqlite')),
        beam_pipeline_args=['--direct_num_workers=0'])
Example #20
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model_export: exported model.
        - model_blessing: model blessing result
        - model_push: pushed model. Either model_push or (model_export and
          model_blessing) needs to be present.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'output' not in output_dict:
            raise ValueError('\'output\' is missing in output dict.')
        output = artifact_utils.get_single_instance(output_dict['output'])
        if 'model_push' in input_dict:
            model_push = artifact_utils.get_single_instance(
                input_dict['model_push'])
            model_path = io_utils.get_only_uri_in_dir(model_push.uri)
            logging.info('Use pushed model from %s.', model_path)
        elif 'model_blessing' in input_dict and 'model_export' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                output.set_int_custom_property('inferred', 0)
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
            model_export = artifact_utils.get_single_instance(
                input_dict['model_export'])
            model_path = path_utils.serving_model_path(model_export.uri)
            logging.info('Use exported model from %s.', model_path)
        else:
            raise ValueError(
                'Input models are not valid. Either model_push or '
                '(model_blessing and model_export) need to be '
                'specified.')
        data_spec = bulk_inferrer_pb2.DataSpec()
        json_format.Parse(exec_properties['data_spec'], data_spec)
        example_uris = {}
        if data_spec.example_splits:
            for example in input_dict['examples']:
                if example.split in data_spec.example_splits:
                    example_uris[example.split] = example.uri
        else:
            for example in input_dict['examples']:
                example_uris[example.split] = example.uri
        model_spec = bulk_inferrer_pb2.ModelSpec()
        json_format.Parse(exec_properties['model_spec'], model_spec)
        output_path = os.path.join(output.uri, _PREDICTION_LOGS_DIR_NAME)
        self._run_model_inference(model_path, example_uris, output_path,
                                  model_spec)
        logging.info('BulkInferrer generates prediction log to %s',
                     output_path)
        output.set_int_custom_property('inferred', 1)