Code example #1
File: executor_test.py Project: turhaltemizer/tfx
    def testDoWithOutputExamplesSpecifiedSplits(self):
        self._exec_properties['data_spec'] = proto_utils.proto_to_json(
            text_format.Parse(
                """
                example_splits: 'unlabelled'
            """, bulk_inferrer_pb2.DataSpec()))
        self._exec_properties[
            'output_example_spec'] = proto_utils.proto_to_json(
                text_format.Parse(
                    """
                output_columns_spec {
                  classify_output {
                    label_column: 'classify_label'
                    score_column: 'classify_score'
                  }
                }
            """, bulk_inferrer_pb2.OutputExampleSpec()))

        # Run executor.
        bulk_inferrer = executor.Executor(self._context)
        bulk_inferrer.Do(self._input_dict, self._output_dict_oe,
                         self._exec_properties)

        # Check outputs.
        self.assertTrue(fileio.exists(self._output_examples_dir))
        self._verify_example_split('unlabelled')
        self.assertFalse(
            fileio.exists(
                os.path.join(self._output_examples_dir, 'unlabelled2')))
Code example #2
    def test_convert_for_regress_invalid_output_example_spec(self):
        prediction_log = text_format.Parse(
            """
      regress_log {
        request {
          input {
            example_list {
              examples {
                features {
                  feature: {
                    key: "regress_input"
                    value: { bytes_list: { value: "feature" } }
                  }
                }
              }
            }
          }
        }
        response {
          result {
            regressions {
              value: 0.7
            }
          }
        }
      }
    """, prediction_log_pb2.PredictionLog())

        output_example_spec = text_format.Parse(
            """
        output_columns_spec {
        }
    """, bulk_inferrer_pb2.OutputExampleSpec())
        with self.assertRaises(ValueError):
            utils.convert(prediction_log, output_example_spec)
Code example #3
    def testConstructOutputExample(self):
        bulk_inferrer = component.CloudAIBulkInferrerComponent(
            examples=self._examples,
            model=self._model,
            model_blessing=self._model_blessing,
            output_example_spec=bulk_inferrer_pb2.OutputExampleSpec())
        self.assertEqual('Examples',
                         bulk_inferrer.outputs['output_examples'].type_name)
        self.assertNotIn('inference_result', bulk_inferrer.outputs.keys())
Code example #4
    def testConstructOutputExample(self):
        bulk_inferrer = component.BulkInferrer(
            examples=self._examples,
            model=self._model,
            model_blessing=self._model_blessing,
            output_example_spec=bulk_inferrer_pb2.OutputExampleSpec())
        self.assertEqual(
            'Examples', bulk_inferrer.outputs[
                standard_component_specs.OUTPUT_EXAMPLES_KEY].type_name)
        self.assertNotIn('inference_result', bulk_inferrer.outputs.keys())
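
The component tests above only assert on the declared output types. As a rough sketch (not part of the original tests), wiring BulkInferrer into a pipeline so that it materializes output examples might look like the following; the upstream example_gen and trainer components are assumed to exist, and the OutputExampleSpec payload is the same one the executor tests above parse from text format.

# A rough sketch, not from the original tests. `example_gen` and `trainer`
# are assumed upstream components.
from google.protobuf import text_format
from tfx.components import BulkInferrer
from tfx.proto import bulk_inferrer_pb2

output_example_spec = text_format.Parse(
    """
    output_columns_spec {
      classify_output {
        label_column: 'classify_label'
        score_column: 'classify_score'
      }
    }
    """, bulk_inferrer_pb2.OutputExampleSpec())

bulk_inferrer = BulkInferrer(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    output_example_spec=output_example_spec)

When output_example_spec is set, the component declares an 'output_examples' output rather than 'inference_result', which is exactly what the two assertions in examples #3 and #4 check.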
Code example #5
    def test_convert_for_predict_invalid_output_example_spec(self):
        example = text_format.Parse(
            """
      features {
        feature { key: "predict_input" value: { bytes_list: { value: "feature" } } }
      }""", tf.train.Example())
        prediction_log = text_format.Parse(
            """
      predict_log {
        request {
          inputs {
            key: "%s"
            value {
              dtype: DT_STRING
              tensor_shape { dim { size: 1 } }
            }
          }
       }
       response {
         outputs {
           key: "output_float"
           value {
             dtype: DT_FLOAT
             tensor_shape { dim { size: 1 } dim { size: 2 }}
             float_val: 0.1
             float_val: 0.2
           }
         }
         outputs {
           key: "output_bytes"
           value {
             dtype: DT_STRING
             tensor_shape { dim { size: 1 }}
             string_val: "prediction"
           }
         }
       }
     }
    """ % (utils.INPUT_KEY), prediction_log_pb2.PredictionLog())

        # The serialized example cannot be embedded in the text format above:
        # the quoting of the `string_val` value would not be parsed correctly,
        # so it is appended here instead.
        prediction_log.predict_log.request.inputs[
            utils.INPUT_KEY].string_val.append(example.SerializeToString())

        output_example_spec = text_format.Parse(
            """
        output_columns_spec {
        }
    """, bulk_inferrer_pb2.OutputExampleSpec())
        with self.assertRaises(ValueError):
            utils.convert(prediction_log, output_example_spec)
Code example #6
    def test_convert_for_regress(self):
        prediction_log = text_format.Parse(
            """
      regress_log {
        request {
          input {
            example_list {
              examples {
                features {
                  feature: {
                    key: "regress_input"
                    value: { bytes_list: { value: "feature" } }
                  }
                }
              }
            }
          }
        }
        response {
          result {
            regressions {
              value: 0.7
            }
          }
        }
      }
    """, prediction_log_pb2.PredictionLog())

        output_example_spec = text_format.Parse(
            """
        output_columns_spec {
          regress_output {
            value_column: 'regress_value'
          }
        }
    """, bulk_inferrer_pb2.OutputExampleSpec())
        expected_example = text_format.Parse(
            """
        features {
            feature: {
              key: "regress_input"
              value: { bytes_list: { value: "feature" } }
            }
            feature: {
              key: "regress_value"
              value: { float_list: { value: 0.7 } }
            }
          }
    """, tf.train.Example())
        self.assertProtoEquals(
            expected_example, utils.convert(prediction_log,
                                            output_example_spec))
Code example #7
    def testConstructInferenceResultAndOutputExample(self):
        with self.assertRaises(ValueError):
            component.BulkInferrer(examples=self._examples,
                                   model=self._model,
                                   model_blessing=self._model_blessing,
                                   output_examples=channel_utils.as_channel(
                                       [standard_artifacts.Examples()]))

        with self.assertRaises(ValueError):
            component.BulkInferrer(
                examples=self._examples,
                model=self._model,
                model_blessing=self._model_blessing,
                output_example_spec=bulk_inferrer_pb2.OutputExampleSpec(),
                inference_result=channel_utils.as_channel(
                    [standard_artifacts.InferenceResult()]))
Code example #8
    def testDoWithOutputExamplesAllSplits(self):
        self._exec_properties[
            standard_component_specs.
            OUTPUT_EXAMPLE_SPEC_KEY] = proto_utils.proto_to_json(
                text_format.Parse(
                    """
                output_columns_spec {
                  classify_output {
                    label_column: 'classify_label'
                    score_column: 'classify_score'
                  }
                }
            """, bulk_inferrer_pb2.OutputExampleSpec()))

        # Run executor.
        bulk_inferrer = executor.Executor(self._context)
        bulk_inferrer.Do(self._input_dict, self._output_dict_oe,
                         self._exec_properties)

        # Check outputs.
        self.assertTrue(fileio.exists(self._output_examples_dir))
        self._verify_example_split('unlabelled')
        self._verify_example_split('unlabelled2')
Code example #9
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result, optional.
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if output_dict.get(standard_component_specs.INFERENCE_RESULT_KEY):
            inference_result = artifact_utils.get_single_instance(
                output_dict[standard_component_specs.INFERENCE_RESULT_KEY])
        else:
            inference_result = None
        if output_dict.get(standard_component_specs.OUTPUT_EXAMPLES_KEY):
            output_examples = artifact_utils.get_single_instance(
                output_dict[standard_component_specs.OUTPUT_EXAMPLES_KEY])
        else:
            output_examples = None

        if 'examples' not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'need to be specified.')
        if standard_component_specs.MODEL_BLESSING_KEY in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict[standard_component_specs.MODEL_BLESSING_KEY])
            if not model_utils.is_model_blessed(model_blessing):
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(
            input_dict[standard_component_specs.MODEL_KEY])
        model_path = path_utils.serving_model_path(
            model.uri, path_utils.is_old_model_artifact(model))
        logging.info('Use exported model from %s.', model_path)

        data_spec = bulk_inferrer_pb2.DataSpec()
        proto_utils.json_to_proto(
            exec_properties[standard_component_specs.DATA_SPEC_KEY], data_spec)

        output_example_spec = bulk_inferrer_pb2.OutputExampleSpec()
        if exec_properties.get(
                standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY):
            proto_utils.json_to_proto(
                exec_properties[
                    standard_component_specs.OUTPUT_EXAMPLE_SPEC_KEY],
                output_example_spec)

        self._run_model_inference(
            data_spec, output_example_spec,
            input_dict[standard_component_specs.EXAMPLES_KEY], output_examples,
            inference_result,
            self._get_inference_spec(model_path, exec_properties))
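
The tests in examples #1 and #8 populate exec_properties with JSON strings, which this Do method parses back with proto_utils.json_to_proto. Below is a minimal round-trip sketch, assuming the same tfx.utils.proto_utils and standard_component_specs modules imported by the code above.

from google.protobuf import text_format
from tfx.proto import bulk_inferrer_pb2
from tfx.types import standard_component_specs
from tfx.utils import proto_utils

# Serialize the proto to JSON the way the tests populate exec_properties.
data_spec = text_format.Parse(
    "example_splits: 'unlabelled'", bulk_inferrer_pb2.DataSpec())
exec_properties = {
    standard_component_specs.DATA_SPEC_KEY:
        proto_utils.proto_to_json(data_spec),
}

# Parse it back the way Do() does before running inference.
parsed = bulk_inferrer_pb2.DataSpec()
proto_utils.json_to_proto(
    exec_properties[standard_component_specs.DATA_SPEC_KEY], parsed)
assert list(parsed.example_splits) == ['unlabelled']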
Code example #10
 def test_convert_for_multi_inference(self):
     prediction_log = text_format.Parse(
         """
   multi_inference_log {
     request {
       input {
         example_list {
           examples {
             features {
               feature: {
                 key: "input"
                 value: { bytes_list: { value: "feature" } }
               }
             }
           }
         }
       }
     }
     response {
       results {
         model_spec {
           signature_name: 'classification'
         }
         classification_result {
           classifications {
             classes {
               label: '1'
               score: 0.6
             }
             classes {
               label: '0'
               score: 0.4
             }
           }
         }
       }
       results {
         model_spec {
           signature_name: 'regression'
         }
         regression_result {
           regressions {
             value: 0.7
           }
         }
       }
     }
   }
 """, prediction_log_pb2.PredictionLog())
     output_example_spec = text_format.Parse(
         """
     output_columns_spec {
       signature_name: 'classification'
       classify_output {
         label_column: 'classify_label'
         score_column: 'classify_score'
       }
     }
     output_columns_spec {
       signature_name: 'regression'
       regress_output {
         value_column: 'regress_value'
       }
     }
 """, bulk_inferrer_pb2.OutputExampleSpec())
     expected_example = text_format.Parse(
         """
     features {
         feature: {
           key: "input"
           value: { bytes_list: { value: "feature" } }
         }
         feature: {
           key: "classify_label"
           value: { bytes_list: { value: "1" value: "0"} }
         }
         feature: {
           key: "classify_score"
           value: { float_list: { value: 0.6 value: 0.4} }
         }
         feature: {
           key: "regress_value"
           value: { float_list: { value: 0.7} }
         }
       }
 """, tf.train.Example())
     self.assertProtoEquals(
         expected_example, utils.convert(prediction_log,
                                         output_example_spec))
Code example #11
    def Do(self, input_dict: Dict[Text, List[types.Artifact]],
           output_dict: Dict[Text, List[types.Artifact]],
           exec_properties: Dict[Text, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

        Args:
          input_dict: Input dict from input key to a list of Artifacts.
            - examples: examples for inference.
            - model: exported model.
            - model_blessing: model blessing result, optional.
          output_dict: Output dict from output key to a list of Artifacts.
            - output: bulk inference results.
          exec_properties: A dict of execution properties.
            - model_spec: JSON string of bulk_inferrer_pb2.ModelSpec instance.
            - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.

        Returns:
          None
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        source = exec_properties[StepKeys.SOURCE]
        args = exec_properties[StepKeys.ARGS]
        c = source_utils.load_source_path_class(source)
        inferrer_step: BaseInferrer = c(**args)

        output_examples = artifact_utils.get_single_instance(
            output_dict[PREDICTIONS])

        if EXAMPLES not in input_dict:
            raise ValueError('\'examples\' is missing in input dict.')
        if MODEL not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'need to be specified.')
        if MODEL_BLESSING in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict[MODEL_BLESSING])
            if not model_utils.is_model_blessed(model_blessing):
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')

        model = artifact_utils.get_single_instance(input_dict[MODEL])
        model_path = path_utils.serving_model_path(model.uri)
        logging.info('Use exported model from %s.', model_path)

        output_example_spec = bulk_inferrer_pb2.OutputExampleSpec(
            output_columns_spec=[
                bulk_inferrer_pb2.OutputColumnsSpec(
                    predict_output=bulk_inferrer_pb2.PredictOutput(
                        output_columns=[
                            bulk_inferrer_pb2.PredictOutputCol(
                                output_key=x,
                                output_column=f'{x}_label',
                            ) for x in inferrer_step.get_labels()
                        ]))
            ])

        model_spec = bulk_inferrer_pb2.ModelSpec()
        saved_model_spec = model_spec_pb2.SavedModelSpec(
            model_path=model_path,
            tag=model_spec.tag,
            signature_name=model_spec.model_signature_name)
        inference_spec = model_spec_pb2.InferenceSpecType()
        inference_spec.saved_model_spec.CopyFrom(saved_model_spec)

        self._run_model_inference(output_example_spec, input_dict[EXAMPLES],
                                  output_examples, inference_spec,
                                  inferrer_step)
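
For readers less familiar with the proto, the spec built in the list comprehension above is equivalent to the following text-format payload, shown here for a single hypothetical label 'class' returned by inferrer_step.get_labels():

    output_columns_spec {
      predict_output {
        output_columns {
          output_key: 'class'
          output_column: 'class_label'
        }
      }
    }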
Code example #12
File: executor.py Project: jay90099/tfx
    def Do(self, input_dict: Dict[str, List[types.Artifact]],
           output_dict: Dict[str, List[types.Artifact]],
           exec_properties: Dict[str, Any]) -> None:
        """Runs batch inference on a given model with given input examples.

    This function creates a new model (if necessary) and a new model version
    before inference, and cleans up resources after inference. It is
    re-executable because it cleans up (only) the model resources that were
    created during the process, even if the inference job failed.

    Args:
      input_dict: Input dict from input key to a list of Artifacts.
        - examples: examples for inference.
        - model: exported model.
        - model_blessing: model blessing result
      output_dict: Output dict from output key to a list of Artifacts.
        - output: bulk inference results.
      exec_properties: A dict of execution properties.
        - data_spec: JSON string of bulk_inferrer_pb2.DataSpec instance.
      - custom_config: custom_config.ai_platform_serving_args needs to contain
          the serving job parameters sent to Google Cloud AI Platform. For the
          full set of parameters, refer to
          https://cloud.google.com/ml-engine/reference/rest/v1/projects.models

    Returns:
      None
    """
        self._log_startup(input_dict, output_dict, exec_properties)

        if output_dict.get('inference_result'):
            inference_result = artifact_utils.get_single_instance(
                output_dict['inference_result'])
        else:
            inference_result = None
        if output_dict.get('output_examples'):
            output_examples = artifact_utils.get_single_instance(
                output_dict['output_examples'])
        else:
            output_examples = None

        if 'examples' not in input_dict:
            raise ValueError('`examples` is missing in input dict.')
        if 'model' not in input_dict:
            raise ValueError('Input models are not valid, model '
                             'need to be specified.')
        if 'model_blessing' in input_dict:
            model_blessing = artifact_utils.get_single_instance(
                input_dict['model_blessing'])
            if not model_utils.is_model_blessed(model_blessing):
                logging.info('Model on %s was not blessed', model_blessing.uri)
                return
        else:
            logging.info(
                'Model blessing is not provided, exported model will be '
                'used.')
        if _CUSTOM_CONFIG_KEY not in exec_properties:
            raise ValueError(
                'Input exec properties are not valid, {} '
                'need to be specified.'.format(_CUSTOM_CONFIG_KEY))

        custom_config = json_utils.loads(
            exec_properties.get(_CUSTOM_CONFIG_KEY, 'null'))
        if custom_config is not None and not isinstance(custom_config, Dict):
            raise ValueError(
                'custom_config in execution properties needs to be a '
                'dict.')
        ai_platform_serving_args = custom_config.get(SERVING_ARGS_KEY)
        if not ai_platform_serving_args:
            raise ValueError(
                '`ai_platform_serving_args` is missing in `custom_config`')
        service_name, api_version = runner.get_service_name_and_api_version(
            ai_platform_serving_args)
        executor_class_path = '%s.%s' % (self.__class__.__module__,
                                         self.__class__.__name__)
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}):
            job_labels = telemetry_utils.make_labels_dict()
        model = artifact_utils.get_single_instance(input_dict['model'])
        model_path = path_utils.serving_model_path(
            model.uri, path_utils.is_old_model_artifact(model))
        logging.info('Use exported model from %s.', model_path)
        # Use model artifact uri to generate model version to guarantee the
        # 1:1 mapping from model version to model.
        model_version = 'version_' + hashlib.sha256(
            model.uri.encode()).hexdigest()
        inference_spec = self._get_inference_spec(model_path, model_version,
                                                  ai_platform_serving_args)
        data_spec = bulk_inferrer_pb2.DataSpec()
        proto_utils.json_to_proto(exec_properties['data_spec'], data_spec)
        output_example_spec = bulk_inferrer_pb2.OutputExampleSpec()
        if exec_properties.get('output_example_spec'):
            proto_utils.json_to_proto(exec_properties['output_example_spec'],
                                      output_example_spec)
        endpoint = custom_config.get(constants.ENDPOINT_ARGS_KEY)
        if endpoint and 'regions' in ai_platform_serving_args:
            raise ValueError(
                '`endpoint` and `ai_platform_serving_args.regions` cannot be set simultaneously'
            )
        api = discovery.build(
            service_name,
            api_version,
            requestBuilder=telemetry_utils.TFXHttpRequest,
            client_options=client_options.ClientOptions(api_endpoint=endpoint),
        )
        new_model_endpoint_created = False
        try:
            new_model_endpoint_created = runner.create_model_for_aip_prediction_if_not_exist(
                job_labels, ai_platform_serving_args, api)
            runner.deploy_model_for_aip_prediction(
                serving_path=model_path,
                model_version_name=model_version,
                ai_platform_serving_args=ai_platform_serving_args,
                api=api,
                labels=job_labels,
                skip_model_endpoint_creation=True,
                set_default=False,
            )
            self._run_model_inference(data_spec, output_example_spec,
                                      input_dict['examples'], output_examples,
                                      inference_result, inference_spec)
        except Exception as e:
            logging.error(
                'Error in executing CloudAIBulkInferrerComponent: %s', str(e))
            raise
        finally:
            # Guarantee newly created resources are cleaned up even if the inference
            # job failed.

            # Clean up the newly deployed model.
            runner.delete_model_from_aip_if_exists(
                model_version_name=model_version,
                ai_platform_serving_args=ai_platform_serving_args,
                api=api,
                delete_model_endpoint=new_model_endpoint_created)
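
A hypothetical sketch of the exec_properties setup that would satisfy the validation above: custom_config must JSON-decode to a dict holding the serving arguments under SERVING_ARGS_KEY, and data_spec is a JSON-serialized DataSpec as in the other executors. The project, model, and region values are placeholders, not taken from the original code.

# Placeholder values only; SERVING_ARGS_KEY and _CUSTOM_CONFIG_KEY are the
# constants referenced by the executor above, and json_utils/proto_utils are
# the same tfx.utils helpers it uses.
custom_config = {
    SERVING_ARGS_KEY: {
        'project_id': 'my-gcp-project',  # placeholder
        'model_name': 'my_model',        # placeholder
        'regions': ['us-central1'],      # placeholder
    }
}
exec_properties = {
    'data_spec': proto_utils.proto_to_json(bulk_inferrer_pb2.DataSpec()),
    _CUSTOM_CONFIG_KEY: json_utils.dumps(custom_config),
}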