Code example #1
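The snippets in this collection are excerpted from TensorFlow Model Analysis (TFMA) extractor tests and benchmarks, so they are not self-contained. A minimal sketch of the imports they appear to rely on is shown below; the exact module paths (especially model_eval_lib, config vs. config_pb2, and the omitted example_trainers/benchmark helpers) are assumptions and may differ between TFMA and tfx-bsl versions.

# Assumed imports for the snippets below (module paths may vary by version).
import copy
import time

import apache_beam as beam
import numpy as np
import tensorflow as tf
import tensorflow_model_analysis as tfma

from apache_beam.testing import util
from google.protobuf import text_format
from tensorflow_metadata.proto.v0 import schema_pb2
from tfx_bsl.tfxio import tensor_adapter
from tfx_bsl.tfxio import test_util

from tensorflow_model_analysis import config            # older releases
from tensorflow_model_analysis import constants
from tensorflow_model_analysis import model_eval_lib    # path is an assumption
from tensorflow_model_analysis.proto import config_pb2  # newer releases
from tensorflow_model_analysis.evaluators import (
    metrics_plots_and_validations_evaluator)
from tensorflow_model_analysis.extractors import example_weights_extractor
from tensorflow_model_analysis.extractors import features_extractor
from tensorflow_model_analysis.extractors import labels_extractor
from tensorflow_model_analysis.extractors import predictions_extractor
from tensorflow_model_analysis.extractors import unbatch_extractor
# Test-only helpers such as batch_size_limited_classifier, multi_head,
# dnn_classifier, fixed_prediction_estimator_extra_fields and benchmark_utils
# live in TFMA's test/benchmark packages and are not listed here.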
  def testBatchSizeLimit(self):
    temp_export_dir = self._getExportDir()
    _, export_dir = batch_size_limited_classifier.simple_batch_size_limited_classifier(
        None, temp_export_dir)
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    schema = text_format.Parse(
        """
        feature {
          name: "classes"
          type: BYTES
        }
        feature {
          name: "scores"
          type: FLOAT
        }
        feature {
          name: "labels"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
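    # The tensor adapter config below tells the prediction extractor how to
    # convert the Arrow RecordBatches produced by tfx_io into the dense
    # tensors the saved model expects.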
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    examples = []
    for _ in range(4):
      examples.append(
          self._makeExample(classes='first', scores=0.0, labels='third'))

    with beam.Pipeline() as pipeline:
      predict_extracts = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      def check_result(got):
        try:
          self.assertLen(got, 4)
          # We can't verify the actual predictions, but we can verify the keys.
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result, label='result')
Code example #2
  def benchmarkMiniPipeline(self):
    """Benchmark a "mini" TFMA - predict, slice and compute metrics.

    Runs a "mini" version of TFMA in a Beam pipeline. Records the wall time
    taken for the whole pipeline.
    """
    self._init_model()
    pipeline = self._create_beam_pipeline()
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=benchmark_utils.read_schema(
            self._dataset.tf_metadata_schema_path()),
        raw_record_column_name=constants.ARROW_INPUT_COLUMN)
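    # BeamSource yields Arrow RecordBatches; BatchedInputsToExtracts wraps each
    # batch into the extracts dict that the downstream extractors operate on.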
    raw_data = (
        pipeline
        | "Examples" >> beam.Create(
            self._dataset.read_raw_dataset(
                deserialize=False, limit=MAX_NUM_EXAMPLES))
        | "BatchExamples" >> tfx_io.BeamSource()
        | "InputsToExtracts" >> tfma.BatchedInputsToExtracts())

    _ = (
        raw_data
        | "FeaturesExtractor" >> features_extractor.FeaturesExtractor(
            eval_config=self._eval_config).ptransform
        | "LabelsExtractor" >> labels_extractor.LabelsExtractor(
            eval_config=self._eval_config).ptransform
        | "ExampleWeightsExtractor" >> example_weights_extractor
        .ExampleWeightsExtractor(eval_config=self._eval_config).ptransform
        | "PredictionsExtractor" >> predictions_extractor.PredictionsExtractor(
            eval_config=self._eval_config,
            eval_shared_model=self._eval_shared_model).ptransform
        | "UnbatchExtractor" >> unbatch_extractor.UnbatchExtractor().ptransform
        | "SliceKeyExtractor" >> tfma.extractors.SliceKeyExtractor().ptransform
        | "ComputeMetricsPlotsAndValidations" >>
        metrics_plots_and_validations_evaluator
        .MetricsPlotsAndValidationsEvaluator(
            eval_config=self._eval_config,
            eval_shared_model=self._eval_shared_model).ptransform)

    start = time.time()
    result = pipeline.run()
    result.wait_until_finish()
    end = time.time()
    delta = end - start

    self.report_benchmark(
        iters=1,
        wall_time=delta,
        extras={
            "num_examples": self._dataset.num_examples(limit=MAX_NUM_EXAMPLES)
        })
Code example #3
  def testPredictionsExtractorWithoutEvalSharedModel(self):
    model_spec1 = config.ModelSpec(name='model1', prediction_key='prediction')
    model_spec2 = config.ModelSpec(
        name='model2',
        prediction_keys={
            'output1': 'prediction1',
            'output2': 'prediction2'
        })
    eval_config = config.EvalConfig(model_specs=[model_spec1, model_spec2])
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
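    # With no eval_shared_model, the extractor reads already-materialized
    # predictions from the input features using the prediction_key /
    # prediction_keys declared in each ModelSpec.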
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config)

    schema = text_format.Parse(
        """
        feature {
          name: "prediction"
          type: FLOAT
        }
        feature {
          name: "prediction1"
          type: FLOAT
        }
        feature {
          name: "prediction2"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)

    examples = [
        self._makeExample(
            prediction=1.0, prediction1=1.0, prediction2=0.0, fixed_int=1),
        self._makeExample(
            prediction=1.0, prediction1=1.0, prediction2=1.0, fixed_int=1)
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          for model_name in ('model1', 'model2'):
            self.assertIn(model_name, got[0][constants.PREDICTIONS_KEY][0])
          self.assertAlmostEqual(got[0][constants.PREDICTIONS_KEY][0]['model1'],
                                 np.array([1.0]))
          self.assertDictElementsAlmostEqual(
              got[0][constants.PREDICTIONS_KEY][0]['model2'], {
                  'output1': np.array([1.0]),
                  'output2': np.array([0.0])
              })

          for model_name in ('model1', 'model2'):
            self.assertIn(model_name, got[0][constants.PREDICTIONS_KEY][1])
          self.assertAlmostEqual(got[0][constants.PREDICTIONS_KEY][1]['model1'],
                                 np.array([1.0]))
          self.assertDictElementsAlmostEqual(
              got[0][constants.PREDICTIONS_KEY][1]['model2'], {
                  'output1': np.array([1.0]),
                  'output2': np.array([1.0])
              })

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Code example #4
  def testBatchSizeLimitWithKerasModel(self):
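    # batch_size=1 on the Input layers below means the exported model only
    # accepts batches containing a single example, which is what this test
    # exercises.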
    input1 = tf.keras.layers.Input(shape=(1,), batch_size=1, name='input1')
    input2 = tf.keras.layers.Input(shape=(1,), batch_size=1, name='input2')

    inputs = [input1, input2]
    input_layer = tf.keras.layers.concatenate(inputs)

    def add_1(tensor):
      return tf.add_n([tensor, tf.constant(1.0, shape=(1, 2))])

    assert_layer = tf.keras.layers.Lambda(add_1)(input_layer)

    model = tf.keras.models.Model(inputs, assert_layer)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    export_dir = self._getExportDir()
    model.save(export_dir, save_format='tf')

    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    schema = text_format.Parse(
        """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "input1"
              value {
                dense_tensor {
                  column_name: "input1"
                  shape { dim { size: 1 } }
                }
              }
            }
            tensor_representation {
              key: "input2"
              value {
                dense_tensor {
                  column_name: "input2"
                  shape { dim { size: 1 } }
                }
              }
            }
          }
        }
        feature {
          name: "input1"
          type: FLOAT
        }
        feature {
          name: "input2"
          type: FLOAT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    examples = []
    for _ in range(4):
      examples.append(self._makeExample(input1=0.0, input2=1.0))

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      predict_extracts = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=1)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      # pylint: enable=no-value-for-parameter
      def check_result(got):
        try:
          self.assertLen(got, 4)
          # We can't verify the actual predictions, but we can verify the keys.
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(predict_extracts, check_result, label='result')
Code example #5
  def testPredictionsExtractorWithSequentialKerasModel(self):
    # Note that the input will be called 'test_input'
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(
            1, activation=tf.nn.sigmoid, input_shape=(2,), name='test')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=['accuracy'])

    train_features = {'test_input': [[0.0, 0.0], [1.0, 1.0]]}
    labels = [[1], [0]]
    example_weights = [1.0, 0.5]
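    # A single training step is enough here; the test only checks that
    # prediction keys are produced, not model quality.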
    dataset = tf.data.Dataset.from_tensor_slices(
        (train_features, labels, example_weights))
    dataset = dataset.shuffle(buffer_size=1).repeat().batch(2)
    model.fit(dataset, steps_per_epoch=1)

    export_dir = self._getExportDir()
    model.save(export_dir, save_format='tf')

    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    schema = text_format.Parse(
        """
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "test"
              value {
                dense_tensor {
                  column_name: "test"
                  shape { dim { size: 2 } }
                }
              }
            }
          }
        }
        feature {
          name: "test"
          type: FLOAT
        }
        feature {
          name: "non_model_feature"
          type: INT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    # Notice that the features are 'test' but the model expects 'test_input'.
    # This tests that the PredictExtractor properly handles this case.
    examples = [
        self._makeExample(test=[0.0, 0.0],
                          non_model_feature=0),  # should be ignored by model
        self._makeExample(test=[1.0, 1.0],
                          non_model_feature=1),  # should be ignored by model
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=2)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          # We can't verify the actual predictions, but we can verify the keys.
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Code example #6
  def testPredictionsExtractorWithRegressionModel(self):
    temp_export_dir = self._getExportDir()
    export_dir, _ = (
        fixed_prediction_estimator_extra_fields
        .simple_fixed_prediction_estimator_extra_fields(temp_export_dir, None))

    eval_config = config.EvalConfig(model_specs=[config.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    schema = text_format.Parse(
        """
        feature {
          name: "prediction"
          type: FLOAT
        }
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config)

    examples = [
        self._makeExample(
            prediction=0.2,
            label=1.0,
            fixed_int=1,
            fixed_float=1.0,
            fixed_string='fixed_string1'),
        self._makeExample(
            prediction=0.8,
            label=0.0,
            fixed_int=1,
            fixed_float=1.0,
            fixed_string='fixed_string2'),
        self._makeExample(
            prediction=0.5,
            label=0.0,
            fixed_int=2,
            fixed_float=1.0,
            fixed_string='fixed_string3')
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          self.assertIn(constants.PREDICTIONS_KEY, got[0])
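          # The fixed-prediction estimator echoes the 'prediction' feature
          # through to its output, so the extracted predictions should equal
          # the input values.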
          expected_preds = [0.2, 0.8, 0.5]
          self.assertAlmostEqual(got[0][constants.PREDICTIONS_KEY],
                                 expected_preds)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Code example #7
  def testPredictionsExtractorWithMultiModels(self):
    temp_export_dir = self._getExportDir()
    export_dir1, _ = multi_head.simple_multi_head(temp_export_dir, None)
    export_dir2, _ = multi_head.simple_multi_head(temp_export_dir, None)

    eval_config = config.EvalConfig(model_specs=[
        config.ModelSpec(name='model1'),
        config.ModelSpec(name='model2')
    ])
    eval_shared_model1 = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir1, tags=[tf.saved_model.SERVING])
    eval_shared_model2 = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir2, tags=[tf.saved_model.SERVING])
    schema = text_format.Parse(
        """
        feature {
          name: "age"
          type: FLOAT
        }
        feature {
          name: "langauge"
          type: BYTES
        }
        feature {
          name: "english_label"
          type: FLOAT
        }
        feature {
          name: "chinese_label"
          type: FLOAT
        }
        feature {
          name: "other_label"
          type: FLOAT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
    feature_extractor = features_extractor.FeaturesExtractor(eval_config)
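    # For multi-model evaluation, eval_shared_model is passed as a dict keyed
    # by the model names declared in the ModelSpecs above.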
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config,
        eval_shared_model={
            'model1': eval_shared_model1,
            'model2': eval_shared_model2
        },
        tensor_adapter_config=tensor_adapter_config)

    examples = [
        self._makeExample(
            age=1.0,
            language='english',
            english_label=1.0,
            chinese_label=0.0,
            other_label=0.0),
        self._makeExample(
            age=1.0,
            language='chinese',
            english_label=0.0,
            chinese_label=1.0,
            other_label=0.0),
        self._makeExample(
            age=2.0,
            language='english',
            english_label=1.0,
            chinese_label=0.0,
            other_label=0.0),
        self._makeExample(
            age=2.0,
            language='other',
            english_label=0.0,
            chinese_label=1.0,
            other_label=1.0)
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=4)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          for item in got:
            # We can't verify the actual predictions, but we can verify the keys
            self.assertIn(constants.PREDICTIONS_KEY, item)
            for pred in item[constants.PREDICTIONS_KEY]:
              for model_name in ('model1', 'model2'):
                self.assertIn(model_name, pred)
                for output_name in ('chinese_head', 'english_head',
                                    'other_head'):
                  for pred_key in ('logistic', 'probabilities', 'all_classes'):
                    self.assertIn(output_name + '/' + pred_key,
                                  pred[model_name])

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Code example #8
    def _runMiniPipeline(self, multi_model):
        """Benchmark a "mini" TFMA - predict, slice and compute metrics.

    Runs a "mini" version of TFMA in a Beam pipeline. Records the wall time
    taken for the whole pipeline.

    Args:
      multi_model: True if multiple models should be used in the benchmark.
    """
        self._init_model(multi_model, validation=False)
        pipeline = self._create_beam_pipeline()
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=benchmark_utils.read_schema(
                self._dataset.tf_metadata_schema_path()),
            raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        raw_data = (pipeline
                    | "Examples" >> beam.Create(
                        self._dataset.read_raw_dataset(
                            deserialize=False, limit=self._max_num_examples()))
                    | "BatchExamples" >> tfx_io.BeamSource()
                    | "InputsToExtracts" >> tfma.BatchedInputsToExtracts())

        def rescale_labels(extracts):
            # Transform labels to [0, 1] so we can test metrics that require labels in
            # that range.
            result = copy.copy(extracts)
            result[constants.LABELS_KEY] = self._transform_labels(
                extracts[constants.LABELS_KEY])
            return result

        _ = (raw_data
             | "FeaturesExtractor" >> features_extractor.FeaturesExtractor(
                 eval_config=self._eval_config).ptransform
             | "LabelsExtractor" >> labels_extractor.LabelsExtractor(
                 eval_config=self._eval_config).ptransform
             | "RescaleLabels" >> beam.Map(rescale_labels)
             | "ExampleWeightsExtractor" >> example_weights_extractor.
             ExampleWeightsExtractor(eval_config=self._eval_config).ptransform
             | "PredictionsExtractor" >>
             predictions_extractor.PredictionsExtractor(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_models).ptransform
             | "UnbatchExtractor" >>
             unbatch_extractor.UnbatchExtractor().ptransform
             | "SliceKeyExtractor" >>
             tfma.extractors.SliceKeyExtractor().ptransform
             | "ComputeMetricsPlotsAndValidations" >>
             metrics_plots_and_validations_evaluator.
             MetricsPlotsAndValidationsEvaluator(
                 eval_config=self._eval_config,
                 eval_shared_model=self._eval_shared_models).ptransform)

        start = time.time()
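        # Wall time is measured across _ITERS complete pipeline runs and
        # reported as a single benchmark entry below.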
        for _ in range(_ITERS):
            result = pipeline.run()
            result.wait_until_finish()
        end = time.time()
        delta = end - start

        self.report_benchmark(
            iters=_ITERS,
            wall_time=delta,
            extras={
                "num_examples":
                self._dataset.num_examples(limit=self._max_num_examples())
            })
Code example #9
    def testUnbatchExtractor(self):
        model_spec = config_pb2.ModelSpec(label_key='label',
                                          example_weight_key='example_weight')
        eval_config = config_pb2.EvalConfig(model_specs=[model_spec])
        feature_extractor = features_extractor.FeaturesExtractor(eval_config)
        label_extractor = labels_extractor.LabelsExtractor(eval_config)
        example_weight_extractor = (
            example_weights_extractor.ExampleWeightsExtractor(eval_config))
        predict_extractor = predictions_extractor.PredictionsExtractor(
            eval_config)
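        # UnbatchExtractor splits batched extracts back into one extract per
        # example, which is why check_result below sees three separate items.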
        unbatch_inputs_extractor = unbatch_extractor.UnbatchExtractor()

        schema = text_format.Parse(
            """
        feature {
          name: "label"
          type: FLOAT
        }
        feature {
          name: "example_weight"
          type: FLOAT
        }
        feature {
          name: "fixed_int"
          type: INT
        }
        feature {
          name: "fixed_float"
          type: FLOAT
        }
        feature {
          name: "fixed_string"
          type: BYTES
        }
        """, schema_pb2.Schema())
        tfx_io = test_util.InMemoryTFExampleRecord(
            schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
        examples = [
            self._makeExample(label=1.0,
                              example_weight=0.5,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string1'),
            self._makeExample(label=0.0,
                              example_weight=0.0,
                              fixed_int=1,
                              fixed_float=1.0,
                              fixed_string='fixed_string2'),
            self._makeExample(label=0.0,
                              example_weight=1.0,
                              fixed_int=2,
                              fixed_float=0.0,
                              fixed_string='fixed_string3')
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            result = (
                pipeline
                | 'Create' >> beam.Create(
                    [e.SerializeToString() for e in examples], reshuffle=False)
                | 'BatchExamples' >> tfx_io.BeamSource(batch_size=3)
                |
                'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
                | feature_extractor.stage_name >> feature_extractor.ptransform
                | label_extractor.stage_name >> label_extractor.ptransform
                | example_weight_extractor.stage_name >>
                example_weight_extractor.ptransform
                | predict_extractor.stage_name >> predict_extractor.ptransform
                | unbatch_inputs_extractor.stage_name >>
                unbatch_inputs_extractor.ptransform)

            # pylint: enable=no-value-for-parameter

            def check_result(got):
                try:
                    self.assertLen(got, 3)
                    self.assertDictElementsAlmostEqual(
                        got[0][constants.FEATURES_KEY], {
                            'fixed_int': np.array([1]),
                            'fixed_float': np.array([1.0]),
                        })
                    self.assertEqual(
                        got[0][constants.FEATURES_KEY]['fixed_string'],
                        np.array([b'fixed_string1']))
                    self.assertAlmostEqual(got[0][constants.LABELS_KEY],
                                           np.array([1.0]))
                    self.assertAlmostEqual(
                        got[0][constants.EXAMPLE_WEIGHTS_KEY], np.array([0.5]))
                    self.assertDictElementsAlmostEqual(
                        got[1][constants.FEATURES_KEY], {
                            'fixed_int': np.array([1]),
                            'fixed_float': np.array([1.0]),
                        })
                    self.assertEqual(
                        got[1][constants.FEATURES_KEY]['fixed_string'],
                        np.array([b'fixed_string2']))
                    self.assertAlmostEqual(got[1][constants.LABELS_KEY],
                                           np.array([0.0]))
                    self.assertAlmostEqual(
                        got[1][constants.EXAMPLE_WEIGHTS_KEY], np.array([0.0]))
                    self.assertDictElementsAlmostEqual(
                        got[2][constants.FEATURES_KEY], {
                            'fixed_int': np.array([2]),
                            'fixed_float': np.array([0.0]),
                        })
                    self.assertEqual(
                        got[2][constants.FEATURES_KEY]['fixed_string'],
                        np.array([b'fixed_string3']))
                    self.assertAlmostEqual(got[2][constants.LABELS_KEY],
                                           np.array([0.0]))
                    self.assertAlmostEqual(
                        got[2][constants.EXAMPLE_WEIGHTS_KEY], np.array([1.0]))

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Code example #10
  def testPredictionsExtractorWithMultiClassModel(self):
    temp_export_dir = self._getExportDir()
    export_dir, _ = dnn_classifier.simple_dnn_classifier(
        temp_export_dir, None, n_classes=3)

    eval_config = config_pb2.EvalConfig(model_specs=[config_pb2.ModelSpec()])
    eval_shared_model = self.createTestEvalSharedModel(
        eval_saved_model_path=export_dir, tags=[tf.saved_model.SERVING])
    schema = text_format.Parse(
        """
        feature {
          name: "age"
          type: FLOAT
        }
        feature {
          name: "langauge"
          type: BYTES
        }
        feature {
          name: "label"
          type: INT
        }
        """, schema_pb2.Schema())
    tfx_io = test_util.InMemoryTFExampleRecord(
        schema=schema, raw_record_column_name=constants.ARROW_INPUT_COLUMN)
    tensor_adapter_config = tensor_adapter.TensorAdapterConfig(
        arrow_schema=tfx_io.ArrowSchema(),
        tensor_representations=tfx_io.TensorRepresentations())
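    # Unlike the earlier examples, the tensor representations are passed to
    # the FeaturesExtractor here, and the PredictionsExtractor is constructed
    # without a tensor_adapter_config.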
    feature_extractor = features_extractor.FeaturesExtractor(
        eval_config=eval_config,
        tensor_representations=tensor_adapter_config.tensor_representations)
    prediction_extractor = predictions_extractor.PredictionsExtractor(
        eval_config=eval_config, eval_shared_model=eval_shared_model)

    examples = [
        self._makeExample(age=1.0, language='english', label=0),
        self._makeExample(age=2.0, language='chinese', label=1),
        self._makeExample(age=3.0, language='english', label=2),
        self._makeExample(age=4.0, language='chinese', label=1),
    ]

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      result = (
          pipeline
          | 'Create' >> beam.Create([e.SerializeToString() for e in examples],
                                    reshuffle=False)
          | 'BatchExamples' >> tfx_io.BeamSource(batch_size=4)
          | 'InputsToExtracts' >> model_eval_lib.BatchedInputsToExtracts()
          | feature_extractor.stage_name >> feature_extractor.ptransform
          | prediction_extractor.stage_name >> prediction_extractor.ptransform)

      # pylint: enable=no-value-for-parameter

      def check_result(got):
        try:
          self.assertLen(got, 1)
          # We can't verify the actual predictions, but we can verify the keys.
          for item in got:
            self.assertIn(constants.PREDICTIONS_KEY, item)
            for pred_key in ('probabilities', 'all_classes'):
              self.assertIn(pred_key, item[constants.PREDICTIONS_KEY])

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')