def testRaisesErrorWhenExampleWeightsDiffer(self):
    """Checks that differing example weights inside one query raise ValueError.

    Both examples share the query id 'query1' but carry example weights of
    1.0 and 0.5. The metric and the example dicts are built outside the
    ``assertRaises`` scope so that only a ValueError coming from constructing
    or running the pipeline can satisfy the assertion; a ValueError raised by
    the fixture setup now fails the test instead of silently passing it.
    """
    metric = min_label_position.MinLabelPosition().computations(
        query_key='query')[0]

    query1_example1 = {
        'labels': np.array([0.0]),
        'predictions': np.array([0.2]),
        'example_weights': np.array([1.0]),
        'features': {
            'query': np.array(['query1'])
        }
    }
    query1_example2 = {
        'labels': np.array([1.0]),
        'predictions': np.array([0.8]),
        'example_weights': np.array([0.5]),
        'features': {
            'query': np.array(['query1'])
        }
    }

    with self.assertRaises(ValueError):
      # Leaving the pipeline context runs the pipeline, so the error is
      # expected to surface no later than the end of this inner block.
      with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        _ = (
            pipeline
            | 'Create' >> beam.Create(
                [tfma_util.merge_extracts([query1_example1, query1_example2])])
            | 'Process' >> beam.Map(metric_util.to_standard_metric_inputs, True)
            | 'AddSlice' >> beam.Map(lambda x: ((), x))
            | 'Combine' >> beam.CombinePerKey(metric.combiner))
        # pylint: enable=no-value-for-parameter
Esempio n. 2
0
 def _update_state(
         self, accumulator: tf_metric_accumulators.TFMetricsAccumulator):
     """Runs `self._model.evaluate` on the inputs held by `accumulator`.

     For every entry of `self._output_names` the accumulator is asked for its
     (features, labels, example weights) triple. Only the features returned
     for index 0 are merged and used as model input. Labels and weights are
     collected per output name and collapsed to plain values when there is
     exactly one output.

     Args:
       accumulator: Accumulator queried through `get_inputs(index)`.

     Raises:
       ValueError: If `model_util.get_inputs` returns None for the merged
         features.
     """
     multi_output = len(self._output_names) > 1
     features = {}
     labels_by_output = {}
     weights_by_output = {}
     for index, output_name in enumerate(self._output_names):
         raw_features, raw_labels, raw_weights = accumulator.get_inputs(index)
         if index == 0:
             features = util.merge_extracts(raw_features)
         if multi_output and not output_name:
             # The empty output_name for multi-output models is not used for
             # inputs.
             continue
         labels_by_output[output_name] = np.array(raw_labels)
         squeezed = np.array(raw_weights)
         # TFv1 will not squeeze the weights, so it must be done manually.
         # NOTE(review): a 1-D weights array of length one would also be
         # squeezed (to 0-d) here -- confirm inputs always have a trailing
         # size-1 axis.
         if squeezed.shape[-1] == 1:
             squeezed = squeezed.squeeze(axis=-1)
         weights_by_output[output_name] = squeezed

     labels = labels_by_output
     example_weights = weights_by_output
     if len(self._output_names) == 1:
         # Single-output models don't use dicts.
         labels = next(iter(labels_by_output.values()))
         example_weights = next(iter(weights_by_output.values()))

     input_specs = model_util.get_input_specs(self._model,
                                              signature_name=None)
     inputs = model_util.get_inputs(features, input_specs)
     if inputs is None:
         raise ValueError('unable to prepare inputs for evaluation: '
                          f'input_specs={input_specs}, features={features}')

     evaluate_kwargs = dict(x=inputs,
                            y=labels,
                            batch_size=util.batch_size(features),
                            verbose=0,
                            sample_weight=example_weights)
     self._model.evaluate(**evaluate_kwargs)
Esempio n. 3
0
    def testMergeExtractsRaisesException(self):
        """Expects merge_extracts to raise a RuntimeError caused by a RuntimeError.

        The two extracts each hold a sparse 'feature_3'; they differ in value,
        column index and dense_shape ([1, 2] vs [1, 3]).
        NOTE(review): presumably the dense_shape mismatch is what triggers the
        failure -- confirm against util.merge_extracts.
        """
        first_sparse = types.SparseTensorValue(
            values=np.array([1]),
            indices=np.array([[0, 1]]),
            dense_shape=np.array([1, 2]))
        second_sparse = types.SparseTensorValue(
            values=np.array([2]),
            indices=np.array([[0, 2]]),
            dense_shape=np.array([1, 3]))
        extracts = [{
            'features': {
                'feature_3': sparse_value
            },
        } for sparse_value in (first_sparse, second_sparse)]

        def caused_by_runtime_error(exc):
            # The raised error must chain from another RuntimeError.
            return isinstance(exc.__cause__, RuntimeError)

        with self.assertRaisesWithPredicateMatch(RuntimeError,
                                                 caused_by_runtime_error):
            util.merge_extracts(extracts)
Esempio n. 4
0
    def testMinLabelPositionWithNoWeightedExamples(self):
        """Expects a NaN min_label_position when the only example has weight 0.

        A single 'query1' example with example weight 0.0 is pushed through
        the example-weighted metric combiner; the resulting metric value must
        be NaN.
        """
        computations = min_label_position.MinLabelPosition().computations(
            query_key='query', example_weighted=True)
        metric = computations[0]

        zero_weight_example = {
            'labels': np.array([1.0]),
            'predictions': np.array([0.2]),
            'example_weights': np.array([0.0]),
            'features': {
                'query': np.array(['query1'])
            }
        }

        with beam.Pipeline() as pipeline:
            merged_inputs = [tfma_util.merge_extracts([zero_weight_example])]
            # pylint: disable=no-value-for-parameter
            created = pipeline | 'Create' >> beam.Create(merged_inputs)
            processed = created | 'Process' >> beam.Map(
                metric_util.to_standard_metric_inputs, True)
            sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
            result = sliced | 'Combine' >> beam.CombinePerKey(metric.combiner)
            # pylint: enable=no-value-for-parameter

            def check_result(actual):
                # Assertion failures are re-raised as BeamAssertException for
                # util.assert_that.
                try:
                    self.assertLen(actual, 1)
                    slice_key, metrics_by_key = actual[0]
                    self.assertEqual(slice_key, ())
                    expected_key = metric_types.MetricKey(
                        name='min_label_position', example_weighted=True)
                    self.assertIn(expected_key, metrics_by_key)
                    value = metrics_by_key[expected_key]
                    self.assertTrue(math.isnan(value))

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
  def testMinLabelPosition(self, label_key):
    """Checks min_label_position over three queries.

    Args:
      label_key: Passed to MinLabelPosition. When it equals 'custom_label'
        the expected value is 2.333333, otherwise 1.166666.
    """
    metric = min_label_position.MinLabelPosition(
        label_key=label_key).computations(query_key='query')[0]

    def make_example(label, prediction, weight, custom_label, query):
      # One extract in the layout fed to tfma_util.merge_extracts.
      return {
          'labels': np.array([label]),
          'predictions': np.array([prediction]),
          'example_weights': np.array([weight]),
          'features': {
              'custom_label': np.array([custom_label]),
              'query': np.array([query])
          }
      }

    # One inner list per query; every example of a query shares its weight.
    examples_per_query = [
        [
            make_example(1.0, 0.2, 1.0, 0.0, 'query1'),
            make_example(0.0, 0.8, 1.0, 1.0, 'query1'),
        ],
        [
            make_example(1.0, 0.9, 2.0, 0.0, 'query2'),
            make_example(0.0, 0.1, 2.0, 1.0, 'query2'),
            make_example(0.0, 0.5, 2.0, 0.0, 'query2'),
        ],
        [
            make_example(1.0, 0.9, 3.0, 0.0, 'query3'),
        ],
    ]
    examples = [
        tfma_util.merge_extracts(group) for group in examples_per_query
    ]

    if label_key:
      self.assertIsNotNone(metric.preprocessor)

    with beam.Pipeline() as pipeline:
      # pylint: disable=no-value-for-parameter
      created = pipeline | 'Create' >> beam.Create(examples)
      processed = created | 'Process' >> beam.Map(
          metric_util.to_standard_metric_inputs, include_features=True)
      sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
      result = sliced | 'Combine' >> beam.CombinePerKey(metric.combiner)
      # pylint: enable=no-value-for-parameter

      def check_result(actual):
        # Assertion failures are re-raised as BeamAssertException for
        # util.assert_that.
        try:
          self.assertLen(actual, 1)
          slice_key, metrics_by_key = actual[0]
          self.assertEqual(slice_key, ())
          expected_key = metric_types.MetricKey(name='min_label_position')
          self.assertIn(expected_key, metrics_by_key)
          # custom_label: (1*1.0 + 3*2.0) / (1.0 + 2.0) = 2.333333
          # otherwise:    (2*1.0 + 1*2.0 + 1*3.0) / (1.0 + 2.0 + 3.0)
          #               = 1.166666
          expected_value = (
              2.333333 if label_key == 'custom_label' else 1.166666)
          self.assertAllClose(metrics_by_key[expected_key], expected_value)

        except AssertionError as err:
          raise util.BeamAssertException(err)

      util.assert_that(result, check_result, label='result')
Esempio n. 6
0
    def testMergeExtracts(self):
        """Merges three extracts and compares against the expected batch.

        The extracts mix dense, sparse and ragged features plus an initially
        empty dense feature ('feature_6'). 'feature_5' uses a different
        dense_shape in every extract, and '_slice_key_types' has a different
        number of slice keys in the first extract.
        """

        def sparse(value, column, width):
            # Single-entry sparse value at [0, column] in a [1, width] shape.
            return types.SparseTensorValue(values=np.array([value]),
                                           indices=np.array([[0, column]]),
                                           dense_shape=np.array([1, width]))

        def ragged():
            # Fresh instance per call so the extracts never share arrays.
            return types.RaggedTensorValue(
                values=np.array([3, 1, 4, 1, 5, 9, 2, 6]),
                nested_row_splits=[np.array([0, 4, 4, 7, 8, 8])])

        def make_extract(dense, feature_3, feature_5, feature_6, label,
                         weight, scores, slice_keys):
            return {
                'features': {
                    'feature_1': np.array(dense),
                    'feature_2': np.array(dense),
                    'feature_3': feature_3,
                    'feature_4': ragged(),
                    'feature_5': feature_5,
                    'feature_6': np.array(feature_6),
                },
                'labels': np.array([label]),
                'example_weights': np.array(weight),
                'predictions': {
                    'model1': np.array(scores),
                    'model2': np.array(scores)
                },
                '_slice_key_types':
                slicer_lib.slice_keys_to_numpy_array(slice_keys)
            }

        extracts = [
            make_extract(dense=[1.0, 2.0],
                         feature_3=sparse(1, 1, 3),
                         feature_5=sparse(1, 1, 3),
                         feature_6=[],
                         label=1.0,
                         weight=0.0,
                         scores=[0.1, 0.2],
                         slice_keys=[('gender', 'm'), ()]),
            make_extract(dense=[3.0, 4.0],
                         feature_3=sparse(2, 2, 3),
                         feature_5=sparse(2, 2, 4),
                         feature_6=[],
                         label=0.0,
                         weight=0.5,
                         scores=[0.3, 0.4],
                         slice_keys=[()]),
            make_extract(dense=[5.0, 6.0],
                         feature_3=sparse(3, 0, 3),
                         feature_5=sparse(3, 3, 5),
                         feature_6=[2.0, 3.0],
                         label=1.0,
                         weight=1.0,
                         scores=[0.5, 0.6],
                         slice_keys=[()]),
        ]

        merged_feature_4 = types.RaggedTensorValue(
            values=np.array([
                3, 1, 4, 1, 5, 9, 2, 6, 3, 1, 4, 1, 5, 9, 2, 6, 3, 1, 4, 1,
                5, 9, 2, 6
            ]),
            nested_row_splits=[
                np.array([0, 5, 10, 15]),
                np.array([
                    0, 4, 4, 7, 8, 8, 12, 12, 15, 16, 16, 20, 20, 23, 24, 24
                ])
            ])
        merged_slice_keys = types.VarLenTensorValue(
            values=slicer_lib.slice_keys_to_numpy_array([('gender', 'm'),
                                                         (), (), ()]),
            indices=np.array([[0, 0], [0, 1], [1, 0], [2, 0]]),
            dense_shape=np.array([3, 2]))

        expected = {
            'features': {
                'feature_1':
                np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
                'feature_2':
                np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
                'feature_3':
                types.SparseTensorValue(
                    values=np.array([1, 2, 3]),
                    indices=np.array([[0, 0, 1], [1, 0, 2], [2, 0, 0]]),
                    dense_shape=np.array([3, 1, 3])),
                'feature_4':
                merged_feature_4,
                'feature_5':
                types.SparseTensorValue(
                    values=np.array([1, 2, 3]),
                    indices=np.array([[0, 0, 1], [1, 0, 2], [2, 0, 3]]),
                    dense_shape=np.array([3, 1, 5])),
                'feature_6':
                types.VarLenTensorValue(values=np.array([2.0, 3.0]),
                                        indices=np.array([[2, 0], [2, 1]]),
                                        dense_shape=np.array([3, 2]))
            },
            'labels': np.array([1.0, 0.0, 1.0]),
            'example_weights': np.array([0.0, 0.5, 1.0]),
            'predictions': {
                'model1': np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]),
                'model2': np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
            },
            '_slice_key_types': merged_slice_keys
        }

        actual = util.merge_extracts(extracts)
        np.testing.assert_equal(actual, expected)
Esempio n. 7
0
    def testQueryStatistics(self):
        """Checks query/document counts over three queries of sizes 2, 3 and 1.

        Expected: 3 queries, 6 documents in total, at least 1 and at most 3
        documents per query.
        """
        metrics = query_statistics.QueryStatistics().computations(
            query_key='query')[0]

        def make_example(label, prediction, weight, query, gain):
            # One extract in the layout fed to tfma_util.merge_extracts.
            return {
                'labels': np.array([label]),
                'predictions': np.array([prediction]),
                'example_weights': np.array([weight]),
                'features': {
                    'query': np.array([query]),
                    'gain': np.array([gain])
                }
            }

        # One inner list per query.
        examples_per_query = [
            [
                make_example(1.0, 0.2, 1.0, 'query1', 1.0),
                make_example(0.0, 0.8, 1.0, 'query1', 0.5),
            ],
            [
                make_example(0.0, 0.5, 2.0, 'query2', 0.5),
                make_example(1.0, 0.9, 2.0, 'query2', 1.0),
                make_example(0.0, 0.1, 2.0, 'query2', 0.1),
            ],
            [
                make_example(1.0, 0.9, 3.0, 'query3', 1.0),
            ],
        ]
        examples = [
            tfma_util.merge_extracts(group) for group in examples_per_query
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            created = pipeline | 'Create' >> beam.Create(examples)
            processed = created | 'Process' >> beam.Map(
                metric_util.to_standard_metric_inputs, True)
            sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
            result = sliced | 'Combine' >> beam.CombinePerKey(metrics.combiner)
            # pylint: enable=no-value-for-parameter

            def check_result(actual):
                # Assertion failures are re-raised as BeamAssertException for
                # util.assert_that.
                try:
                    self.assertLen(actual, 1)
                    slice_key, metrics_by_key = actual[0]
                    self.assertEqual(slice_key, ())
                    expected = {
                        metric_types.MetricKey(name='total_queries'): 3,
                        metric_types.MetricKey(name='total_documents'): 6,
                        metric_types.MetricKey(name='min_documents'): 1,
                        metric_types.MetricKey(name='max_documents'): 3,
                    }
                    self.assertDictElementsAlmostEqual(metrics_by_key,
                                                       expected,
                                                       places=5)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Esempio n. 8
0
    def testNDCG(self):
        """Checks example-weighted NDCG@1 and NDCG@2 over four queries.

        The fourth query only has a zero gain; the expected values below are
        computed from the first three queries only.
        """
        # SubKeys will be a merger of top_k_list and sub_keys.
        requested_sub_keys = [
            None,
            metric_types.SubKey(top_k=1),
            metric_types.SubKey(top_k=2)
        ]
        metric = ndcg.NDCG(gain_key='gain', top_k_list=[1, 2]).computations(
            sub_keys=requested_sub_keys,
            query_key='query',
            example_weighted=True)[0]

        def make_example(label, prediction, weight, query, gain):
            # One extract in the layout fed to tfma_util.merge_extracts.
            return {
                'labels': np.array([label]),
                'predictions': np.array([prediction]),
                'example_weights': np.array([weight]),
                'features': {
                    'query': np.array([query]),
                    'gain': np.array([gain])
                }
            }

        # One inner list per query.
        examples_per_query = [
            [
                make_example(1.0, 0.2, 1.0, 'query1', 1.0),
                make_example(0.0, 0.8, 1.0, 'query1', 0.5),
            ],
            [
                make_example(0.0, 0.5, 2.0, 'query2', 0.5),
                make_example(1.0, 0.9, 2.0, 'query2', 1.0),
                make_example(0.0, 0.1, 2.0, 'query2', 0.1),
            ],
            [
                make_example(1.0, 0.9, 3.0, 'query3', 1.0),
            ],
            [
                # 0 gain is ignored
                make_example(1.0, 0.9, 3.0, 'query4', 0.0),
            ],
        ]
        examples = [
            tfma_util.merge_extracts(group) for group in examples_per_query
        ]

        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            created = pipeline | 'Create' >> beam.Create(examples)
            processed = created | 'Process' >> beam.Map(
                metric_util.to_standard_metric_inputs, True)
            sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
            result = sliced | 'Combine' >> beam.CombinePerKey(metric.combiner)
            # pylint: enable=no-value-for-parameter

            def check_result(actual):
                # Assertion failures are re-raised as BeamAssertException for
                # util.assert_that.
                try:
                    self.assertLen(actual, 1)
                    slice_key, metrics_by_key = actual[0]
                    self.assertEqual(slice_key, ())
                    ndcg_at_1 = metric_types.MetricKey(
                        name='ndcg',
                        sub_key=metric_types.SubKey(top_k=1),
                        example_weighted=True)
                    ndcg_at_2 = metric_types.MetricKey(
                        name='ndcg',
                        sub_key=metric_types.SubKey(top_k=2),
                        example_weighted=True)
                    # Ranked by prediction (p) with gains (g):
                    #   Query1 (weight=1): (p=0.8, g=0.5) (p=0.2, g=1.0)
                    #   Query2 (weight=2): (p=0.9, g=1.0) (p=0.5, g=0.5)
                    #                      (p=0.1, g=0.1)
                    #   Query3 (weight=3): (p=0.9, g=1.0)
                    #
                    # DCG@1:  0.5, 1.0, 1.0
                    # NDCG@1: 0.5, 1.0, 1.0
                    # Average NDCG@1:
                    #   (1 * 0.5 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.92
                    #
                    # DCG@2 (log is base 2):
                    #   (0.5 + 1.0/log2(3)), (1.0 + 0.5/log2(3)), (1.0)
                    # NDCG@2:
                    #   (0.5 + 1.0/log2(3)) / (1.0 + 0.5/log2(3)) ~ 0.860,
                    #   (1.0 + 0.5/log2(3)) / (1.0 + 0.5/log2(3)) = 1.0,
                    #   1.0
                    # Average NDCG@2:
                    #   (1 * 0.860 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.97
                    expected = {
                        ndcg_at_1: 0.9166667,
                        ndcg_at_2: 0.9766198
                    }
                    self.assertDictElementsAlmostEqual(metrics_by_key,
                                                       expected,
                                                       places=5)

                except AssertionError as err:
                    raise util.BeamAssertException(err)

            util.assert_that(result, check_result, label='result')
Esempio n. 9
0
    def testMergeExtracts(self):
        """Merges three extracts and compares against the expected batch.

        Each extract carries two dense features, one sparse feature, one
        ragged feature, a label, a scalar example weight, predictions for two
        models and a one-element '_slice_key_types' list.
        """

        def make_extract(dense, sparse_value, sparse_column, label, weight,
                         scores):
            # Every call builds fresh arrays so the extracts share no objects.
            return {
                'features': {
                    'feature_1':
                    np.array(dense),
                    'feature_2':
                    np.array(dense),
                    'feature_3':
                    types.SparseTensorValue(
                        values=np.array([sparse_value]),
                        indices=np.array([[0, sparse_column]]),
                        dense_shape=np.array([1, 3])),
                    'feature_4':
                    types.RaggedTensorValue(
                        values=np.array([3, 1, 4, 1, 5, 9, 2, 6]),
                        nested_row_splits=[np.array([0, 4, 4, 7, 8, 8])])
                },
                'labels': np.array([label]),
                'example_weights': np.array(weight),
                'predictions': {
                    'model1': np.array(scores),
                    'model2': np.array(scores)
                },
                '_slice_key_types': [()]
            }

        extracts = [
            make_extract([1.0, 2.0], 1, 1, 1.0, 0.0, [0.1, 0.2]),
            make_extract([3.0, 4.0], 2, 2, 0.0, 0.5, [0.3, 0.4]),
            make_extract([5.0, 6.0], 3, 0, 1.0, 1.0, [0.5, 0.6]),
        ]

        merged_sparse = types.SparseTensorValue(
            values=np.array([1, 2, 3]),
            indices=np.array([[0, 0, 1], [1, 0, 2], [2, 0, 0]]),
            dense_shape=np.array([3, 1, 3]))
        merged_ragged = types.RaggedTensorValue(
            values=np.array([
                3, 1, 4, 1, 5, 9, 2, 6, 3, 1, 4, 1, 5, 9, 2, 6, 3, 1, 4, 1,
                5, 9, 2, 6
            ]),
            nested_row_splits=[
                np.array([0, 5, 10, 15]),
                np.array([
                    0, 4, 4, 7, 8, 8, 12, 12, 15, 16, 16, 20, 20, 23, 24, 24
                ])
            ])

        expected = {
            'features': {
                'feature_1': np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
                'feature_2': np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
                'feature_3': merged_sparse,
                'feature_4': merged_ragged
            },
            'labels': np.array([1.0, 0.0, 1.0]),
            'example_weights': np.array([0.0, 0.5, 1.0]),
            'predictions': {
                'model1': np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]),
                'model2': np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
            },
            '_slice_key_types': np.array([(), (), ()])
        }

        actual = util.merge_extracts(extracts)
        self.assertAllClose(actual, expected)