def testRaisesErrorWhenExampleWeightsDiffer(self):
    # Two examples that belong to the same query ('query1') carry different
    # example weights (1.0 vs 0.5). Pushing them through the MinLabelPosition
    # combiner is expected to surface a ValueError.
    metric = min_label_position.MinLabelPosition().computations(
        query_key='query')[0]

    query1_example1 = {
        'labels': np.array([0.0]),
        'predictions': np.array([0.2]),
        'example_weights': np.array([1.0]),
        'features': {
            'query': np.array(['query1'])
        }
    }
    query1_example2 = {
        'labels': np.array([1.0]),
        'predictions': np.array([0.8]),
        'example_weights': np.array([0.5]),
        'features': {
            'query': np.array(['query1'])
        }
    }

    # Keep the assertRaises scope limited to building and running the
    # pipeline, so a ValueError raised by the setup above fails the test
    # instead of silently satisfying it.
    with self.assertRaises(ValueError):
        with beam.Pipeline() as pipeline:
            # pylint: disable=no-value-for-parameter
            _ = (
                pipeline
                | 'Create' >> beam.Create([
                    tfma_util.merge_extracts(
                        [query1_example1, query1_example2])
                ])
                | 'Process' >> beam.Map(
                    metric_util.to_standard_metric_inputs, True)
                | 'AddSlice' >> beam.Map(lambda x: ((), x))
                | 'Combine' >> beam.CombinePerKey(metric.combiner))
def testMergeExtractsRaisesException(self):
    # The two extracts hold a 'feature_3' sparse value with different dense
    # shapes, (1, 2) and (1, 3). Merging them is expected to raise a
    # RuntimeError whose __cause__ is itself a RuntimeError.
    narrow_sparse = tf.compat.v1.SparseTensorValue(
        indices=np.array([[0, 1]]),
        values=np.array([1]),
        dense_shape=(1, 2))
    wide_sparse = tf.compat.v1.SparseTensorValue(
        indices=np.array([[0, 2]]),
        values=np.array([2]),
        dense_shape=(1, 3))
    extracts = [
        {'features': {'feature_3': narrow_sparse}},
        {'features': {'feature_3': wide_sparse}},
    ]

    def caused_by_runtime_error(exc):
        # The raised error must chain from another RuntimeError.
        return isinstance(exc.__cause__, RuntimeError)

    with self.assertRaisesWithPredicateMatch(RuntimeError,
                                             caused_by_runtime_error):
        util.merge_extracts(extracts)
def testMinLabelPositionWithNoWeightedExamples(self):
    # A single query whose only example has an example weight of 0.0: the
    # resulting 'min_label_position' metric is expected to be NaN.
    computation = min_label_position.MinLabelPosition().computations(
        query_key='query')[0]

    zero_weight_example = {
        'labels': np.array([1.0]),
        'predictions': np.array([0.2]),
        'example_weights': np.array([0.0]),
        'features': {
            'query': np.array(['query1'])
        }
    }
    merged_inputs = [tfma_util.merge_extracts([zero_weight_example])]

    def check_result(got):
        # Assertion failures are re-raised as BeamAssertException.
        try:
            self.assertLen(got, 1)
            slice_key, metrics_by_key = got[0]
            self.assertEqual(slice_key, ())
            position_key = metric_types.MetricKey(name='min_label_position')
            self.assertIn(position_key, metrics_by_key)
            self.assertTrue(math.isnan(metrics_by_key[position_key]))
        except AssertionError as err:
            raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        created = pipeline | 'Create' >> beam.Create(merged_inputs)
        processed = created | 'Process' >> beam.Map(
            metric_util.to_standard_metric_inputs, True)
        sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
        result = sliced | 'Combine' >> beam.CombinePerKey(
            computation.combiner)
        # pylint: enable=no-value-for-parameter

        util.assert_that(result, check_result, label='result')
def testMergeExtracts(self):
    # Three per-example extracts are merged into one batched extract: dense
    # values are stacked along a new leading axis and the sparse
    # 'feature_3' values are combined into one (3, 3) sparse value.
    first_extract = {
        'features': {
            'feature_1': np.array([1.0, 2.0]),
            'feature_2': np.array([1.0, 2.0]),
            'feature_3': tf.compat.v1.SparseTensorValue(
                indices=np.array([[0, 1]]),
                values=np.array([1]),
                dense_shape=(1, 3))
        },
        'labels': np.array([1.0]),
        'example_weights': np.array(0.0),
        'predictions': {
            'model1': np.array([0.1, 0.2]),
            'model2': np.array([0.1, 0.2])
        },
        '_slice_key_types': [()]
    }
    second_extract = {
        'features': {
            'feature_1': np.array([3.0, 4.0]),
            'feature_2': np.array([3.0, 4.0]),
            'feature_3': tf.compat.v1.SparseTensorValue(
                indices=np.array([[0, 2]]),
                values=np.array([2]),
                dense_shape=(1, 3))
        },
        'labels': np.array([0.0]),
        'example_weights': np.array(0.5),
        'predictions': {
            'model1': np.array([0.3, 0.4]),
            'model2': np.array([0.3, 0.4])
        },
        '_slice_key_types': [()]
    }
    third_extract = {
        'features': {
            'feature_1': np.array([5.0, 6.0]),
            'feature_2': np.array([5.0, 6.0]),
            'feature_3': tf.compat.v1.SparseTensorValue(
                indices=np.array([[0, 0]]),
                values=np.array([3]),
                dense_shape=(1, 3))
        },
        'labels': np.array([1.0]),
        'example_weights': np.array(1.0),
        'predictions': {
            'model1': np.array([0.5, 0.6]),
            'model2': np.array([0.5, 0.6])
        },
        '_slice_key_types': [()]
    }
    extracts = [first_extract, second_extract, third_extract]

    expected = {
        'features': {
            'feature_1': np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
            'feature_2': np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]),
            'feature_3': tf.compat.v1.SparseTensorValue(
                indices=np.array([[0, 1], [1, 2], [2, 0]]),
                values=np.array([1, 2, 3]),
                dense_shape=np.array([3, 3]))
        },
        'labels': np.array([1.0, 0.0, 1.0]),
        'example_weights': np.array([0.0, 0.5, 1.0]),
        'predictions': {
            'model1': np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]),
            'model2': np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
        },
        '_slice_key_types': np.array([(), (), ()])
    }

    merged = util.merge_extracts(extracts)
    self.assertAllClose(expected, merged)
def testNDCG(self):
    # Runs the NDCG combiner over three queries and checks the weighted
    # average NDCG at top_k=1 and top_k=2.

    def make_example(label, prediction, weight, query, gain):
        # Builds one per-document extract for the given query.
        return {
            'labels': np.array([label]),
            'predictions': np.array([prediction]),
            'example_weights': np.array([weight]),
            'features': {
                'query': np.array([query]),
                'gain': np.array([gain])
            }
        }

    # SubKeys will be a merger of top_k_list and sub_keys.
    requested_sub_keys = [
        metric_types.SubKey(top_k=1),
        metric_types.SubKey(top_k=2)
    ]
    computation = ndcg.NDCG(
        gain_key='gain', top_k_list=[1, 2]).computations(
            sub_keys=requested_sub_keys, query_key='query')[0]

    query1_docs = [
        make_example(1.0, 0.2, 1.0, 'query1', 1.0),
        make_example(0.0, 0.8, 1.0, 'query1', 0.5),
    ]
    query2_docs = [
        make_example(0.0, 0.5, 2.0, 'query2', 0.5),
        make_example(1.0, 0.9, 2.0, 'query2', 1.0),
        make_example(0.0, 0.1, 2.0, 'query2', 0.1),
    ]
    query3_docs = [
        make_example(1.0, 0.9, 3.0, 'query3', 1.0),
    ]
    examples = [
        tfma_util.merge_extracts(docs)
        for docs in (query1_docs, query2_docs, query3_docs)
    ]

    def check_result(got):
        # Assertion failures are re-raised as BeamAssertException.
        try:
            self.assertLen(got, 1)
            slice_key, metrics_by_key = got[0]
            self.assertEqual(slice_key, ())
            ndcg_at_1 = metric_types.MetricKey(
                name='ndcg', sub_key=metric_types.SubKey(top_k=1))
            ndcg_at_2 = metric_types.MetricKey(
                name='ndcg', sub_key=metric_types.SubKey(top_k=2))
            # Documents ordered by prediction (p) with their gain (g):
            #   Query1 (weight=1): (p=0.8, g=0.5) (p=0.2, g=1.0)
            #   Query2 (weight=2): (p=0.9, g=1.0) (p=0.5, g=0.5)
            #                      (p=0.1, g=0.1)
            #   Query3 (weight=3): (p=0.9, g=1.0)
            #
            # DCG@1:  0.5, 1.0, 1.0
            # NDCG@1: 0.5, 1.0, 1.0
            # Average NDCG@1:
            #   (1 * 0.5 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.92
            #
            # DCG@2:  (0.5 + 1.0/log(3)), (1.0 + 0.5/log(3)), (1.0)
            # NDCG@2: (0.5 + 1.0/log(3)) / (1.0 + 0.5/log(3)),
            #         (1.0 + 0.5/log(3)) / (1.0 + 0.5/log(3)),
            #         1.0
            # Average NDCG@2:
            #   (1 * 0.860 + 2 * 1.0 + 3 * 1.0) / (1 + 2 + 3) ~ 0.97
            self.assertDictElementsAlmostEqual(
                metrics_by_key, {
                    ndcg_at_1: 0.9166667,
                    ndcg_at_2: 0.9766198
                },
                places=5)
        except AssertionError as err:
            raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        created = pipeline | 'Create' >> beam.Create(examples)
        processed = created | 'Process' >> beam.Map(
            metric_util.to_standard_metric_inputs, True)
        sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
        result = sliced | 'Combine' >> beam.CombinePerKey(
            computation.combiner)
        # pylint: enable=no-value-for-parameter

        util.assert_that(result, check_result, label='result')
def testMinLabelPosition(self, label_key):
    # Runs the MinLabelPosition combiner over three queries, either with the
    # default labels or with labels read from the 'custom_label' feature
    # (selected through label_key), and checks the weighted average.

    def make_example(label, prediction, weight, custom_label, query):
        # Builds one per-document extract for the given query.
        return {
            'labels': np.array([label]),
            'predictions': np.array([prediction]),
            'example_weights': np.array([weight]),
            'features': {
                'custom_label': np.array([custom_label]),
                'query': np.array([query])
            }
        }

    computation = min_label_position.MinLabelPosition(
        label_key=label_key).computations(query_key='query')[0]

    query1_docs = [
        make_example(1.0, 0.2, 1.0, 0.0, 'query1'),
        make_example(0.0, 0.8, 1.0, 1.0, 'query1'),
    ]
    query2_docs = [
        make_example(1.0, 0.9, 2.0, 0.0, 'query2'),
        make_example(0.0, 0.1, 2.0, 1.0, 'query2'),
        make_example(0.0, 0.5, 2.0, 0.0, 'query2'),
    ]
    query3_docs = [
        make_example(1.0, 0.9, 3.0, 0.0, 'query3'),
    ]
    examples = [
        tfma_util.merge_extracts(docs)
        for docs in (query1_docs, query2_docs, query3_docs)
    ]

    if label_key:
        self.assertIsNotNone(computation.preprocessor)

    def check_result(got):
        # Assertion failures are re-raised as BeamAssertException.
        try:
            self.assertLen(got, 1)
            slice_key, metrics_by_key = got[0]
            self.assertEqual(slice_key, ())
            position_key = metric_types.MetricKey(name='min_label_position')
            self.assertIn(position_key, metrics_by_key)
            # custom_label:
            #   (1*1.0 + 3*2.0) / (1.0 + 2.0) = 2.333333
            # default labels:
            #   (2*1.0 + 1*2.0 + 1*3.0) / (1.0 + 2.0 + 3.0) = 1.166666
            expected_position = (
                2.333333 if label_key == 'custom_label' else 1.166666)
            self.assertAllClose(metrics_by_key[position_key],
                                expected_position)
        except AssertionError as err:
            raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        created = pipeline | 'Create' >> beam.Create(examples)
        processed = created | 'Process' >> beam.Map(
            metric_util.to_standard_metric_inputs, include_features=True)
        sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
        result = sliced | 'Combine' >> beam.CombinePerKey(
            computation.combiner)
        # pylint: enable=no-value-for-parameter

        util.assert_that(result, check_result, label='result')
def testQueryStatistics(self):
    # Runs the QueryStatistics combiner over three queries holding 2, 3 and
    # 1 documents, then checks the query/document totals and the min/max
    # documents per query.

    def make_example(label, prediction, weight, query, gain):
        # Builds one per-document extract for the given query.
        return {
            'labels': np.array([label]),
            'predictions': np.array([prediction]),
            'example_weights': np.array([weight]),
            'features': {
                'query': np.array([query]),
                'gain': np.array([gain])
            }
        }

    computation = query_statistics.QueryStatistics().computations(
        query_key='query')[0]

    query1_docs = [
        make_example(1.0, 0.2, 1.0, 'query1', 1.0),
        make_example(0.0, 0.8, 1.0, 'query1', 0.5),
    ]
    query2_docs = [
        make_example(0.0, 0.5, 2.0, 'query2', 0.5),
        make_example(1.0, 0.9, 2.0, 'query2', 1.0),
        make_example(0.0, 0.1, 2.0, 'query2', 0.1),
    ]
    query3_docs = [
        make_example(1.0, 0.9, 3.0, 'query3', 1.0),
    ]
    examples = [
        tfma_util.merge_extracts(docs)
        for docs in (query1_docs, query2_docs, query3_docs)
    ]

    def check_result(got):
        # Assertion failures are re-raised as BeamAssertException.
        try:
            self.assertLen(got, 1)
            slice_key, metrics_by_key = got[0]
            self.assertEqual(slice_key, ())
            total_queries_key = metric_types.MetricKey(name='total_queries')
            total_documents_key = metric_types.MetricKey(
                name='total_documents')
            min_documents_key = metric_types.MetricKey(name='min_documents')
            max_documents_key = metric_types.MetricKey(name='max_documents')
            expected_stats = {
                total_queries_key: 3,
                total_documents_key: 6,
                min_documents_key: 1,
                max_documents_key: 3
            }
            self.assertDictElementsAlmostEqual(
                metrics_by_key, expected_stats, places=5)
        except AssertionError as err:
            raise util.BeamAssertException(err)

    with beam.Pipeline() as pipeline:
        # pylint: disable=no-value-for-parameter
        created = pipeline | 'Create' >> beam.Create(examples)
        processed = created | 'Process' >> beam.Map(
            metric_util.to_standard_metric_inputs, True)
        sliced = processed | 'AddSlice' >> beam.Map(lambda x: ((), x))
        result = sliced | 'Combine' >> beam.CombinePerKey(
            computation.combiner)
        # pylint: enable=no-value-for-parameter

        util.assert_that(result, check_result, label='result')