    def testEvaluateGraph(self):
        # Create some FPLs. The features aren't terribly useful for these
        # metrics; they are here to make sure one dense tensor and one sparse
        # tensor flow correctly through the feed/feed_list generation logic.
        features = {
            'age': {
                encoding.NODE_SUFFIX: np.array([1])
            },
            'language': {
                encoding.NODE_SUFFIX:
                tf.SparseTensorValue(indices=np.array([[0, 0]]),
                                     values=np.array(['english']),
                                     dense_shape=np.array([1, 1]))
            }
        }
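        # Note: the SparseTensorValue above mirrors what parsing a single
        # tf.Example with tf.VarLenFeature would produce for a batch of one.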
        predictions = {'predictions': {encoding.NODE_SUFFIX: np.array([2])}}
        # Have 3 labels of values 3, 23, 16 and 3 predictions of value 2 each.
        # Summing labels and predictions gives 3 + 23 + 16 + 2 + 2 + 2 = 48,
        # and the mean over the six values is 48 / 6 = 8.
        labels = {'labels': {encoding.NODE_SUFFIX: np.array([3])}}
        labels_2 = {'labels': {encoding.NODE_SUFFIX: np.array([23])}}
        labels_3 = {'labels': {encoding.NODE_SUFFIX: np.array([16])}}

        # Compile the actual FPLs
        fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                              features=features,
                                              predictions=predictions,
                                              labels=labels)
        fpl_2 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_2)
        fpl_3 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_3)
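        # input_ref points back at the originating input example; sharing
        # input_ref=0 across all three FPLs is fine here since these metrics
        # never dereference it.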

        # Set up a model agnostic config so we can get the FPLConfig.
        feature_map = {
            'age': tf.FixedLenFeature([], tf.float32),
            'language': tf.VarLenFeature(tf.string),
            'predictions': tf.FixedLenFeature([], tf.float32),
            'labels': tf.FixedLenFeature([], tf.float32)
        }
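        # In the model agnostic setup the feature spec must describe not just
        # the features but also the prediction and label tensors, since all of
        # them are fed into the graph rather than produced by a model.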

        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['labels'],
            prediction_keys=['predictions'],
            feature_spec=feature_map)

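        # add_mean_callback is defined at module level in this test file. As a
        # minimal sketch of what it is assumed to do (not its exact
        # definition), it concatenates every label and prediction tensor and
        # adds one tf.metrics-based mean plus one py_func-based total:
        #
        #   def add_mean_callback(features_dict, predictions_dict, labels_dict):
        #       del features_dict  # Unused by these metrics.
        #       values = tf.cast(
        #           tf.concat(list(labels_dict.values()) +
        #                     list(predictions_dict.values()), axis=0),
        #           tf.float64)
        #       return {
        #           'tf_metric_mean': tf.metrics.mean(values),
        #           'py_func_total_label': <a py_func metric summing values>,
        #       }
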
        # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
        evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
            [add_mean_callback],
            model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
                model_agnostic_config))
        evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
        outputs = evaluate_graph.get_metric_values()
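        # metrics_reset_update_get_list resets the metric variables, then runs
        # the update ops once per FPL; get_metric_values evaluates the final
        # value ops.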

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 8.0)
        self.assertEqual(outputs['py_func_total_label'], 48.0)

    def testEvaluateMultiLabelsPredictions(self):
        # Test case where we have multiple labels/predictions
        features = {'age': {encoding.NODE_SUFFIX: np.array([1])}}
        predictions = {
            'prediction': {
                encoding.NODE_SUFFIX: np.array([2])
            },
            'prediction_2': {
                encoding.NODE_SUFFIX: np.array([4])
            }
        }
        # Have 6 labels of values 3, 5, 23, 12, 16, 31 and
        # 6 predictions of values 2, 2, 2, 4, 4, 4 (each FPL carries two
        # labels and two predictions). Summing labels and predictions gives
        # 90 + 18 = 108, and the mean over the twelve values is 108 / 12 = 9.
        labels = {
            'label': {
                encoding.NODE_SUFFIX: np.array([3])
            },
            'label_2': {
                encoding.NODE_SUFFIX: np.array([5])
            }
        }
        labels_2 = {
            'label': {
                encoding.NODE_SUFFIX: np.array([23])
            },
            'label_2': {
                encoding.NODE_SUFFIX: np.array([12])
            }
        }
        labels_3 = {
            'label': {
                encoding.NODE_SUFFIX: np.array([16])
            },
            'label_2': {
                encoding.NODE_SUFFIX: np.array([31])
            }
        }

        # Compile the actual FPLs
        fpl = types.FeaturesPredictionsLabels(input_ref=0,
                                              features=features,
                                              predictions=predictions,
                                              labels=labels)
        fpl_2 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_2)
        fpl_3 = types.FeaturesPredictionsLabels(input_ref=0,
                                                features=features,
                                                predictions=predictions,
                                                labels=labels_3)

        # Set up a model agnostic config so we can get the FPLConfig.
        feature_map = {
            'age': tf.FixedLenFeature([], tf.float32),
            'prediction': tf.FixedLenFeature([], tf.int64),
            'prediction_2': tf.FixedLenFeature([], tf.int64),
            'label': tf.FixedLenFeature([], tf.int64),
            'label_2': tf.FixedLenFeature([], tf.int64)
        }

        model_agnostic_config = agnostic_predict.ModelAgnosticConfig(
            label_keys=['label', 'label_2'],
            prediction_keys=['prediction', 'prediction_2'],
            feature_spec=feature_map)
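        # With two label keys and two prediction keys, the callback (sketched
        # in the previous test) concatenates all four tensors, so the metrics
        # run over twelve values in total.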

        # Create a Model Agnostic Evaluate graph handler and feed in the FPL list.
        evaluate_graph = model_agnostic_evaluate_graph.ModelAgnosticEvaluateGraph(
            [add_mean_callback],
            model_agnostic_extractor.ModelAgnosticGetFPLFeedConfig(
                model_agnostic_config))
        evaluate_graph.metrics_reset_update_get_list([fpl, fpl_2, fpl_3])
        outputs = evaluate_graph.get_metric_values()

        # Verify that we got the right metrics out.
        self.assertEqual(2, len(outputs))
        self.assertEqual(outputs['tf_metric_mean'], 9.0)
        self.assertEqual(outputs['py_func_total_label'], 108.0)