예제 #1
0
 def testGetByKeysWithPrefix(self):
     """Checks lookups where the last key only matches as a 'key/' prefix.

     In both cases the entries stored as 'output/<name>' are expected to come
     back as a dict keyed by '<name>'.
     """
     # Prefixed entries directly under 'predictions'.
     two_level_input = {
         'predictions': {
             'output/all_classes': ['a', 'b'],
             'output/probabilities': [1],
         },
     }
     two_level_result = util.get_by_keys(two_level_input,
                                         ['predictions', 'output'])
     self.assertEqual({'all_classes': ['a', 'b'], 'probabilities': [1]},
                      two_level_result)

     # Prefixed entries one level deeper, under 'predictions' -> 'model'.
     three_level_input = {
         'predictions': {
             'model': {
                 'output/all_classes': ['a', 'b'],
                 'output/probabilities': [1],
             },
         },
     }
     three_level_result = util.get_by_keys(
         three_level_input, ['predictions', 'model', 'output'])
     self.assertEqual({'all_classes': ['a', 'b'], 'probabilities': [1]},
                      three_level_result)
예제 #2
0
    def testGetByKeysWitMultiLevel(self):
        """Checks that a key path is followed through nested dicts."""
        # Two levels of nesting.
        two_levels = {'predictions': {'output': [1]}}
        self.assertEqual(
            [1], util.get_by_keys(two_levels, ['predictions', 'output']))

        # Three levels of nesting.
        three_levels = {'predictions': {'model': {'output': [1]}}}
        self.assertEqual(
            [1],
            util.get_by_keys(three_levels,
                             ['predictions', 'model', 'output']))
예제 #3
0
 def _to_gains_example_weight(
     self,
     element: metric_types.StandardMetricInputs) -> Tuple[np.ndarray, float]:
   """Returns the gains reordered by prediction together with the weight.

   Args:
     element: Metric inputs; the gains are read from ``element.features``
       under ``self._gain_key``.

   Returns:
     Tuple of (gains indexed by the reversed argsort of the predictions,
     example weight as a float). The weight is 0.0 when no gain is positive.

   Raises:
     ValueError: If the gains and predictions differ in size.
   """
   converted = metric_util.to_label_prediction_example_weight(
       element,
       eval_config=self._eval_config,
       model_name=self._model_name,
       output_name=self._output_name,
       example_weighted=self._example_weighted,
       flatten=False,
       require_single_example_weight=True)  # pytype: disable=wrong-arg-types
   # Only the first yielded tuple is used; the label is not needed here.
   _, predictions, example_weight = next(converted)

   gains = util.get_by_keys(element.features, [self._gain_key])
   if gains.size != predictions.size:
     template = ('expected {} to be same size as predictions {} != {}: '
                 'gains={}, metric_keys={}, '
                 'StandardMetricInputs={}')
     raise ValueError(
         template.format(self._gain_key, gains.size, predictions.size, gains,
                         self._metric_keys, element))

   # Give the gains the same layout as the predictions before reordering.
   gains = gains.reshape(predictions.shape)
   # An example without any positive gain gets a weight of zero.
   if gains.max() <= 0:
     example_weight = 0.0

   # argsort is ascending, so reverse it to put the largest prediction first.
   descending_order = np.argsort(predictions)[::-1]
   return (gains[descending_order], float(example_weight))
예제 #4
0
 def add_input(
     self, accumulator: _MinLabelPositionAccumulator,
     element: metric_types.StandardMetricInputs
 ) -> _MinLabelPositionAccumulator:
     """Adds the weighted position of the first positive label to the totals.

     Labels are ordered by the reversed argsort of the predictions and the
     1-indexed position of the first label whose sum is positive is recorded.
     Elements without labels, or without any positive label, leave the
     accumulator unchanged.

     Args:
       accumulator: Running totals to update in place.
       element: Metric inputs for one example.

     Returns:
       The same accumulator instance.
     """
     converted = metric_util.to_label_prediction_example_weight(
         element,
         eval_config=self._eval_config,
         model_name=self._key.model_name,
         output_name=self._key.output_name,
         example_weighted=self._example_weighted,
         flatten=False,
         allow_none=True,
         require_single_example_weight=True)  # pytype: disable=wrong-arg-types
     labels, predictions, example_weight = next(converted)

     # A configured label key overrides the labels with a feature value.
     if self._label_key:
         labels = util.get_by_keys(element.features, [self._label_key])
     if labels is None:
         return accumulator

     ranked_labels = labels[np.argsort(predictions)[::-1]]
     # Positions are 1-indexed, so a found position is never zero.
     min_label_pos = next(
         (position
          for position, ranked in enumerate(ranked_labels, start=1)
          if np.sum(ranked) > 0), None)
     if min_label_pos is not None:
         accumulator.total_min_position += min_label_pos * float(
             example_weight)
         accumulator.total_weighted_examples += float(example_weight)
     return accumulator
예제 #5
0
 def _query(
     self,
     element: metric_types.StandardMetricInputs) -> Union[float, int, str]:
   """Returns the single query value stored in the element's features.

   Args:
     element: Metric inputs; the query is read from ``element.features``
       under ``self._query_key`` and flattened.

   Returns:
     The first entry of the flattened query values.

   Raises:
     ValueError: If there are no query values or they are not all equal.
   """
   values = util.get_by_keys(element.features, [self._query_key]).flatten()
   # The size check comes first so values[0] is never read on an empty array.
   if values.size != 0 and np.all(values == values[0]):
     return values[0]
   raise ValueError(
       'missing query value or not all values are the same: value={}, '
       'metric_keys={}, StandardMetricInputs={}'.format(
           values, self._metric_keys, element))
예제 #6
0
 def key_by_query_key(extracts: types.Extracts,
                      query_key: str) -> Tuple[str, types.Extracts]:
     """Pairs the extracts with the string form of their query key feature.

     Args:
       extracts: Extracts holding the features under constants.FEATURES_KEY.
       query_key: Name of the feature to key by.

     Returns:
       Tuple of (formatted query value, extracts). The key is '' and the
       missing-query-key counter is incremented when no value is available.
     """
     # The lookup is optional, so a missing feature is passed on as None.
     raw_value = util.get_by_keys(
         extracts, [constants.FEATURES_KEY, query_key], optional=True)
     value = metric_util.to_scalar(raw_value, tensor_name=query_key)
     if value is not None:
         return ('{}'.format(value), extracts)
     missing_query_key_counter.inc()
     return ('', extracts)
예제 #7
0
 def add_input(self, heap: _HeapType,
               element: metric_types.StandardMetricInputs) -> _HeapType:
     """Pushes the sampled feature value onto the heap under a random tag.

     Args:
       heap: Heap accumulator passed through to the parent implementation.
       element: Metric inputs; the value is read from ``element.features``
         under ``self._sampled_key``.

     Returns:
       Whatever the parent ``add_input`` returns for the (tag, value) pair.
     """
     # TODO(b/206546545): add support for sampling derived features
     value = util.get_by_keys(element.features, [self._sampled_key])
     tag = self._random_generator.random()
     if self._example_weighted:
         # Raise the tag to the power 1 / weight. For details, see Weighted
         # Random Sampling over Data Streams: https://arxiv.org/abs/1012.0256
         tag = tag**(1 / element.example_weight)
     entry = (tag, value)
     return super().add_input(heap, entry)
예제 #8
0
 def add_input(
         self, accumulator: Dict[str, List[float]],
         extracts: metric_types.StandardMetricInputs
 ) -> Dict[str, List[float]]:
     """Adds the example-weighted attribution scores to the running sums.

     Args:
       accumulator: Per-feature running sums, updated in place.
       extracts: Metric inputs that must contain constants.ATTRIBUTIONS_KEY.

     Returns:
       The same accumulator instance.

     Raises:
       ValueError: If the attributions are missing from the extracts.
     """
     if constants.ATTRIBUTIONS_KEY not in extracts:
         raise ValueError(
             '{} missing from extracts {}\n\n. An attribution extractor is '
             'required to use attribution metrics'.format(
                 constants.ATTRIBUTIONS_KEY, extracts))

     metric_key = self._key
     attributions = extracts[constants.ATTRIBUTIONS_KEY]
     # Narrow the attributions by model name, then output name, when set.
     for selector in (metric_key.model_name, metric_key.output_name):
         if selector:
             attributions = util.get_by_keys(attributions, [selector])

     converted = metric_util.to_label_prediction_example_weight(
         extracts,
         eval_config=self._eval_config,
         model_name=metric_key.model_name,
         output_name=metric_key.output_name,
         sub_key=metric_key.sub_key,
         example_weighted=metric_key.example_weighted,
         allow_none=True,
         flatten=False)
     # Only the example weight of the first yielded tuple is needed.
     _, _, example_weight = next(converted)
     example_weight = float(example_weight)

     sub_key = metric_key.sub_key
     for feature_name, scores in attributions.items():
         scores = util.to_numpy(scores)
         if sub_key is not None:
             if sub_key.class_id is not None:
                 scores = _scores_by_class_id(sub_key.class_id, scores)
             elif sub_key.k is not None:
                 # Keep only the k-th entry of the top-k scores.
                 top_scores = _scores_by_top_k(sub_key.k, scores)
                 scores = np.array(top_scores[sub_key.k - 1])
             elif sub_key.top_k is not None:
                 scores = _scores_by_top_k(sub_key.top_k, scores)
         # First sighting of a feature: start from one zero per score.
         if feature_name not in accumulator:
             accumulator[feature_name] = [0.0] * scores.size
         self._sum(accumulator[feature_name], scores * example_weight)
     return accumulator
예제 #9
0
 def add_input(self, accumulator: float,
               element: metric_types.StandardMetricInputs) -> float:
     """Adds the example weight of one element to the running count.

     A weight of 1.0 is used when the metric is not example weighted or the
     element has no example weight. When the example weight is a dict it is
     narrowed by model name (optionally) and then by output name.

     Args:
       accumulator: Running sum of example weights.
       element: Metric inputs; only ``element.example_weight`` is read.

     Returns:
       The accumulator plus the sum of the resolved example weight.

     Raises:
       ValueError: If the example weight is still a dict after the lookups.
     """
     if not self._example_weighted or element.example_weight is None:
         example_weight = np.array(1.0)
     else:
         example_weight = element.example_weight
     if isinstance(example_weight, dict) and self._model_name:
         # Weights are only optionally keyed by model name, so a miss keeps
         # the dict unchanged for the output name lookup below.
         value = util.get_by_keys(example_weight, [self._model_name],
                                  optional=True)
         if value is not None:
             example_weight = value
     if isinstance(example_weight, dict) and self._output_name:
         # NOTE(review): the third positional argument is presumably the
         # default value used when the output name is missing - confirm
         # against the util.get_by_keys signature.
         example_weight = util.get_by_keys(example_weight,
                                           [self._output_name],
                                           np.array(1.0))
     if isinstance(example_weight, dict):
         # The adjacent literals are concatenated verbatim, so the separating
         # space after "models" has to be part of the literal itself.
         raise ValueError(
             f'example_count cannot be calculated on a dict {example_weight}: '
             f'model_name={self._model_name}, output_name={self._output_name}.\n\n'
             'This is most likely a configuration error (for multi-output models '
             'a separate metric is needed for each output).')
     return accumulator + np.sum(example_weight)
예제 #10
0
 def add_input(self, accumulator: Dict[Text, List[float]],
               attributions: Dict[Text, Any]) -> Dict[Text, List[float]]:
     """Adds the attribution scores of one example to the running sums.

     Args:
       accumulator: Per-feature running sums, updated in place.
       attributions: Attribution scores; narrowed by model name and output
         name first when those are set on the metric key.

     Returns:
       The same accumulator instance.
     """
     metric_key = self._key
     # Narrow the attributions by model name, then output name, when set.
     for selector in (metric_key.model_name, metric_key.output_name):
         if selector:
             attributions = util.get_by_keys(attributions, [selector])

     sub_key = metric_key.sub_key
     for feature_name, scores in attributions.items():
         scores = util.to_numpy(scores)
         if sub_key is not None:
             if sub_key.class_id is not None:
                 scores = _scores_by_class_id(sub_key.class_id, scores)
             elif sub_key.k is not None:
                 # Keep only the k-th entry of the top-k scores.
                 top_scores = _scores_by_top_k(sub_key.k, scores)
                 scores = np.array(top_scores[sub_key.k - 1])
             elif sub_key.top_k is not None:
                 scores = _scores_by_top_k(sub_key.top_k, scores)
         # First sighting of a feature: start from one zero per score.
         if feature_name not in accumulator:
             accumulator[feature_name] = [0.0] * scores.size
         self._sum(accumulator[feature_name], scores)
     return accumulator
예제 #11
0
 def testGetByKeysMissingSecondaryKey(self):
     """A missing second-level key raises ValueError matching 'not found'."""
     data = {'predictions': {'missing': [1]}}
     with self.assertRaisesRegex(ValueError, 'not found'):
         util.get_by_keys(data, ['predictions', 'output'])
예제 #12
0
 def testGetByKeysMissingAndNonOptional(self):
     """A missing key or an empty dict value raises when not optional."""
     # First an absent key, then a key whose value is an empty dict.
     for data in ({}, {'labels': {}}):
         with self.assertRaisesRegex(ValueError, 'not found'):
             util.get_by_keys(data, ['labels'])
예제 #13
0
 def testGetByKeysMissingAndOptional(self):
     """A missing key or an empty dict value yields None when optional."""
     # First an absent key, then a key whose value is an empty dict.
     for data in ({}, {'labels': {}}):
         result = util.get_by_keys(data, ['labels'], optional=True)
         self.assertIsNone(result)
예제 #14
0
 def testGetByKeysMissingAndDefault(self):
     """A missing key or an empty dict value yields the supplied default."""
     # First an absent key, then a key whose value is an empty dict.
     for data in ({}, {'labels': {}}):
         result = util.get_by_keys(data, ['labels'], default_value='a')
         self.assertEqual('a', result)
예제 #15
0
 def testGetByKeys(self):
     """A single top-level key returns the value stored under it."""
     result = util.get_by_keys({'labels': [1]}, ['labels'])
     self.assertEqual([1], result)
예제 #16
0
def to_label_prediction_example_weight(
    inputs: metric_types.StandardMetricInputs,
    eval_config: Optional[config_pb2.EvalConfig] = None,
    model_name: Text = '',
    output_name: Text = '',
    sub_key: Optional[metric_types.SubKey] = None,
    aggregation_type: Optional[metric_types.AggregationType] = None,
    class_weights: Optional[Dict[int, float]] = None,
    fractional_labels: bool = False,
    flatten: bool = True,
    squeeze: bool = True,
    allow_none: bool = False,
    require_single_example_weight: bool = False
) -> Iterator[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Yields label, prediction, and example weights for use in calculations.

    Where applicable this function will perform model and output name lookups
    as well as any required class ID, top K, etc conversions. It will also
    apply prediction keys and label vocabularies given the necessary
    information is provided as part of the EvalConfig (or standard estimator
    based naming is used). The sparseness of labels will be inferred from the
    shapes of the labels and predictions (i.e. if the shapes are different
    then the labels will be assumed to be sparse).

    If successful, the final output of calling this function will be a tuple
    of numpy arrays representing the label, prediction, and example weight
    respectively. Labels and predictions will be returned in the same shape
    provided (default behavior) unless (1) flatten is True in which case a
    series of values (one per class ID) will be returned with last dimension
    of size 1 or (2) a sub_key is used in which case the last dimension may be
    re-shaped to match the new number of outputs (1 for class_id or k, top_k
    for top k with aggregation).

    Note that for top_k without aggregation, the non-top_k prediction values
    will be set to float('-inf'), but for top_k with aggregation the values
    will be truncated to only return the top k values.

    Examples:

      # default behavior
      #
      # Binary classification
      Input  : labels=[1] predictions=[0.6]
      Output : (np.array([1]), np.array([0.6]), np.array([1.0]))
      # Multi-class classification w/ sparse labels
      Input : labels=[2] predictions=[0.3, 0.6, 0.1]
      Output: (np.array([2]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))
      # Multi-class / multi-label classification w/ dense labels
      Input  : labels=[0, 1, 1] predictions=[0.3, 0.6, 0.1]
      Output : (np.array([0, 1, 1]), np.array([0.3, 0.6, 0.1]), np.array([1.0]))

      # flatten=True
      #
      # Multi-class classification w/ sparse labels
      Input  : labels=[2], predictions=[0.3, 0.6, 0.1]
      Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
               (np.array([0]), np.array([0.6]), np.array([1.0])),
               (np.array([1]), np.array([0.1]), np.array([1.0]))
      # Multi-class/multi-label classification w/ dense labels
      Input  : labels=[0, 0, 1], predictions=[0.3, 0.6, 0.1]
      Output : (np.array([0]), np.array([0.3]), np.array([1.0])),
               (np.array([0]), np.array([0.6]), np.array([1.0])),
               (np.array([1]), np.array([0.1]), np.array([1.0]))

      # sub_key.class_id=[2]
      #
      # Multi-class classification w/ sparse labels
      Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
      Output : (np.array([1]), np.array([0.1]), np.array([1.0]))
      # Multi-class classification w/ dense labels
      Input  : labels=[0, 0, 1] predictions=[0.3, 0.6, 0.1]
      Output : (np.array([1]), np.array([0.1]), np.array([1.0]))

      # sub_key.top_k=2 and aggregation_type is None (i.e. binarization of
      # top 2).
      #
      # Multi-class classification w/ sparse labels
      Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
      Output : (np.array([0, 0, 1]), np.array([0.3, 0.6, -inf]), np.array([1.0]))
      # Multi-class classification w/ dense labels
      Input  : labels=[0, 0, 1] predictions=[0.3, 0.1, 0.6]
      Output : (np.array([0, 0, 1]), np.array([0.3, -inf, 0.6]), np.array([1.0]))

      # sub_key.top_k=2 and aggregation_type is not None (i.e. aggregate top
      # 2).
      #
      # Multi-class classification w/ sparse labels
      Input  : labels=[2] predictions=[0.3, 0.6, 0.1]
      Output : (np.array([0, 1]), np.array([0.3, 0.6]), np.array([1.0]))
      # Multi-class classification w/ dense labels
      Input  : labels=[0, 0, 1] predictions=[0.3, 0.1, 0.6]
      Output : (np.array([0, 0]), np.array([0.3, 0.6]), np.array([1.0]))

      # sub_key.k=2 (i.e. binarization by choosing 2nd largest predicted
      # value).
      #
      # Multi-class classification w/ sparse labels
      Input  : labels=[0] predictions=[0.3, 0.6, 0.1]
      Output : (np.array([1]), np.array([0.3]), np.array([1.0]))
      # Multi-class classification w/ dense labels
      Input  : labels=[0] predictions=[0.3]
      Output : (np.array([0]), np.array([0.3]), np.array([1.0]))

    Args:
      inputs: Standard metric inputs.
      eval_config: Eval config
      model_name: Optional model name (if multi-model evaluation).
      output_name: Optional output name (if multi-output model type).
      sub_key: Optional sub key.
      aggregation_type: Optional aggregation type.
      class_weights: Optional class weights to apply to multi-class /
        multi-label labels and predictions. If used, flatten must also be
        True.
      fractional_labels: If true, each incoming tuple of (label, prediction,
        and example weight) will be split into two tuples as follows (where
        l, p, w represent the resulting label, prediction, and example weight
        values):
          (1) l = 0.0, p = prediction, and w = example_weight * (1.0 - label)
          (2) l = 1.0, p = prediction, and w = example_weight * label
        If enabled, an exception will be raised if labels are not within
        [0, 1]. The implementation is such that tuples associated with a
        weight of zero are not yielded. This means it is safe to enable
        fractional_labels even when the labels only take on the values of 0.0
        or 1.0.
      flatten: True to flatten the final label and prediction outputs so that
        the yielded values are always arrays of size 1. For example,
        multi-class / multi-label outputs would be converted into label and
        prediction pairs that could then be processed by a binary
        classification metric in order to compute a micro average over all
        classes. If the example weight is not a scalar, then they will be
        flattened as well, otherwise the same example weight value will be
        output for each pair of labels and predictions.
      squeeze: True to squeeze any outputs that have rank > 1. This transforms
        outputs such as np.array([[1]]) to np.array([1]).
      allow_none: True to allow labels or predictions with None values to be
        returned. When used, the values will be returned as empty
        np.ndarrays. The example weight will always be non-empty.
      require_single_example_weight: True to require that the example_weight
        be a single value. Multiple values are accepted only when they are
        all equal, in which case they are collapsed into one value.

    Yields:
      Tuple of (label, prediction, example_weight).

    Raises:
      Exception: Any error raised while preparing or yielding the values is
        re-raised as the same exception type with a description of this call
        appended to its message. This function itself raises ValueError when
        the label or example weight is still a dict after the name lookups,
        when a label or prediction is None and allow_none is False, when
        multiple example weights differ but a single weight is required, when
        the example weight size does not match the flattened size, when
        class_weights is used without flatten, or when labels, predictions
        and example weights cannot be paired up.
    """
    def fn_call_str():
        # Describes this call; appended to the message of any raised error.
        return (f'to_label_prediction_example_weight(inputs={inputs}, '
                f'eval_config={eval_config}, model_name={model_name}, '
                f'output_name={output_name}, sub_key={sub_key}, '
                f'aggregation_type={aggregation_type}, '
                f'class_weights={class_weights}, '
                f'fractional_labels={fractional_labels}, flatten={flatten}, '
                f'squeeze={squeeze}, allow_none={allow_none}, '
                'require_single_example_weight='
                f'{require_single_example_weight})')

    def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
        # Returns the nested entry when value is a mapping that has it,
        # otherwise returns value unchanged.
        if isinstance(value, Mapping):
            new_value = util.get_by_keys(value, keys, optional=True)
            if new_value is not None:
                return new_value
        return value

    try:
        prediction_key = ''
        label_key = ''
        if eval_config and eval_config.model_specs:
            for spec in eval_config.model_specs:
                # To maintain consistency between settings where single models are used,
                # always use '' as the model name regardless of whether a name is passed
                spec_name = spec.name if len(
                    eval_config.model_specs) > 1 else ''
                if spec_name == model_name:
                    prediction_key = spec.prediction_key
                    label_key = spec.label_key
                    break

        label = inputs.label
        if label_key:
            # This is to support a custom EvalSavedModel where the labels are a dict
            # but the keys are not output_names.
            label = optionally_get_by_keys(label, [label_key])
        prediction = inputs.prediction
        example_weight = inputs.example_weight
        if example_weight is None:
            example_weight = np.array(
                1.0, dtype=np.float32)  # tf-ranking needs float32
        if model_name:
            prediction = util.get_by_keys(prediction, [model_name])
            # Labels and weights can optionally be keyed by model name.
            label = optionally_get_by_keys(label, [model_name])
            example_weight = optionally_get_by_keys(example_weight,
                                                    [model_name])
        if output_name:
            prediction = util.get_by_keys(prediction, [output_name])
            # Labels and example weights can optionally be keyed by output name.
            label = optionally_get_by_keys(label, [output_name])
            example_weight = optionally_get_by_keys(example_weight,
                                                    [output_name])

        # Anything that is still a mapping here could not be resolved by the
        # label key, model name or output name lookups above.
        if isinstance(label, Mapping):
            raise ValueError(
                'unable to prepare label for metric computation because the label is '
                'a dict with unrecognized keys. If a multi-output model was used '
                'check that an output name was provided in all the relevant '
                'settings (ModelSpec.label_keys, MetricsSpec.output_names, etc): '
                f'label={label}, output_name={output_name}')
        if isinstance(example_weight, Mapping):
            raise ValueError(
                'unable to prepare example_weight for metric computation because the '
                'example_weight is a dict with unrecognized keys. If a multi-output '
                'model was used check that an output name was provided in all the '
                'relevant settings (ModelSpec.example_weight_keys, '
                f'MetricsSpec.output_names, etc): example_weight={example_weight}, '
                f'output_name={output_name}')

        label, prediction = prepare_labels_and_predictions(
            label, prediction, prediction_key)

        if not allow_none:
            for txt, value in zip(('label', 'prediction'),
                                  (label, prediction)):
                if value is None:
                    raise ValueError(
                        f'no value provided for {txt}\n\n'
                        'This may be caused by a configuration error (i.e. label, '
                        'and/or prediction keys were not specified) or an '
                        'error in the pipeline.')

        example_weight = util.to_numpy(example_weight)
        if require_single_example_weight and example_weight.size > 1:
            # Several weights are tolerated only when they are all identical;
            # they are then collapsed into a single scalar weight.
            example_weight = example_weight.flatten()
            if not np.all(example_weight == example_weight[0]):
                raise ValueError(
                    'if example_weight size > 0, the values must all be the same: '
                    f'example_weight={example_weight}\n\n'
                    'This is most likely a configuration error.')
            example_weight = np.array(example_weight[0])

        if sub_key is not None:
            if sub_key.class_id is not None:
                label, prediction = select_class_id(sub_key.class_id, label,
                                                    prediction)
            elif sub_key.k is not None:
                indices = top_k_indices(sub_key.k, prediction)
                if len(prediction.shape) == 1:
                    indices = indices[0]  # 1D
                else:
                    # 2D, take kth values
                    indices = (indices[0][0::sub_key.k],
                               indices[1][0::sub_key.k])
                if label.shape != prediction.shape:
                    label = one_hot(label, prediction)
                label = select_indices(label, indices)
                prediction = select_indices(prediction, indices)
            elif sub_key.top_k is not None:
                # Set all non-top-k predictions to -inf. Note that we do not sort.
                indices = top_k_indices(sub_key.top_k, prediction)
                if aggregation_type is None:
                    top_k_predictions = np.full(prediction.shape,
                                                float('-inf'))
                    top_k_predictions[indices] = prediction[indices]
                    prediction = top_k_predictions
                else:
                    if label.shape != prediction.shape:
                        label = one_hot(label, prediction)
                    label = select_indices(label, indices)
                    prediction = select_indices(prediction, indices)

        # For consistency, make sure all outputs are arrays (i.e. convert scalars)
        if label is not None and not label.shape:
            label = label.reshape((1, ))
        if prediction is not None and not prediction.shape:
            prediction = prediction.reshape((1, ))
        if not example_weight.shape:
            example_weight = example_weight.reshape((1, ))

        label = label if label is not None else np.array([])
        prediction = prediction if prediction is not None else np.array([])

        flatten_size = prediction.size or label.size
        if flatten:
            if example_weight.size == 1:
                # Repeat the single weight for every flattened value. item()
                # extracts the scalar because calling float() directly on an
                # array with ndim > 0 is deprecated by NumPy.
                example_weight = np.full(flatten_size,
                                         float(example_weight.item()))
            elif example_weight.size != flatten_size:
                raise ValueError(
                    'example_weight size does not match the size of labels and '
                    'predictions: label={}, prediction={}, example_weight={}'
                    .format(label, prediction, example_weight))

        if class_weights:
            if not flatten:
                raise ValueError(
                    'class_weights can only be used when flatten is also used: '
                    f'class_weights={class_weights}, flatten={flatten}\n\n'
                    'This is likely caused by a configuration error (i.e. micro '
                    "averaging being applied to metrics that don't support micro "
                    'averaging)')
            # Classes without an explicit weight are given a weight of zero.
            example_weight = np.array([
                example_weight[i] *
                class_weights[i] if i in class_weights else 0.0
                for i in range(flatten_size)
            ])

        def yield_results(label, prediction, example_weight):
            # Yields the values as-is when not flattening (or when there is
            # nothing to split), otherwise one tuple per flattened value.
            if (not flatten or (label.size == 0 and prediction.size == 0)
                    or (label.size == 1 and prediction.size == 1
                        and example_weight.size == 1)):
                if squeeze:
                    yield _squeeze(label), _squeeze(prediction), _squeeze(
                        example_weight)
                else:
                    yield label, prediction, example_weight
            elif label.size == 0:
                for p, w in zip(prediction.flatten(),
                                example_weight.flatten()):
                    yield label, np.array([p]), np.array([w])
            elif prediction.size == 0:
                for l, w in zip(label.flatten(), example_weight.flatten()):
                    yield np.array([l]), prediction, np.array([w])
            elif label.size == prediction.size and label.size == example_weight.size:
                for l, p, w in zip(label.flatten(), prediction.flatten(),
                                   example_weight.flatten()):
                    yield np.array([l]), np.array([p]), np.array([w])
            elif label.shape[
                    -1] == 1 and prediction.size == example_weight.size:
                # Labels with a last dimension of 1 are expanded with one_hot
                # so they can be paired with each prediction.
                label = one_hot(label, prediction)
                for l, p, w in zip(label.flatten(), prediction.flatten(),
                                   example_weight.flatten()):
                    yield np.array([l]), np.array([p]), np.array([w])
            else:
                raise ValueError(
                    'unable to pair labels, predictions, and example weights: '
                    f'label={label}, prediction={prediction}, '
                    f'example_weight={example_weight}\n\n'
                    'This is most likely a configuration error.')

        for result in yield_results(label, prediction, example_weight):
            if fractional_labels and label.size:
                for new_result in _yield_fractional_labels(*result):
                    yield new_result
            else:
                yield result
    except Exception as e:
        # Re-raise as the same type with the call description appended, while
        # keeping the traceback of the original error.
        raise type(e)(str(e) + f'\n\n{fn_call_str()}').with_traceback(
            e.__traceback__)
예제 #17
0
 def get_by_keys(value: Any, keys: List[str]) -> Any:
     """Returns the nested entry of ``value`` at ``keys`` when one is found.

     Args:
       value: Any object; only dict instances are searched.
       keys: Key path handed to ``util.get_by_keys``.

     Returns:
       The looked-up entry, or ``value`` itself when it is not a dict or the
       optional lookup returns None.
     """
     if not isinstance(value, dict):
         return value
     found = util.get_by_keys(value, keys, optional=True)
     return value if found is None else found
예제 #18
0
 def optionally_get_by_keys(value: Any, keys: List[Text]) -> Any:
     """Returns the nested entry of ``value`` at ``keys`` when one is found.

     Args:
       value: Any object; only Mapping instances are searched.
       keys: Key path handed to ``util.get_by_keys``.

     Returns:
       The looked-up entry, or ``value`` itself when it is not a Mapping or
       the optional lookup returns None.
     """
     if not isinstance(value, Mapping):
         return value
     found = util.get_by_keys(value, keys, optional=True)
     return value if found is None else found