def _print_warnings(self, arg_values, result_value):
     """Prints a diagnostic report when the result tensor is far too large.

     Nothing is printed unless `result_value` is a tf.Tensor or a
     tf.SparseTensor whose element count exceeds
     10 * limits.MAX_TENSOR_ELEMENTS. For a SparseTensor, the count is taken
     over its `values` tensor.

     Args:
       arg_values: Iterable of argument wrappers. Each one is read through its
         `value`, `shape`, `is_primitive`, `num_elements()` and
         `reconstruct_expression()` members.
       result_value: The raw result to check.
     """
     if isinstance(result_value, tf.Tensor):
         counted = result_value
     elif isinstance(result_value, tf.SparseTensor):
         counted = result_value.values
     else:
         # Any other kind of result is never reported.
         return

     result_count = tf_coder_utils.num_tensor_elements(counted)
     if result_count <= 10 * limits.MAX_TENSOR_ELEMENTS:
         return

     headline = (
         'Warning: {} produced much-too-large tensor of shape {} and {} '
         'elements.')
     print(headline.format(
         self.name, result_value.shape.as_list(), result_count))

     for index, arg_value in enumerate(arg_values):
         raw = arg_value.value
         if isinstance(raw, tf.Tensor):
             arg_shape = arg_value.shape
             arg_count = arg_value.num_elements()
             print('  argument {} has shape {} and {} elements'.format(
                 index, arg_shape, arg_count))
             # Only small tensors are printed in full.
             if arg_count <= 20:
                 print('  argument {} is: {}'.format(index, raw))
         elif arg_value.is_primitive:
             print('  argument {} is: {}'.format(index, raw))
         else:
             print('  argument {} has type {}'.format(index, type(raw)))
         # The reconstruction is printed for every argument, whatever its kind.
         print('  argument {} has reconstruction: {}'.format(
             index, arg_value.reconstruct_expression()))
def featurize_input_and_output(
    input_value: value_module.Value,
    output_value: value_module.Value) -> Dict[Text, List[float]]:
  """Featurizes the relationship between one input and one output value.

  Every key of the returned dict starts with 'io_':

   * 'io_comparisons', list[int]: Results of `_comparison` between the input
       and output for total size, number of elements, rank, and then each pair
       of dimension lengths of the fixed-length shapes.
   * 'io_counts', list[int]: Number of input elements present in the output,
       number of output elements present in the input, and number of distinct
       values shared by both.
   * 'io_count_buckets', list[int]: 'io_counts' bucketed with
       COUNT_BOUNDARIES.
   * 'io_fractions', list[float]: The counts above divided (via
       `_safe_divide`) by the corresponding numbers of elements.
   * 'io_booleans', list[int]: 0/1 flags: equal shapes, equal dtypes, all
       inputs in output, all outputs in input, followed by one flag per input
       dimension length (is it a nonzero length found in the output shape?) and
       one per output dimension length (the reverse check).

  Args:
    input_value: The input Value.
    output_value: The output Value.

  Returns:
    The feature dict described above.
  """
  in_tensor = _as_tensor_to_featurize(input_value)
  out_tensor = _as_tensor_to_featurize(output_value)

  in_size = tf_coder_utils.num_tensor_elements(in_tensor)
  out_size = tf_coder_utils.num_tensor_elements(out_tensor)

  in_elements = _elements_list(in_tensor)
  out_elements = _elements_list(out_tensor)
  in_count = len(in_elements)
  out_count = len(out_elements)

  in_rank = len(in_tensor.shape)
  out_rank = len(out_tensor.shape)

  in_dims = _fixed_length_shape(in_tensor)
  out_dims = _fixed_length_shape(out_tensor)

  # Whole-tensor comparisons first, then one comparison per dimension pair.
  comparisons = [
      _comparison(in_size, out_size),
      _comparison(in_count, out_count),
      _comparison(in_rank, out_rank),
  ] + [_comparison(in_dim, out_dim)
       for in_dim, out_dim in zip(in_dims, out_dims)]

  # Overlap statistics: how many elements of one side also occur in the other.
  in_unique = set(in_elements)
  out_unique = set(out_elements)
  in_found_in_out = sum(1 for element in in_elements if element in out_unique)
  out_found_in_in = sum(1 for element in out_elements if element in in_unique)
  shared_unique = len(in_unique.intersection(out_unique))

  counts = [in_found_in_out, out_found_in_in, shared_unique]
  count_buckets = [_bucket(count, COUNT_BOUNDARIES) for count in counts]
  fractions = [
      _safe_divide(in_found_in_out, in_count),
      _safe_divide(out_found_in_in, out_count),
      _safe_divide(shared_unique, in_count),
      _safe_divide(shared_unique, out_count),
  ]

  booleans = [
      int(in_tensor.shape.as_list() == out_tensor.shape.as_list()),
      int(in_tensor.dtype == out_tensor.dtype),
      int(in_found_in_out == in_count),
      int(out_found_in_in == out_count),
  ]
  # A zero dimension length never counts as a match.
  booleans += [int(bool(in_dim) and in_dim in out_dims) for in_dim in in_dims]
  booleans += [int(bool(out_dim) and out_dim in in_dims)
               for out_dim in out_dims]

  features = {
      'io_comparisons': comparisons,
      'io_counts': counts,
      'io_count_buckets': count_buckets,
      'io_fractions': fractions,
      'io_booleans': booleans,
  }  # type: Dict[Text, List[float]]
  return features
def featurize_value(
    value: value_module.Value) -> Dict[Text, Union[List[float], List[Text]]]:
  """Builds a feature dict describing a single Value.

  The returned dict holds these entries, in this order:

   * 'kind': One int: 0 for a primitive, 1 for a sequence, 2 for a Tensor,
       3 for a SparseTensor, 4 otherwise.
   * 'dtype': One int, the index of the DType among the supported dtypes.
   * 'rank': One int in the range [0, MAX_RANK], where MAX_RANK stands for any
       rank greater than or equal to MAX_RANK.
   * 'shape': The fixed-length shape produced by `_fixed_length_shape`.
   * 'shape_buckets': 'shape' bucketed with COUNT_BOUNDARIES.
   * 'floats': Max, min, mean, and mean magnitude of the elements.
   * 'float_buckets': 'floats' bucketed with FLOAT_BOUNDARIES.
   * 'counts': Total size, number of elements, then numbers of positive, zero,
       one, negative, in-[0, 1], and unique elements.
   * 'count_buckets': 'counts' bucketed with COUNT_BOUNDARIES.
   * 'fractions': Number of elements over size, then each per-element count
       over the number of elements.
   * 'booleans': 0/1 flags: sorted, finite, all positive, all nonnegative, all
       negative, all zero-or-one, all in [0, 1], all unique.
   * 'value_string': The repr of the value.

  Args:
    value: The Value to featurize.

  Returns:
    The feature dict described above.

  Raises:
    ValueError: If the dtype of the value is not an int, float, or bool dtype
      known to tf_coder_utils.
  """
  tensor = _as_tensor_to_featurize(value)

  if value.is_primitive:
    kind = 0
  elif value.is_sequence:
    kind = 1
  elif value.is_tensor:
    kind = 2
  elif value.is_sparse_tensor:
    kind = 3
  else:
    kind = 4

  supported_dtypes = (
      tf_coder_utils.INT_DTYPES + tf_coder_utils.FLOAT_DTYPES + (tf.bool,))
  if tensor.dtype not in supported_dtypes:
    raise ValueError('Cannot featurize unsupported dtype: {}'.format(
        tensor.dtype))
  dtype_index = supported_dtypes.index(tensor.dtype)

  rank = min(len(tensor.shape), MAX_RANK)

  shape = _fixed_length_shape(tensor)
  shape_buckets = [_bucket(length, COUNT_BOUNDARIES) for length in shape]

  # "Elements" are the provided values (without default elements in
  # SparseTensors). They are cast to float32 because, e.g., the mean of a bool
  # tensor cannot be computed.
  elements = tf.cast(_elements(tensor), tf.float32)
  elements_list = _elements_list(tensor)

  floats = [
      tf_coder_utils.max_tensor_value(elements),
      tf_coder_utils.min_tensor_value(elements),
      float(tf.reduce_mean(elements)),
      float(tf.reduce_mean(tf.abs(elements))),
  ]
  float_buckets = [_bucket(stat, FLOAT_BOUNDARIES) for stat in floats]

  # The total size of the tensor, meaning the product of dimension lengths. This
  # may be huge for SparseTensors.
  size = tf_coder_utils.num_tensor_elements(tensor)
  # For SparseTensors, num_elements != size.
  num_elements = len(elements_list)

  def count_true(mask):
    """Returns the number of True entries in a boolean tensor, as an int."""
    return int(tf.reduce_sum(tf.cast(mask, tf.int32)))

  positive = count_true(elements > 0)
  zeros = count_true(tf.abs(tf.cast(elements, tf.float32)) < EPSILON)
  ones = count_true(tf.abs(tf.cast(elements, tf.float32) - 1.0) < EPSILON)
  negative = count_true(elements < 0)
  probabilities = count_true(tf.logical_and(elements >= 0, elements <= 1))
  unique = len(set(elements_list))

  counts = [size, num_elements, positive, zeros, ones, negative, probabilities,
            unique]
  count_buckets = [_bucket(count, COUNT_BOUNDARIES) for count in counts]

  # The first two counts are handled separately below; everything from index 2
  # onward is a per-element count that is divided by num_elements.
  assert counts[0] == size and counts[1] == num_elements
  fractions = [_safe_divide(num_elements, size)]
  fractions += [_safe_divide(count, num_elements) for count in counts[2:]]

  # TODO(kshi): Consider computing the number of times the mode appears.

  # TODO(kshi): Consider computing, for each axis, whether that axis represents
  # a probability distribution, with all elements in [0, 1] summing to 1.

  if len(elements.shape):
    is_sorted = int(tf.reduce_all(tf.equal(elements, tf.sort(elements))))
  else:
    # A rank-0 tensor is reported as sorted without calling tf.sort.
    is_sorted = 1
  is_finite = int(tf.reduce_all(tf.math.is_finite(elements)))

  booleans = [
      is_sorted,
      is_finite,
      int(positive == num_elements),  # All positive.
      int(positive + zeros == num_elements),  # All nonnegative.
      int(negative == num_elements),  # All negative.
      int(zeros + ones == num_elements),  # All zero or one.
      int(probabilities == num_elements),  # All within [0, 1].
      int(unique == num_elements),  # All unique.
  ]

  features = {
      'kind': [kind],
      'dtype': [dtype_index],
      'rank': [rank],
      'shape': shape,
      'shape_buckets': shape_buckets,
      'floats': floats,
      'float_buckets': float_buckets,
      'counts': counts,
      'count_buckets': count_buckets,
      'fractions': fractions,
      'booleans': booleans,
      'value_string': [repr(value)],
  }  # type: Dict[Text, Union[List[float], List[Text]]]
  return features
 def test_num_tensor_elements_using_content(self, content, expected_result):
      """Checks num_tensor_elements on a constant tensor built from content."""
      tensor = tf.constant(content)
      actual_result = tf_coder_utils.num_tensor_elements(tensor)
      self.assertEqual(actual_result, expected_result)
 def test_num_tensor_elements_using_shape(self, shape, expected_result):
      """Checks num_tensor_elements on a tensor of ones with the given shape."""
      tensor = tf.ones(shape)
      actual_result = tf_coder_utils.num_tensor_elements(tensor)
      self.assertEqual(actual_result, expected_result)
 def num_elements(self):
      """Returns the number of elements in the wrapped value.

      When `self.is_sparse_tensor` is true, the count is taken over
      `self.value.values`; otherwise it is taken over `self.value` itself.
      """
      counted = self.value.values if self.is_sparse_tensor else self.value
      return tf_coder_utils.num_tensor_elements(counted)