Example #1
0
def _feature_from_feature_spec(spec, name, domains):
    """Builds a `schema_pb2.Feature` for one entry of a feature spec.

    A `tf.io.FixedLenFeature` becomes a feature that is always present
    (`min_fraction=1.0`) with a fixed shape copied from `spec.shape`; it must
    not carry a default value.  A `tf.io.VarLenFeature` becomes a feature
    carrying only its name.  In both cases the feature is then passed to
    `_set_type` with `spec.dtype` and to `_set_domain` with
    `domains.get(name)`.

    Args:
      spec: The feature spec entry to convert.
      name: The feature name, used in the proto and in error messages.
      domains: A mapping supporting `.get(name)`; the looked-up value is
        forwarded to `_set_domain`.

    Returns:
      The populated `schema_pb2.Feature`.

    Raises:
      ValueError: If a `FixedLenFeature` has a non-None `default_value`.
      TypeError: If `spec` is neither a `FixedLenFeature` nor a
        `VarLenFeature`.
    """
    is_fixed_len = isinstance(spec, tf.io.FixedLenFeature)

    # Reject unsupported spec types before building anything.
    if not is_fixed_len and not isinstance(spec, tf.io.VarLenFeature):
        raise TypeError(
            'Spec for feature "{}" was {} of type {}, expected a '
            'FixedLenFeature, VarLenFeature or SparseFeature'.format(
                name, spec, type(spec)))

    if is_fixed_len:
        default = spec.default_value
        if default is not None:
            raise ValueError(
                'feature "{}" had default_value {}, but FixedLenFeature must have '
                'default_value=None'.format(name, default))
        fixed_shape = schema_pb2.FixedShape(
            dim=[schema_pb2.FixedShape.Dim(size=extent)
                 for extent in spec.shape])
        always_present = schema_pb2.FeaturePresence(min_fraction=1.0)
        result = schema_pb2.Feature(
            name=name, presence=always_present, shape=fixed_shape)
    else:
        result = schema_pb2.Feature(name=name)

    _set_type(name, result, spec.dtype)
    _set_domain(name, result, domains.get(name))
    return result
Example #2
0
def _legacy_schema_from_feature_spec(feature_spec, domains=None):
    """Infer a Schema from a feature spec, using the legacy feature spec logic.

    Infers a Schema proto with generate_legacy_feature_spec set to true, which
    will result in the given feature spec and domains when
    schema_as_feature_spec is called.  This is used to represent feature specs
    that can only be represented when generate_legacy_feature_spec is true.  In
    particular, feature specs with a default value set.

    Features are added to the schema in sorted name order.  For a
    `FixedLenFeature` the shape must be `[]` or `[k]` with `k > 1`, and the
    default value must be either `None` (feature required, `min_fraction=1`)
    or the legacy sentinel (`''` for strings, `-1` otherwise, repeated `k`
    times for shape `[k]`), which marks the feature optional
    (`min_fraction=0`).

    Args:
      feature_spec: A TensorFlow feature spec
      domains: (Optional) A dict from key names to `IntDomain`s.  `None` is
        treated as an empty dict.

    Returns:
      A Schema proto.

    Raises:
      ValueError: If a shape or default value is invalid.
      TypeError: If an unknown type of feature spec is encountered.
    """
    # The signature allows domains=None, so normalise it here; otherwise the
    # `.get` lookup below would raise AttributeError on the first feature.
    if domains is None:
        domains = {}

    result = schema_pb2.Schema()
    result.generate_legacy_feature_spec = True
    for name, spec in sorted(feature_spec.items()):
        if isinstance(spec, tf.io.FixedLenFeature):
            # Validate shape first as shape governs which default values are valid.
            if len(spec.shape) == 0:  # pylint: disable=g-explicit-length-test
                size = 1
                expected_default_value = '' if spec.dtype == tf.string else -1
            elif len(spec.shape) == 1 and spec.shape[0] > 1:
                size = spec.shape[0]
                expected_default_value = [
                    '' if spec.dtype == tf.string else -1
                ] * size
            else:
                raise ValueError(
                    'When inferring legacy schema from feature spec, feature "{}" had '
                    'shape {}, but FixedLenFeature must have shape [] or [k] where '
                    'k > 1.'.format(name, spec.shape))

            # No default means the feature is required; the legacy sentinel
            # default means it is optional.  Any other default is rejected.
            if spec.default_value is None:
                min_fraction = 1
            elif spec.default_value == expected_default_value:
                min_fraction = 0
            else:
                raise ValueError(
                    'When inferring legacy schema from feature spec, feature "{}" had '
                    'default_value {}, but FixedLenFeature must have '
                    'default_value=None or {}'.format(name, spec.default_value,
                                                      expected_default_value))

            feature = result.feature.add(
                name=name,
                presence=schema_pb2.FeaturePresence(min_fraction=min_fraction),
                value_count=schema_pb2.ValueCount(min=size, max=size))
        elif isinstance(spec, tf.io.VarLenFeature):
            feature = result.feature.add(name=name)
        else:
            raise TypeError(
                'When inferring legacy schema from feature spec, spec for feature '
                '"{}" was {} of type {}, expected a FixedLenFeature or '
                'VarLenFeature '.format(name, spec, type(spec)))

        _set_type(name, feature, spec.dtype)
        _set_domain(name, feature, domains.get(name))

    return result