def register_list(column_schema, feature):
    """Mark ``feature`` as list-valued when ``column_schema`` is a list column.

    Non-list columns are returned untouched. For list columns the optional
    ``"value_count"`` entry of ``column_schema.properties`` (read via its
    ``"min"`` and ``"max"`` keys) decides what is written:

    * equal, non-zero bounds -> a one-dimensional ``FixedShape`` of that size
      is copied into ``feature.shape``;
    * ``min < max`` -> ``feature.value_count`` is set to those bounds;
    * anything else (no bounds, or both zero) -> a placeholder
      ``ValueCount(min=0, max=0)`` is written to flag the feature as a list.

    Args:
        column_schema: Object exposing ``_is_list`` and a ``properties``
            mapping.
        feature: Feature message to update in place.

    Returns:
        The same ``feature`` object that was passed in.
    """
    # Test the flag itself: str(False) is the non-empty string "False", so
    # wrapping the flag in str() made this branch run for every column.
    if not column_schema._is_list:
        return feature

    min_length, max_length = None, None
    if "value_count" in column_schema.properties:
        value_count = column_schema.properties["value_count"]
        min_length = value_count["min"]
        max_length = value_count["max"]

    # Compare against None rather than relying on truthiness so that a
    # legitimate lower bound of 0 (e.g. min=0, max=5) is not discarded.
    has_bounds = min_length is not None and max_length is not None

    if has_bounds and min_length == max_length and min_length:
        shape = schema_pb2.FixedShape()
        dim = shape.dim.add()
        dim.size = min_length
        feature.shape.CopyFrom(shape)
    elif has_bounds and min_length < max_length:
        feature.value_count.CopyFrom(
            schema_pb2.ValueCount(min=min_length, max=max_length))
    else:
        # No usable bounds: write a dummy value to signal this is a list.
        feature.value_count.CopyFrom(schema_pb2.ValueCount(min=0, max=0))
    return feature
# Example #2
0
def _legacy_schema_from_feature_spec(feature_spec, domains=None):
    """Infer a Schema from a feature spec, using the legacy feature spec logic.

    Builds a Schema proto with `generate_legacy_feature_spec` set to true.
    Features are added in sorted name order. A `FixedLenFeature` becomes a
    feature with a fixed `value_count` (min == max) and a presence
    `min_fraction` of 1 when it has no default value, or 0 when its default
    equals the expected placeholder ('' for strings, -1 otherwise; repeated
    `k` times for shape `[k]`). A `VarLenFeature` becomes a feature with only
    its name set. Type and domain are then filled in by `_set_type` and
    `_set_domain`.

    Args:
        feature_spec: A TensorFlow feature spec (dict from feature name to
            `FixedLenFeature` or `VarLenFeature`).
        domains: An optional dict from key names to `IntDomain`s. `None` is
            treated as an empty dict.

    Returns:
        A Schema proto.

    Raises:
        ValueError: If a `FixedLenFeature` has an unsupported shape or an
            invalid default value.
        TypeError: If an unknown type of feature spec is encountered.
    """
    # The signature advertises None as the default, so it must be usable:
    # without this guard the per-feature `domains.get(name)` lookup below
    # raises AttributeError.
    if domains is None:
        domains = {}

    result = schema_pb2.Schema()
    result.generate_legacy_feature_spec = True
    for name, spec in sorted(feature_spec.items()):
        if isinstance(spec, tf.io.FixedLenFeature):
            scalar_default = '' if spec.dtype == tf.string else -1

            # Validate shape first as shape governs which default values are
            # valid.
            if len(spec.shape) == 0:  # pylint: disable=g-explicit-length-test
                size = 1
                expected_default_value = scalar_default
            elif len(spec.shape) == 1 and spec.shape[0] > 1:
                size = spec.shape[0]
                expected_default_value = [scalar_default] * size
            else:
                raise ValueError(
                    'When inferring legacy schema from feature spec, feature "{}" had '
                    'shape {}, but FixedLenFeature must have shape [] or [k] where '
                    'k > 1.'.format(name, spec.shape))

            # No default -> the feature must always be present; the
            # placeholder default -> it may be absent; anything else is
            # rejected.
            if spec.default_value is None:
                min_fraction = 1
            elif spec.default_value == expected_default_value:
                min_fraction = 0
            else:
                raise ValueError(
                    'When inferring legacy schema from feature spec, feature "{}" had '
                    'default_value {}, but FixedLenFeature must have '
                    'default_value=None or {}'.format(name, spec.default_value,
                                                      expected_default_value))

            feature = result.feature.add(
                name=name,
                presence=schema_pb2.FeaturePresence(min_fraction=min_fraction),
                value_count=schema_pb2.ValueCount(min=size, max=size))
        elif isinstance(spec, tf.io.VarLenFeature):
            feature = result.feature.add(name=name)
        else:
            raise TypeError(
                'When inferring legacy schema from feature spec, spec for feature '
                '"{}" was {} of type {}, expected a FixedLenFeature or '
                'VarLenFeature '.format(name, spec, type(spec)))

        _set_type(name, feature, spec.dtype)
        _set_domain(name, feature, domains.get(name))

    return result