Esempio n. 1
0
def _normalize_feature(feature: schema_pb2.Feature,
                       schema: schema_pb2.Schema) -> None:
    """Inline any schema-level domain that the feature refers to by name.

    Nested features under `struct_domain` are processed first, recursively.
    Then, if `feature.domain` is set, the schema's string, int and float
    domains are searched (in that order) for an entry with that name, and the
    first match is copied into the corresponding field of the feature.

    The copied domain keeps its name, so the original reference can still be
    recovered from the feature afterwards.

    Args:
      feature: feature to modify in place.
      schema: schema holding the named domains to look up.

    Raises:
      ValueError: if `feature.domain` is set but no string, int or float
        domain in the schema carries that name.
    """
    if feature.HasField("struct_domain"):
        for child in feature.struct_domain.feature:
            _normalize_feature(child, schema)

    if not feature.HasField("domain"):
        return

    wanted = feature.domain
    # The schema and the feature use the same field names for each domain
    # kind, so a single loop covers all three lookups. Order matters: it
    # decides which domain wins if a name appears under more than one kind.
    for field_name in ("string_domain", "int_domain", "float_domain"):
        for candidate in getattr(schema, field_name):
            if candidate.name == wanted:
                getattr(feature, field_name).CopyFrom(candidate)
                return

    raise ValueError("Did not find domain {} in schema {}".format(
        wanted, schema))
Esempio n. 2
0
def is_categorical_feature(feature: schema_pb2.Feature):
    """Report whether a schema feature should be treated as categorical.

    BYTES features are always categorical. INT features are categorical when
    they carry an `int_domain` flagged `is_categorical`, or when they carry a
    `bool_domain`. Every other feature type is not categorical.

    Args:
      feature: the schema feature to inspect.

    Returns:
      True if the feature is categorical, False otherwise.
    """
    feature_type = feature.type
    if feature_type == schema_pb2.BYTES:
        return True
    if feature_type != schema_pb2.INT:
        return False

    # INT features: an explicitly categorical int domain wins; otherwise the
    # presence of a bool domain decides.
    if feature.HasField('int_domain') and feature.int_domain.is_categorical:
        return True
    return feature.HasField('bool_domain')
Esempio n. 3
0
def is_categorical_feature(feature: schema_pb2.Feature):
    """Report whether a schema feature should be treated as categorical.

    BYTES features are always categorical. INT features are categorical when
    they carry an `int_domain` flagged `is_categorical`, or when the active
    `domain_info` field is `bool_domain` or `natural_language_domain`. Every
    other feature type is not categorical.

    Args:
      feature: the schema feature to inspect.

    Returns:
      True if the feature is categorical, False otherwise.
    """
    if feature.type == schema_pb2.BYTES:
        return True
    if feature.type != schema_pb2.INT:
        return False

    has_categorical_int_domain = (feature.HasField('int_domain')
                                  and feature.int_domain.is_categorical)
    if has_categorical_int_domain:
        return True

    # Otherwise fall back to the kind of domain attached to the INT feature.
    active_domain = feature.WhichOneof('domain_info')
    return active_domain in ('bool_domain', 'natural_language_domain')
Esempio n. 4
0
def _apply_feature(original_child: expression.Expression,
                   feature: schema_pb2.Feature):
    """Wrap an expression together with the schema feature that describes it.

    The feature is expected to be "unclean"; it is passed through
    `_clean_feature` before being handed to `_SchemaExpression`.

    Args:
      original_child: the expression the feature applies to.
      feature: the "unclean" feature describing `original_child`.

    Returns:
      A `_SchemaExpression` built from the expression, the feature's direct
      struct children (empty if it has no `struct_domain`), and the cleaned
      feature.
    """
    # Capture the nested features before _clean_feature sees the feature.
    if feature.HasField("struct_domain"):
        child_features = list(feature.struct_domain.feature)
    else:
        child_features = []
    return _SchemaExpression(original_child, child_features,
                             _clean_feature(feature))
Esempio n. 5
0
def _copy_domain_info(origin: schema_pb2.Feature, dest: schema_pb2.Feature):
    """Copy whichever `domain_info` field is set on `origin` onto `dest`.

    Does nothing when `origin` has no `domain_info` field set.

    Args:
      origin: feature to read the domain info from.
      dest: feature to modify in place.
    """
    active_name = origin.WhichOneof("domain_info")
    if active_name is None:
        return

    source_value = getattr(origin, active_name)

    descriptor = origin.DESCRIPTOR.fields_by_name.get(active_name)
    is_message_field = (descriptor is not None
                        and descriptor.message_type is not None)
    if is_message_field:
        # Message-typed fields are copied into the destination sub-message.
        getattr(dest, active_name).CopyFrom(source_value)
    else:
        # Anything else is assigned directly.
        setattr(dest, active_name, source_value)
Esempio n. 6
0
 def _infer_feature_shape(feature: schema_pb2.Feature):
     """Append dimensions to `feature.shape` derived from fixed value counts.

     Children under `struct_domain` are processed first, recursively. The
     feature itself is only considered when `presence.min_fraction` is 1:

     * If `value_count` is set with a non-zero `min` equal to `max`, a single
       dimension of that size is added.
     * Otherwise, if `value_counts` is set, one dimension per nestedness
       level is added, but only when every level has a non-zero `min` equal
       to its `max`. If any level fails that test, nothing is added.

     Args:
       feature: feature to modify in place.
     """
     if feature.HasField('struct_domain'):
         for child in feature.struct_domain.feature:
             _infer_feature_shape(child)

     # Currently shapes are inferred only for required features.
     if feature.presence.min_fraction != 1:
         return

     has_fixed_value_count = (
         feature.HasField('value_count')
         and feature.value_count.min != 0
         and feature.value_count.min == feature.value_count.max)
     if has_fixed_value_count:
         feature.shape.dim.add().size = feature.value_count.min
         return

     if not feature.HasField('value_counts'):
         return

     # Nestedness level > 1: every level must have a fixed, non-zero count,
     # otherwise no shape is inferred at all.
     levels = feature.value_counts.value_count
     fixed_at_every_level = all(
         level.min != 0 and level.min == level.max for level in levels)
     if not fixed_at_every_level:
         return
     for level in levels:
         feature.shape.dim.add().size = level.min