def _sparse_feature_from_feature_spec(spec, name, domains):
    """Converts a single-index SparseFeature spec into schema protos.

    Args:
        spec: Sparse feature spec. Its `index_key`, `size`, `value_key`,
            `dtype` and `already_sorted` attributes are read.
        name: Name of the resulting SparseFeature. Also the key used to look
            up the value feature's domain in `domains`.
        domains: Mapping from feature name to domain; `domains.get(name)` is
            passed to `_set_domain` for the value feature.

    Returns:
        A tuple `(index_feature, value_feature, sparse_feature)`.

    Raises:
        ValueError: If `spec.index_key` or `spec.size` is a list.
    """
    index_key = spec.index_key
    if isinstance(index_key, list):
        raise ValueError(
            'SparseFeature "{}" had index_key {}, but size and index_key '
            'fields should be single values'.format(name, index_key))

    size = spec.size
    if isinstance(size, list):
        raise ValueError(
            'SparseFeature "{}" had size {}, but size and index_key fields '
            'should be single values'.format(name, size))

    value_key = spec.value_key

    # Index feature: an INT feature whose domain is [0, size - 1].
    index_domain = schema_pb2.IntDomain(min=0, max=size - 1)
    index_feature = schema_pb2.Feature(
        name=index_key, type=schema_pb2.INT, int_domain=index_domain)

    # Value feature: type and domain are filled in by the shared helpers.
    value_feature = schema_pb2.Feature(name=value_key)
    _set_type(name, value_feature, spec.dtype)
    _set_domain(name, value_feature, domains.get(name))

    # The sparse feature only refers to the two features above by name.
    # `is_sorted` is left unset (None) unless the spec says it is sorted.
    sparse_feature = schema_pb2.SparseFeature(
        name=name,
        is_sorted=True if spec.already_sorted else None,
        index_feature=[schema_pb2.SparseFeature.IndexFeature(name=index_key)],
        value_feature=schema_pb2.SparseFeature.ValueFeature(name=value_key))

    return index_feature, value_feature, sparse_feature
def test_look_up_feature(self):
    """look_up_feature returns the feature with the given name, else None."""
    first = text_format.Parse("""name: "feature1" """, schema_pb2.Feature())
    second = text_format.Parse("""name: "feature2" """, schema_pb2.Feature())
    features = [first, second]

    # (name to look up, expected result); 'feature3' is not in the container.
    cases = (
        ('feature1', first),
        ('feature2', second),
        ('feature3', None),
    )
    for wanted_name, expected in cases:
        self.assertEqual(
            schema_util.look_up_feature(wanted_name, features), expected)
def _sparse_feature_from_feature_spec(spec, name, domains):
    """Converts a SparseFeature spec (one or many index keys) to schema protos.

    Args:
        spec: Sparse feature spec. Its `index_key`, `size`, `value_key`,
            `dtype` and `already_sorted` attributes are read. When
            `index_key` is a list, `size` must be a list, tuple or
            `tf.TensorShape` of the same length.
        name: Name of the resulting SparseFeature. Also the key used to look
            up the value feature's domain in `domains`.
        domains: Mapping from feature name to domain; `domains.get(name)` is
            passed to `_set_domain` for the value feature.

    Returns:
        A tuple `(index_feature, value_feature, sparse_feature)` where
        `index_feature` is a list of index Feature protos.
    """
    index_keys = spec.index_key

    if isinstance(index_keys, list):
        sizes = spec.size
        assert isinstance(sizes, (list, tuple, tf.TensorShape)), type(sizes)
        assert len(index_keys) == len(sizes), (index_keys, sizes)

        def _domain_for(dim):
            # A Dimension object is unwrapped to its value; an unknown
            # (None) size yields no domain at all.
            size = dim.value if isinstance(dim, tf.compat.v1.Dimension) else dim
            if size is None:
                return None
            return schema_pb2.IntDomain(min=0, max=size - 1)

        int_domains = [_domain_for(dim) for dim in sizes]

        index_feature = []
        for key, int_domain in zip(index_keys, int_domains):
            index_feature.append(
                schema_pb2.Feature(
                    name=key, type=schema_pb2.INT, int_domain=int_domain))
        index_feature_ref = [
            schema_pb2.SparseFeature.IndexFeature(name=key)
            for key in index_keys
        ]
    else:
        # Single index key: one INT feature with domain [0, size - 1].
        single_domain = schema_pb2.IntDomain(min=0, max=spec.size - 1)
        index_feature = [
            schema_pb2.Feature(
                name=index_keys, type=schema_pb2.INT, int_domain=single_domain)
        ]
        index_feature_ref = [
            schema_pb2.SparseFeature.IndexFeature(name=index_keys)
        ]

    # Value feature: type and domain are filled in by the shared helpers.
    value_key = spec.value_key
    value_feature = schema_pb2.Feature(name=value_key)
    _set_type(name, value_feature, spec.dtype)
    _set_domain(name, value_feature, domains.get(name))

    # The sparse feature refers to the index and value features by name.
    sparse_feature = schema_pb2.SparseFeature(
        name=name,
        is_sorted=True if spec.already_sorted else None,
        index_feature=index_feature_ref,
        value_feature=schema_pb2.SparseFeature.ValueFeature(name=value_key))

    return index_feature, value_feature, sparse_feature
def test_map_prensor_to_prensor_with_schema(self):
    """Schema features given in the output schema show up on the new fields."""
    original = create_expression.create_expression_from_prensor(
        prensor_test_util.create_nested_prensor())

    def my_prensor_op(original_prensor):
        # Note that we are copying over the original root prensor node. The
        # root node is ignored in the result.
        bar_node = original_prensor.get_child_or_error("bar").node
        keep_me_node = original_prensor.get_child_or_error("keep_me").node
        return prensor.create_prensor_from_descendant_nodes({
            path.Path([]): original_prensor.node,
            path.Path(["bar2"]): bar_node,
            path.Path(["keep_me2"]): keep_me_node,
        })

    def feature_with_max_values(max_values):
        # Builds a Feature whose only populated field is value_count.max.
        feature = schema_pb2.Feature()
        feature.value_count.max = max_values
        return feature

    bar2_spec = {
        "is_repeated": True,
        "dtype": tf.string,
        "schema_feature": feature_with_max_values(7),
    }
    keep_me2_spec = {
        "is_repeated": False,
        "dtype": tf.bool,
        "schema_feature": feature_with_max_values(10),
    }

    # Since the top node is actually a child node, we use the child schema.
    my_output_schema = map_prensor_to_prensor.create_schema(
        is_repeated=True,
        children={"bar2": bar2_spec, "keep_me2": keep_me2_spec})

    result = map_prensor_to_prensor.map_prensor_to_prensor(
        root_expr=original,
        source=path.Path(["doc"]),
        paths_needed=[path.Path(["bar"]), path.Path(["keep_me"])],
        prensor_op=my_prensor_op,
        output_schema=my_output_schema)

    doc_result = result.get_child_or_error("doc")

    bar2_result = doc_result.get_child_or_error("bar2")
    self.assertEqual(bar2_result.schema_feature.value_count.max, 7)

    keep_me2_result = doc_result.get_child_or_error("keep_me2")
    self.assertEqual(keep_me2_result.schema_feature.value_count.max, 10)
def _ragged_tensor_representation_from_feature_spec(
    spec: common_types.RaggedFeature, name: str,
    domains: Dict[str, common_types.DomainType]
) -> Tuple[schema_pb2.Feature, List[schema_pb2.Feature],
           schema_pb2.TensorRepresentation]:
    """Builds the schema pieces that describe a RaggedTensor feature spec.

    Args:
        spec: A tf.io.RaggedFeature feature spec.
        name: Feature name. Used as the value feature's name when
            `spec.value_key` is empty, and as the lookup key in `domains`.
        domains: A dict whose keys are feature names and values are one of
            schema_pb2.IntDomain, schema_pb2.StringDomain or
            schema_pb2.FloatDomain.

    Returns:
        A tuple (value_feature, partitions_features, ragged_tensor_rep):
        the feature holding the ragged values, one INT feature per
        RowLengths partition, and the ragged TensorRepresentation.

    Raises:
        ValueError: If the feature spec contains a partition that is neither
            UniformRowLength nor RowLengths.
    """
    value_name = spec.value_key or name

    value_feature = schema_pb2.Feature(name=value_name)
    _set_type(name, value_feature, spec.dtype)
    _set_domain(name, value_feature, domains.get(name))

    ragged_cls = schema_pb2.TensorRepresentation.RaggedTensor
    ragged_tensor = ragged_cls(feature_path=path_pb2.Path(step=[value_name]))

    partitions_features = []
    for partition in spec.partitions:
        if isinstance(partition, tf.io.RaggedFeature.UniformRowLength):  # pytype: disable=attribute-error
            # A fixed row length needs no extra feature in the schema.
            ragged_tensor.partition.append(
                ragged_cls.Partition(uniform_row_length=partition.length))
            continue

        if not isinstance(partition, tf.io.RaggedFeature.RowLengths):  # pytype: disable=attribute-error
            raise ValueError(
                'RaggedFeature can only be created with UniformRowLength and '
                'RowLengths partitions.')

        # Row lengths are read from their own INT feature, named by the key.
        ragged_tensor.partition.append(
            ragged_cls.Partition(row_length=partition.key))
        partitions_features.append(
            schema_pb2.Feature(name=partition.key, type=schema_pb2.INT))

    tensor_representation = schema_pb2.TensorRepresentation(
        ragged_tensor=ragged_tensor)
    return value_feature, partitions_features, tensor_representation
def test_get_schema(self):
    """get_schema() nests child features and keeps attached schema info."""
    make_expr = expression_test_util.MockExpression

    foo_feature = schema_pb2.Feature()
    foo_feature.int_domain.max = 10
    foo_expr = make_expr(
        is_repeated=False, my_type=tf.int64, schema_feature=foo_feature)
    foorepeated_expr = make_expr(is_repeated=True, my_type=tf.int64)

    bar_feature = schema_pb2.Feature()
    bar_feature.presence.min_count = 17
    bar_expr = make_expr(
        is_repeated=True, my_type=tf.string, schema_feature=bar_feature)
    keep_me_expr = make_expr(is_repeated=False, my_type=tf.bool)

    doc_expr = make_expr(
        is_repeated=True,
        my_type=tf.int64,
        children={"bar": bar_expr, "keep_me": keep_me_expr})
    root_expr = make_expr(
        is_repeated=True,
        my_type=None,
        children={
            "foo": foo_expr,
            "foorepeated": foorepeated_expr,
            "doc": doc_expr,
        })

    top_level = _features_as_map(root_expr.get_schema().feature)

    # First-level features, including the int_domain attached to "foo".
    self.assertIn("foo", top_level)
    self.assertEqual(top_level["foo"].int_domain.max, 10)
    self.assertIn("foorepeated", top_level)

    # A node with children is exported as a STRUCT feature.
    self.assertEqual(top_level["doc"].type, schema_pb2.FeatureType.STRUCT)
    second_level = _features_as_map(top_level["doc"].struct_domain.feature)

    # Second-level features, including the presence info attached to "bar".
    self.assertIn("bar", second_level)
    self.assertEqual(second_level["bar"].presence.min_count, 17)
    self.assertIn("keep_me", second_level)
def test_stats_options_invalid_slicing_sql_query(self):
    """StatsOptions rejects the slice SQL below for the given schema."""
    # The schema declares feat1 and feat3; the query refers to feat1 and feat2.
    declared_features = [
        schema_pb2.Feature(name='feat1', type=schema_pb2.BYTES),
        schema_pb2.Feature(name='feat3', type=schema_pb2.INT),
    ]
    schema = schema_pb2.Schema(feature=declared_features)

    slice_query = """ SELECT STRUCT(feat1, feat2) FROM example.feat1, example.feat2 """

    with self.assertRaisesRegex(ValueError, 'One of the slice SQL query'):
        stats_options.StatsOptions(
            experimental_slice_sqls=[slice_query], schema=schema)
def test_get_schema_missing_features(self):
    """get_schema() can either add or omit features missing from the schema."""
    # The expr has a number of features: foo, foorepeated, doc, user.
    expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())

    # The schema has only a subset of the features on the expr.
    schema = schema_pb2.Schema()

    def add_int_feature(feature_name, min_values, max_values):
        # Appends an INT feature with an explicit value_count range.
        int_feature = schema.feature.add()
        int_feature.name = feature_name
        int_feature.type = schema_pb2.FeatureType.INT
        int_feature.value_count.min = min_values
        int_feature.value_count.max = max_values

    add_int_feature("foo", 1, 1)
    add_int_feature("foorepeated", 0, 5)

    doc_feature = schema.feature.add()
    doc_feature.name = "doc"
    doc_feature.type = schema_pb2.FeatureType.STRUCT
    doc_feature.struct_domain.feature.append(
        schema_pb2.Feature(name="keep_me", type=schema_pb2.FeatureType.INT))

    # By default, the output schema has all features present in the expr.
    expr_with_schema = expr.apply_schema(schema)
    full_schema = expr_with_schema.get_schema()
    self.assertNotEqual(schema, full_schema)
    self.assertLen(schema.feature, 3)
    self.assertLen(full_schema.feature, 4)

    # With create_schema_features=False, only features on the original
    # schema propagate to the new schema.
    restricted_schema = expr_with_schema.get_schema(
        create_schema_features=False)
    self.assertLen(restricted_schema.feature, 3)
def _feature_from_feature_spec(spec, name, domains):
    """Converts a FixedLenFeature or VarLenFeature spec to a Feature proto.

    Args:
        spec: A `tf.io.FixedLenFeature` or `tf.io.VarLenFeature`.
        name: Name of the resulting feature. Also the lookup key in `domains`.
        domains: Mapping from feature name to domain; `domains.get(name)` is
            passed to `_set_domain`.

    Returns:
        The populated `schema_pb2.Feature`.

    Raises:
        ValueError: If a FixedLenFeature carries a non-None default_value.
        TypeError: If `spec` is neither of the two supported spec types.
    """
    if not isinstance(spec, (tf.io.FixedLenFeature, tf.io.VarLenFeature)):
        raise TypeError(
            'Spec for feature "{}" was {} of type {}, expected a '
            'FixedLenFeature, VarLenFeature or SparseFeature'.format(
                name, spec, type(spec)))

    if isinstance(spec, tf.io.FixedLenFeature):
        default_value = spec.default_value
        if default_value is not None:
            raise ValueError(
                'feature "{}" had default_value {}, but FixedLenFeature must have '
                'default_value=None'.format(name, default_value))
        # Fixed-length features are always present and carry a fixed shape.
        fixed_shape = schema_pb2.FixedShape(
            dim=[schema_pb2.FixedShape.Dim(size=size) for size in spec.shape])
        feature = schema_pb2.Feature(
            name=name,
            presence=schema_pb2.FeaturePresence(min_fraction=1.0),
            shape=fixed_shape)
    else:
        # Variable-length features get a name only.
        feature = schema_pb2.Feature(name=name)

    _set_type(name, feature, spec.dtype)
    _set_domain(name, feature, domains.get(name))
    return feature
def _ProjectTfmdSchema(self, tensor_names: List[Text]) -> schema_pb2.Schema:
    """Projects self._schema by the given tensor names.

    Args:
        tensor_names: Names of the tensors to keep. Each must be a key of
            `self.TensorRepresentations()`.

    Returns:
        A new Schema holding only the features whose column paths are source
        columns of the requested tensors, plus the TensorRepresentations of
        those tensors.

    Raises:
        ValueError: If some requested names have no TensorRepresentation, or
            if the sequence column feature is not of type STRUCT.
    """
    tensor_representations = self.TensorRepresentations()
    tensor_names = set(tensor_names)
    if not tensor_names.issubset(tensor_representations):
        # `tensor_representations` is a mapping, and `set - mapping` raises
        # TypeError. difference() accepts any iterable of keys, so the
        # intended ValueError is raised with the missing names.
        raise ValueError(
            "Unable to project {} because they were not in the original "
            "TensorRepresentations.".format(
                tensor_names.difference(tensor_representations)))

    # Column paths that feed at least one of the requested tensors.
    used_paths = set()
    for tensor_name in tensor_names:
        used_paths.update(
            tensor_representation_util.
            GetSourceColumnsFromTensorRepresentation(
                tensor_representations[tensor_name]))

    result = schema_pb2.Schema()
    # Note: We only copy projected features into the new schema because the
    # coder, and ArrowSchema() only care about Schema.feature. If they start
    # depending on other Schema fields then those fields must also be
    # projected.
    for f in self._schema.feature:
        p = path.ColumnPath(f.name)
        if f.name == _SEQUENCE_COLUMN_NAME:
            if f.type != schema_pb2.STRUCT:
                raise ValueError(
                    "Feature {} was expected to be of type STRUCT, but got {}"
                    .format(f.name, f))
            # Copy the struct feature without its children, then add back
            # only the children that are used.
            result_sequence_struct = schema_pb2.Feature()
            result_sequence_struct.CopyFrom(f)
            result_sequence_struct.ClearField("struct_domain")
            any_sequence_feature_projected = False
            for sf in f.struct_domain.feature:
                sequence_feature_path = p.child(sf.name)
                if sequence_feature_path in used_paths:
                    any_sequence_feature_projected = True
                    result_sequence_struct.struct_domain.feature.add(
                    ).CopyFrom(sf)
            # The struct is dropped entirely when none of its children
            # are used.
            if any_sequence_feature_projected:
                result.feature.add().CopyFrom(result_sequence_struct)
        elif p in used_paths:
            result.feature.add().CopyFrom(f)

    tensor_representation_util.SetTensorRepresentationsInSchema(
        result, {
            k: v
            for k, v in tensor_representations.items()
            if k in tensor_names
        })

    return result
def _clean_feature(feature: schema_pb2.Feature) -> schema_pb2.Feature:
    """Return a copy of a feature with its name and child features removed.

    The input feature is not modified.

    Args:
        feature: input feature

    Returns:
        A copy with the `name` field cleared and, when a struct_domain is
        present, an emptied list of struct_domain features.
    """
    cleaned = schema_pb2.Feature()
    cleaned.CopyFrom(feature)
    cleaned.ClearField("name")
    if not cleaned.HasField("struct_domain"):
        return cleaned
    # Keep the struct_domain itself; only its child features are dropped.
    del cleaned.struct_domain.feature[:]
    return cleaned
def create_protobuf_feature(column_schema):
    """Builds a schema_pb2.Feature from a column schema.

    The feature takes the column's name, has its dtype registered through
    `register_dtype`, and gets the column's tags as annotation tags. Extra
    metadata is registered only when the column has properties.

    Args:
        column_schema: Object exposing `name`, `tags` and `properties`.

    Returns:
        The populated feature.
    """
    feature = schema_pb2.Feature()
    feature.name = column_schema.name
    feature = register_dtype(column_schema, feature)

    # Tags may be plain values or objects wrapping one in `.value`.
    tag_values = [getattr(tag, "value", tag) for tag in column_schema.tags]
    feature.annotation.tag.extend(tag_values)

    # A column schema can be created without properties; nothing to add then.
    if len(column_schema.properties) == 0:
        return feature
    return register_extra_metadata(column_schema, feature)
def _get_promote_schema_feature(
    original: Optional[schema_pb2.Feature],
    parent: Optional[schema_pb2.Feature]) -> Optional[schema_pb2.Feature]:
    """Generate the schema feature for the field resulting from promote.

    Type, distribution constraints and domain info are taken from the
    original feature; the lifecycle stage is the result of
    `_min_lifecycle_stage` over both inputs. Value-count and presence
    information is only carried over when `_feature_is_dense(parent)` holds,
    scaled by the parent's `value_count.min`.

    Args:
        original: the original feature
        parent: the parent feature

    Returns:
        The schema of the new field, or None if either input is None.
    """
    if original is None or parent is None:
        return None

    promoted = schema_pb2.Feature()
    promoted.lifecycle_stage = _min_lifecycle_stage(
        original.lifecycle_stage, parent.lifecycle_stage)
    promoted.type = original.type
    if original.HasField("distribution_constraints"):
        promoted.distribution_constraints.CopyFrom(
            original.distribution_constraints)
    _copy_domain_info(original, promoted)

    if not _feature_is_dense(parent):
        return promoted

    parent_size = parent.value_count.min

    # Each original value-count bound is multiplied by the parent size.
    original_counts = original.value_count
    for bound in ("min", "max"):
        if original_counts.HasField(bound):
            setattr(promoted.value_count, bound,
                    parent_size * getattr(original_counts, bound))

    original_presence = original.presence
    if original_presence.HasField("min_fraction"):
        # A fraction of exactly 1 is kept; anything else is divided by the
        # parent size.
        promoted.presence.min_fraction = (
            1 if original_presence.min_fraction == 1 else
            original_presence.min_fraction / parent_size)

    if original_presence.HasField("min_count"):
        # If the parent is dense then the count can be reduced by the
        # number of children.
        # E.g. {{"a"},{"b"}},{{"c"},{"d"}},{{"e"},{"f"}} with a count of 6
        # and a parent size of 2 can become {"a","b"}, {"c","d"}, {"e","f"},
        # which has a count of 3.
        promoted.presence.min_count = (
            original_presence.min_count // parent_size)

    return promoted
def test_stats_options_json_round_trip(self):
    """Options survive to_json()/from_json(), except the callable ones."""
    # Generators and slice functions are expected back as None.
    generators = [
        lift_stats_generator.LiftStatsGenerator(
            schema=None,
            y_path=types.FeaturePath(['label']),
            x_paths=[types.FeaturePath(['feature'])])
    ]
    feature_whitelist = ['a']
    schema = schema_pb2.Schema(feature=[schema_pb2.Feature(name='f')])
    slice_functions = [slicing_util.get_feature_value_slicer({'b': None})]

    # (option name, value) pairs that must come back unchanged, in the
    # order they are verified.
    label_options = (
        ('label_feature', 'label'),
        ('weight_feature', 'weight'),
    )
    tuning_options = (
        ('sample_rate', 0.01),
        ('num_top_values', 21),
        ('frequency_threshold', 2),
        ('weighted_frequency_threshold', 2.0),
        ('num_rank_histogram_buckets', 1001),
        ('num_values_histogram_buckets', 11),
        ('num_histogram_buckets', 11),
        ('num_quantiles_histogram_buckets', 11),
        ('epsilon', 0.02),
        ('infer_type_from_schema', True),
        ('desired_batch_size', 100),
        ('enable_semantic_domain_stats', True),
        ('semantic_domain_stats_sample_rate', 0.1),
    )

    options = stats_options.StatsOptions(
        generators=generators,
        feature_whitelist=feature_whitelist,
        schema=schema,
        slice_functions=slice_functions,
        **dict(label_options + tuning_options))

    restored = stats_options.StatsOptions.from_json(options.to_json())

    def assert_restored(expected_pairs):
        for option_name, expected in expected_pairs:
            self.assertEqual(expected, getattr(restored, option_name))

    self.assertIsNone(restored.generators)
    self.assertEqual(feature_whitelist, restored.feature_whitelist)
    compare.assertProtoEqual(self, schema, restored.schema)
    assert_restored(label_options)
    self.assertIsNone(restored.slice_functions)
    assert_restored(tuning_options)
'x': tf.io.FixedLenSequenceFeature([], tf.int64) }, 'error_msg': r'Spec for feature "x" was .* of type .*, expected a ' r'FixedLenFeature, VarLenFeature or SparseFeature', 'error_class': TypeError }, ] _FEATURE_BY_NAME = { 'x': text_format.Parse( """ name: "x" type: INT int_domain { min: 0 max: 9 } """, schema_pb2.Feature()), 'ragged$value': text_format.Parse( """ name: "ragged$value" type: FLOAT """, schema_pb2.Feature()), 'ragged$row_lengths_1': text_format.Parse( """ name: "ragged$row_lengths_1" type: INT """, schema_pb2.Feature()), 'ragged$row_lengths_2': text_format.Parse( """
'stats_options_kwargs': { 'semantic_domain_stats_sample_rate': 2 }, 'exception_type': ValueError, 'error_message': 'Invalid semantic_domain_stats_sample_rate 2' }, { 'testcase_name': 'categorical_float_without_sketch_generators', 'stats_options_kwargs': { 'experimental_use_sketch_based_topk_uniques': False, 'schema': schema_pb2.Schema(feature=[ schema_pb2.Feature( name='f', type=schema_pb2.FLOAT, float_domain=schema_pb2.FloatDomain(is_categorical=True)) ], ), }, 'exception_type': ValueError, 'error_message': ('Categorical float features set in schema require ' 'experimental_use_sketch_based_topk_uniques'), }, { 'testcase_name': 'both_slice_fns_and_slice_sqls_specified', 'stats_options_kwargs': { 'experimental_slice_functions': [lambda x: (None, x)], 'experimental_slice_sqls': [''] }, 'exception_type': ValueError,
def test_valid_stats_options_json_round_trip(self):
    """Every option set here survives a to_json()/from_json() round trip."""
    feature_allowlist = ['a']
    schema = schema_pb2.Schema(feature=[schema_pb2.Feature(name='f')])

    # (constructor keyword, value) pairs, in the order they are verified.
    other_options = (
        ('vocab_paths', {'a': '/path/to/a'}),
        ('label_feature', 'label'),
        ('weight_feature', 'weight'),
        ('sample_rate', 0.01),
        ('num_top_values', 21),
        ('frequency_threshold', 2),
        ('weighted_frequency_threshold', 2.0),
        ('num_rank_histogram_buckets', 1001),
        ('num_values_histogram_buckets', 11),
        ('num_histogram_buckets', 11),
        ('num_quantiles_histogram_buckets', 11),
        ('epsilon', 0.02),
        ('infer_type_from_schema', True),
        ('desired_batch_size', 100),
        ('enable_semantic_domain_stats', True),
        ('semantic_domain_stats_sample_rate', 0.1),
        ('per_feature_weight_override', {types.FeaturePath(['a']): 'w'}),
        ('add_default_generators', True),
        ('experimental_use_sketch_based_topk_uniques', True),
        ('experimental_result_partitions', 3),
    )
    # This keyword is read back through a differently named attribute.
    read_back_as = {
        'per_feature_weight_override': '_per_feature_weight_override',
    }

    options = stats_options.StatsOptions(
        feature_allowlist=feature_allowlist,
        schema=schema,
        **dict(other_options))

    restored = stats_options.StatsOptions.from_json(options.to_json())

    self.assertEqual(feature_allowlist, restored.feature_allowlist)
    compare.assertProtoEqual(self, schema, restored.schema)
    for keyword, expected in other_options:
        attribute = read_back_as.get(keyword, keyword)
        self.assertEqual(expected, getattr(restored, attribute))
def export_tfx_schema(self) -> schema_pb2.Schema:
    """
    Create a Tensorflow metadata schema from a FeatureSet.

    Every field in `self._fields` that is not an Entity becomes one Feature
    in the schema. The attributes listed below are copied when they are set
    on the field, and the feature type is derived from the field's dtype.

    Returns:
        Tensorflow metadata schema.
    """
    schema = schema_pb2.Schema()

    # List of attributes to copy from fields in the FeatureSet to feature in
    # Tensorflow metadata schema where the attribute name is the same
    # (ignoring a leading underscore on the field side, see below).
    attributes_to_copy_from_field_to_feature = [
        "name",
        "presence",
        "group_presence",
        "shape",
        "value_count",
        "domain",
        "int_domain",
        "float_domain",
        "string_domain",
        "bool_domain",
        "struct_domain",
        "_natural_language_domain",
        "image_domain",
        "mid_domain",
        "url_domain",
        "time_domain",
        "time_of_day_domain",
    ]

    for field in self._fields.values():
        if isinstance(field, Entity):
            continue

        feature = schema_pb2.Feature()
        for attr in attributes_to_copy_from_field_to_feature:
            field_value = getattr(field, attr)
            if field_value is None:
                # This corresponds to an unset member in the proto Oneof
                # field.
                continue

            # The field may store a value under an underscore-prefixed
            # name. The Feature proto has no such attribute, so the
            # underscore is stripped on the proto side
            # (e.g. "_natural_language_domain" -> "natural_language_domain").
            feature_attr = attr.lstrip("_")
            feature_value = getattr(feature, feature_attr)

            if isinstance(feature_value, Message):
                # Proto message field to copy is an "embedded" field, so
                # MergeFrom() method must be used.
                feature_value.MergeFrom(field_value)
            elif isinstance(feature_value, (int, str, bool)):
                # Proto message field is a simple Python type, so setattr()
                # can be used.
                setattr(feature, feature_attr, field_value)
            else:
                warnings.warn(
                    f"Attribute '{attr}' cannot be copied from Field "
                    f"'{field.name}' in FeatureSet '{self.name}' to a "
                    f"Feature in the Tensorflow metadata schema, because "
                    f"the type is neither a Protobuf message or Python "
                    f"int, str and bool")

        # "type" attr is handled separately because the attribute name is
        # different ("dtype" in field and "type" in Feature) and "type" in
        # Feature is only a subset of "dtype".
        feature.type = field.dtype.to_tfx_schema_feature_type()
        schema.feature.append(feature)

    return schema