def encode_string_categorical_feature(feature, name, dataset):
    """One-hot encode a string-valued categorical feature.

    Args:
        feature: The model input for this feature; it is passed through the
            adapted lookup and encoding layers. (Presumably a symbolic Keras
            input -- confirm against the caller.)
        name: Key used to select this feature from each dataset element.
        dataset: Dataset whose elements unpack into two components; the
            first one is indexed by ``name``.

    Returns:
        ``feature`` after string lookup followed by category encoding.
    """
    # Keep only this feature's values and append a trailing axis to each.
    column_ds = dataset.map(lambda x, y: x[name]).map(
        lambda x: tf.expand_dims(x, -1)
    )

    # Strings -> integer indices, with the vocabulary learned from the data.
    lookup = StringLookup()
    lookup.adapt(column_ds)
    indices = lookup(feature)

    # Integer indices -> encoded vectors; the index space is learned from
    # the looked-up version of the same column.
    one_hot = CategoryEncoding(output_mode="binary")
    one_hot.adapt(column_ds.map(lookup))

    return one_hot(indices)
def encode_string_categorical_feature(feature, name, dataset):
    """Encode a string categorical feature as a one-hot style vector.

    Args:
        feature: The model input for this feature, fed through the adapted
            layers built here.
        name: Key used to select this feature from each dataset element.
        dataset: Dataset whose elements unpack into two components; the
            first one is indexed by ``name``.

    Returns:
        The output of the category encoder applied to the looked-up
        ``feature``.
    """

    def _pick_column(x, y):
        # The second component of each element is not needed here.
        return x[name]

    def _add_last_axis(x):
        return tf.expand_dims(x, -1)

    values_ds = dataset.map(_pick_column)
    values_ds = values_ds.map(_add_last_axis)

    # Learn the string vocabulary, then map the input to integer indices.
    string_to_index = StringLookup()
    string_to_index.adapt(values_ds)
    feature_indices = string_to_index(feature)

    # Learn the index space from the data and encode the input indices.
    index_to_vector = CategoryEncoding(output_mode="binary")
    index_ds = values_ds.map(string_to_index)
    index_to_vector.adapt(index_ds)

    return index_to_vector(feature_indices)
def _encode_categorical_feature(
    feature: KerasTensor,
    name: str,
    dataset: Optional[BatchDataset],
) -> KerasTensor:
    """One-hot encode a categorical feature.

    Args:
        feature: The input layer of the feature.
        name: The feature's name (its column name in the original
            dataframe).
        dataset: The training data. When ``None``, no preprocessing layers
            are built and a placeholder tensor is returned instead.

    Returns:
        The one-hot encoded tensor of the input feature, or a float32
        placeholder ``KerasTensor`` of shape ``(None, 1)`` when ``dataset``
        is ``None``.
    """
    # Return a generic placeholder for the tuner initialization. Test for
    # None explicitly rather than relying on the truthiness of a dataset.
    if dataset is None:
        return KerasTensor(
            type_spec=TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
        )

    # Create a StringLookup layer which will turn strings into integer indices
    index = StringLookup()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed
    # integer index
    index.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = index(feature)

    # Create a CategoryEncoding for our integer indices
    encoder = CategoryEncoding(output_mode="binary")

    # Learn the space of possible indices directly from the vocabulary size,
    # so the dataset does not have to be mapped through the lookup again
    encoder.adapt(np.arange(index.vocab_size()))

    # Apply one-hot encoding to our indices
    encoded_feature = encoder(encoded_feature)
    return encoded_feature