Exemple #1
0
def build_model():
    """Build a small Keras model around a ``MultiCategoryEncoding`` layer.

    The model accepts a 3-column string input, passes it through the
    encoding layer (``INT``, ``INT``, ``NONE`` for the three columns) and
    then through a single-unit ``Dense`` layer.

    Returns:
        A ``(model, layer)`` tuple holding the ``tf.keras.Model`` and the
        encoding layer instance used inside it.
    """
    inputs = tf.keras.Input(shape=(3,), dtype=tf.string)
    column_encodings = [layer_module.INT, layer_module.INT, layer_module.NONE]
    encoder = layer_module.MultiCategoryEncoding(encoding=column_encodings)
    encoded = encoder(inputs)
    outputs = tf.keras.layers.Dense(1)(encoded)
    model = tf.keras.Model(inputs, outputs)
    return model, encoder
Exemple #2
0
def test_multi_column_categorical_encoding(tmp_path):
    """Exercise ``MultiCategoryEncoding`` inside a trainable Keras model.

    Adapts the layer on ``x_train``, fits a one-unit sigmoid model for a
    single epoch, then inspects the encoding layer's output on its own.
    """
    x_train, x_test, y_train = get_data()
    input_node = tf.keras.Input(shape=(3, ), dtype=tf.string)
    layer = layer_module.MultiCategoryEncoding([
        layer_module.INT,
        layer_module.INT,
        layer_module.NONE,
    ])
    hidden_node = layer(input_node)
    output_node = tf.keras.layers.Dense(1, activation='sigmoid')(hidden_node)
    model = tf.keras.Model(input_node, output_node)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    # Adapt the encoding layer on the batched training data before fitting.
    layer.adapt(tf.data.Dataset.from_tensor_slices(x_train).batch(32))

    model.fit(x_train, y_train, epochs=1)

    # A second model sharing the same nodes exposes the encoder output only.
    model2 = tf.keras.Model(input_node, hidden_node)
    result = model2.predict(x_train)
    # Column 0: rows 0 and 2 encode to the same value, row 1 differs.
    assert result[0][0] == result[2][0]
    assert result[0][0] != result[1][0]
    # Column 1: row 0 differs from both rows 1 and 2.
    assert result[0][1] != result[1][1]
    assert result[0][1] != result[2][1]
    # Column 2 (NONE encoding): row 2 comes out as 0.
    # NOTE(review): presumably the source value is "nan" -- confirm in get_data.
    assert result[2][2] == 0

    output = model2.predict(x_test)
    assert output.dtype == np.dtype('float32')
def test_preprocessing_adapt_with_cat_to_int_and_norm():
    """Check ``AutoTuner.adapt`` runs on a model with two preprocessing layers.

    The model stacks a ``MultiCategoryEncoding`` layer ("int", "none") and a
    ``Normalization`` layer on a 2-column string input. The test passes if
    the adapt call completes without raising.
    """
    # ``np.unicode`` was only an alias of the builtin ``str`` and has been
    # removed from recent NumPy releases; ``str`` yields the same dtype.
    x = np.array([["a", 5], ["b", 6]]).astype(str)
    y = np.array([[1, 2], [3, 4]]).astype(str)
    dataset = tf.data.Dataset.from_tensor_slices((x, y)).batch(32)
    model = tf.keras.models.Sequential()
    model.add(tf.keras.Input(shape=(2, ), dtype=tf.string))
    model.add(keras_layers.MultiCategoryEncoding(["int", "none"]))
    model.add(preprocessing.Normalization(axis=-1))

    tuner_module.AutoTuner.adapt(model, dataset)
 def build(self, hp, inputs=None):
     """Apply a ``MultiCategoryEncoding`` layer to the first input node.

     Each name in ``self.column_names`` is looked up in
     ``self.column_types``; categorical columns get ``keras_layers.INT``
     and every other column gets ``keras_layers.NONE``.

     Args:
         hp: Hyperparameters object; not read by this method.
         inputs: Input node or nested structure of nodes. Only the first
             flattened element is used.

     Returns:
         The output of the encoding layer called on the first input node.
     """
     first_node = nest.flatten(inputs)[0]
     # TODO: Search to use one-hot or int.
     per_column = [
         keras_layers.INT
         if self.column_types[name] == analysers.CATEGORICAL
         else keras_layers.NONE
         for name in self.column_names
     ]
     encoder = keras_layers.MultiCategoryEncoding(per_column)
     return encoder(first_node)
def test_model_save_load_output_same(tmp_path):
    """Check a saved and reloaded model predicts the same as the original.

    An adapted ``MultiCategoryEncoding`` layer is wrapped in a ``Sequential``
    model, saved under ``tmp_path``, loaded back, and both models are
    compared on the training data.
    """
    x_train = np.array([["a", "ab", 2.1], ["b", "bc", 1.0], ["a", "bc", "nan"]])
    column_encodings = [layer_module.INT, layer_module.INT, layer_module.NONE]
    encoder = layer_module.MultiCategoryEncoding(encoding=column_encodings)
    batched = tf.data.Dataset.from_tensor_slices(x_train).batch(32)
    encoder.adapt(batched)

    string_input = tf.keras.Input(shape=(3,), dtype=tf.string)
    model = tf.keras.Sequential([string_input, encoder])
    model.save(os.path.join(tmp_path, "model"))
    reloaded = tf.keras.models.load_model(os.path.join(tmp_path, "model"))

    expected = model.predict(x_train)
    actual = reloaded.predict(x_train)
    assert np.array_equal(expected, actual)
Exemple #6
0
 def __init__(self, column_names, column_types, **kwargs):
     """Store the column metadata and create the encoding layer.

     Args:
         column_names: Iterable of column names, in column order.
         column_types: Mapping from column name to its type; names mapped
             to ``analysers.CATEGORICAL`` get ``keras_layers.INT`` and all
             others get ``keras_layers.NONE``.
         **kwargs: Forwarded unchanged to the parent initializer.
     """
     super().__init__(**kwargs)
     self.column_names = column_names
     self.column_types = column_types
     # TODO: Search to use one-hot or int.
     per_column = [
         keras_layers.INT
         if self.column_types[name] == analysers.CATEGORICAL
         else keras_layers.NONE
         for name in self.column_names
     ]
     self.layer = keras_layers.MultiCategoryEncoding(per_column)
def test_multi_cat_encode_strings_correctly(tmp_path):
    """Check the adapted layer's encoded values when mapped over a dataset.

    The layer is adapted on a batched dataset built from ``x_train`` and
    then mapped over an equivalent dataset; the assertions inspect the last
    batch produced.
    """
    x_train = np.array([
        ["a", "ab", 2.1],
        ["b", "bc", 1.0],
        ["a", "bc", "nan"],
    ])
    column_encodings = [layer_module.INT, layer_module.INT, layer_module.NONE]
    layer = layer_module.MultiCategoryEncoding(column_encodings)
    dataset = tf.data.Dataset.from_tensor_slices(x_train).batch(32)

    adapt_data = tf.data.Dataset.from_tensor_slices(x_train).batch(32)
    layer.adapt(adapt_data)
    encoded_batches = dataset.map(layer)
    for batch in encoded_batches:
        # Keep only the most recent batch.
        result = batch

    # Column 0: rows 0 and 2 hold "a", row 1 holds "b".
    assert result[0][0] == result[2][0]
    assert result[0][0] != result[1][0]
    # Column 1: row 0 holds "ab", rows 1 and 2 hold "bc".
    assert result[0][1] != result[1][1]
    assert result[0][1] != result[2][1]
    # Column 2 (NONE encoding): the "nan" entry comes out as 0.
    assert result[2][2] == 0
    assert result.dtype == tf.float32
def test_call_multi_with_single_column_return_right_shape():
    """Check a one-column encoding layer returns a ``(3, 1)`` output."""
    encoder = layer_module.MultiCategoryEncoding(encoding=[layer_module.INT])
    single_column = np.array([["a"], ["b"], ["a"]])

    encoded = encoder(single_column)
    assert encoded.shape == (3, 1)
def test_init_multi_one_hot_encode():
    """Check the layer can be constructed with a ``ONE_HOT`` column.

    The test passes if construction does not raise.
    """
    column_encodings = [
        layer_module.ONE_HOT,
        layer_module.INT,
        layer_module.NONE,
    ]
    layer_module.MultiCategoryEncoding(encoding=column_encodings)
Exemple #10
0
def test_call_multi_with_single_column_return_right_shape():
    """Check an adapted one-column encoding layer returns a ``(3, 1)`` output."""
    x_train = np.array([["a"], ["b"], ["a"]])
    encoder = layer_module.MultiCategoryEncoding(encoding=[layer_module.INT])
    batched = tf.data.Dataset.from_tensor_slices(x_train).batch(32)
    encoder.adapt(batched)

    encoded = encoder(x_train)
    assert encoded.shape == (3, 1)