def get_feature_columns_and_inputs():
    """Build the model's feature columns and matching Keras input layers.

    Returns:
        A tuple ``(columns, inputs)`` where ``columns`` is a list of
        feature columns and ``inputs`` maps each feature name to a
        ``tf.keras.Input`` layer with the same name.
    """
    numeric_keys = ["trestbps", "chol", "thalach", "oldpeak", "slope", "ca"]

    # Plain numeric features: one numeric column and one scalar input each.
    columns = [tf.feature_column.numeric_column(key) for key in numeric_keys]
    inputs = {
        key: tf.keras.Input(shape=(1,), name=key) for key in numeric_keys
    }

    # "age" is bucketized into coarse age ranges rather than used raw.
    age_col = tf.feature_column.numeric_column("age")
    columns.append(
        tf.feature_column.bucketized_column(
            age_col, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]
        )
    )
    inputs["age"] = tf.keras.Input(shape=(1,), name="age")

    # "thal" is a string feature: hash it, then learn an 8-dim embedding
    # via the project's (ElasticDL) embedding_column.
    hashed_thal = tf.feature_column.categorical_column_with_hash_bucket(
        "thal", hash_bucket_size=100
    )
    columns.append(feature_column.embedding_column(hashed_thal, dimension=8))
    inputs["thal"] = tf.keras.Input(shape=(1,), name="thal", dtype=tf.string)

    return columns, inputs
    def test_call_embedding_column_with_weights(self):
        """An embedding over a weighted categorical column sums the mocked
        one-hot vectors scaled by the per-id "frequency" weights."""
        dim = 8

        weighted = tf.feature_column.weighted_categorical_column(
            tf.feature_column.categorical_column_with_identity(
                "item_id", num_buckets=128),
            weight_feature_key="frequency",
        )
        embedding = feature_column.embedding_column(
            weighted,
            dimension=dim,
            initializer=tf.initializers.identity,
            combiner="sum",
        )

        def _mock_lookup(unique_ids):
            # One-hot mock vectors stand in for a real embedding table.
            return generate_vectors_with_one_hot_value(
                unique_ids, embedding.dimension
            )

        embedding.lookup_embedding = _mock_lookup

        output = call_feature_columns(
            [embedding],
            {
                "item_id": [[2, 6, 5], [3, 1, 1]],
                "frequency": [[0.33, 5.0, 1.024], [2.048, 0.5, 1.0]],
            },
        )

        # Row 0: ids 2/6/5 weighted 0.33/5.0/1.024; row 1: id 1 appears
        # twice (0.5 + 1.0 = 1.5) and id 3 once (2.048).
        expected = np.array(
            [
                [0.0, 0.0, 0.33, 0.0, 0.0, 1.024, 5.0, 0.0],
                [0.0, 1.5, 0.0, 2.048, 0.0, 0.0, 0.0, 0.0],
            ],
            dtype=np.float32,
        )
        self.assertTrue(np.array_equal(output.numpy(), expected))
    def test_call_embedding_column(self):
        """An embedding column with a mocked lookup returns the mock
        embedding vector for each input id."""
        dim = 32

        embedding = feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(
                "item_id", num_buckets=128
            ),
            dimension=dim,
        )

        def _mock_lookup(unique_ids):
            # Deterministic mock vectors replace a real embedding table.
            return generate_mock_embedding_vectors(
                unique_ids, embedding.dimension
            )

        embedding.lookup_embedding = _mock_lookup

        output = call_feature_columns([embedding], {"item_id": [1, 2, 3]})

        expected = generate_mock_embedding_vectors([1, 2, 3], dim)
        self.assertTrue(np.array_equal(output.numpy(), expected))
# Example #4
    def test_call_embedding_column(self):
        """An embedding column with an injected lookup function returns the
        mocked one-hot vectors for the given ids."""
        dim = 32

        embedding = embedding_column(
            tf.feature_column.categorical_column_with_identity(
                "item_id", num_buckets=128
            ),
            dimension=dim,
        )

        # The lookup func receives (name, ids); only ids matter here.
        embedding.set_lookup_embedding_func(
            lambda name, ids: generate_vectors_with_one_hot_value(ids, dim)
        )

        output = call_feature_columns([embedding], {"item_id": [1, 2, 3]})

        expected = generate_vectors_with_one_hot_value([1, 2, 3], dim)
        self.assertTrue(np.array_equal(output.numpy(), expected))
# Example #5
    def test_embedding_column_gradients(self):
        """Check gradients w.r.t. the gathered batch embedding per combiner.

        Each example has 2 ids, so for a unique id appearing k times the
        expected gradient is k ("sum"), k / 2 ("mean"), or k / sqrt(2)
        ("sqrtn").
        """
        dimension = 8

        inputs = {"item_id": [[1, 2], [10, 10], [6, 3], [10, 2]]}

        # unique_ids: 1, 2, 10, 6, 3
        expected_sum_grads = [1, 2, 3, 1, 1]
        expected_mean_grads = [1 / 2.0, 2 / 2.0, 3 / 2.0, 1 / 2.0, 1 / 2.0]
        expected_sqrtn_grads = [
            1 / np.sqrt(2.0),
            2 / np.sqrt(2.0),
            3 / np.sqrt(2.0),
            1 / np.sqrt(2.0),
            1 / np.sqrt(2.0),
        ]

        combiner_to_expected_grads = {
            "sum": expected_sum_grads,
            "mean": expected_mean_grads,
            "sqrtn": expected_sqrtn_grads,
        }

        for combiner, expected_grads in combiner_to_expected_grads.items():
            item_id_embedding = embedding_column(
                tf.feature_column.categorical_column_with_identity(
                    "item_id", num_buckets=128
                ),
                dimension=dimension,
                combiner=combiner,
            )

            # Replace the real lookup with deterministic mock vectors.
            def _mock_gather_embedding(name, ids):
                return generate_vectors_fill_with_id_value(ids, dimension)

            item_id_embedding.set_lookup_embedding_func(_mock_gather_embedding)

            dense_features = tf.keras.layers.DenseFeatures([item_id_embedding])
            # Exercise both eager and tf.function (graph) execution paths.
            call_fns = [dense_features.call, tf.function(dense_features.call)]

            for call_fn in call_fns:
                with tf.GradientTape() as tape:
                    # The column watches its gathered embeddings on the tape
                    # so the gradient can be taken against batch_embedding.
                    item_id_embedding.set_tape(tape)
                    output = call_fn(inputs)
                    batch_embedding = item_id_embedding.embedding_and_ids[
                        0
                    ].batch_embedding
                    grads = tape.gradient(output, batch_embedding)

                    # Clear recorded state before the next call / combiner.
                    item_id_embedding.reset()

                    grads = grads.numpy()
                    # One gradient row per unique id (5 unique ids above).
                    for i in range(5):
                        self.assertTrue(
                            np.isclose(
                                grads[i],
                                np.full(dimension, expected_grads[i]),
                            ).all()
                        )
def get_feature_columns():
    """Assemble the model's feature columns.

    Numeric keys become plain numeric columns; categorical keys become
    16-dim embeddings over 64 hash buckets. Numeric columns come first,
    matching the original construction order.
    """
    numeric_columns = [
        tf.feature_column.numeric_column(key)
        for key in NUMERIC_FEATURE_KEYS
    ]
    categorical_columns = [
        feature_column.embedding_column(
            tf.feature_column.categorical_column_with_hash_bucket(
                key, hash_bucket_size=64),
            dimension=16,
        )
        for key in CATEGORICAL_FEATURE_KEYS
    ]
    return numeric_columns + categorical_columns
# Example #7
def _replace_tf_embedding_column_with_edl(dense_features_layer):
    """Return a new DenseFeatures layer with partition-needing embeddings
    swapped for the ElasticDL version.

    Every TensorFlow ``EmbeddingColumn`` for which
    ``_need_partition_embedding`` is true is rebuilt via the project's
    ``embedding_column``; all other columns are kept unchanged.
    """
    replaced_columns = []
    for col in dense_features_layer._feature_columns:
        needs_swap = isinstance(
            col, fc_lib.EmbeddingColumn
        ) and _need_partition_embedding(col)
        if not needs_swap:
            replaced_columns.append(col)
            continue
        logger.info("Replace embedding_column {} from TensorFlow "
                    "version to ElasticDL version".format(col.name))
        edl_col = embedding_column(
            col.categorical_column, dimension=col.dimension
        )
        # Tie the ElasticDL column back to its owning layer by name.
        edl_col.set_dense_features_layer_name(dense_features_layer.name)
        replaced_columns.append(edl_col)

    return tf.keras.layers.DenseFeatures(
        feature_columns=replaced_columns, name=dense_features_layer.name
    )
    def test_embedding_column_gradients_with_weights(self):
        """Check gradients of a weighted embedding column per combiner.

        With per-id "frequency" weights, the gradient for each occurrence
        is its weight ("sum"), weight / per-example weight sum ("mean"),
        or weight / sqrt of the per-example squared-weight sum ("sqrtn").
        The gradient is sparse: indexed by position in the unique-id list.
        """
        dimension = 8

        inputs = {
            "item_id": [[1, 2], [10, 10], [6, 3], [10, 2]],
            "frequency": [[0.3, 0.6], [0.1, 0.2], [0.8, 0.1], [0.9, 0.6]],
        }

        # Flatten ids and recover each occurrence's index into the
        # unique-id list; grad indices are expected to match these.
        item_ids = tf.reshape(inputs["item_id"], shape=[-1])
        _, item_id_idx = tf.unique(item_ids)

        # unique_ids: 1, 2, 10, 6, 3
        expected_sum_grads = np.array([
            np.full(dimension, value)
            for value in [0.3, 0.6, 0.1, 0.2, 0.8, 0.1, 0.9, 0.6]
        ])
        # "mean": each weight divided by its example's total weight.
        expected_mean_grads = np.array([
            np.full(dimension, value) for value in [
                0.3 / 0.9,
                0.6 / 0.9,
                0.1 / 0.3,
                0.2 / 0.3,
                0.8 / 0.9,
                0.1 / 0.9,
                0.9 / 1.5,
                0.6 / 1.5,
            ]
        ])
        # "sqrtn": each weight divided by the l2-norm of its example's weights.
        expected_sqrtn_grads = np.array([
            np.full(dimension, value) for value in [
                0.3 / np.sqrt(np.power(0.3, 2) + np.power(0.6, 2)),
                0.6 / np.sqrt(np.power(0.3, 2) + np.power(0.6, 2)),
                0.1 / np.sqrt(np.power(0.1, 2) + np.power(0.2, 2)),
                0.2 / np.sqrt(np.power(0.1, 2) + np.power(0.2, 2)),
                0.8 / np.sqrt(np.power(0.8, 2) + np.power(0.1, 2)),
                0.1 / np.sqrt(np.power(0.8, 2) + np.power(0.1, 2)),
                0.9 / np.sqrt(np.power(0.9, 2) + np.power(0.6, 2)),
                0.6 / np.sqrt(np.power(0.9, 2) + np.power(0.6, 2)),
            ]
        ])

        combiner_to_expected_grads = {
            "sum": expected_sum_grads,
            "mean": expected_mean_grads,
            "sqrtn": expected_sqrtn_grads,
        }

        for combiner, expected_grads in combiner_to_expected_grads.items():
            item_id_embedding = feature_column.embedding_column(
                tf.feature_column.weighted_categorical_column(
                    tf.feature_column.categorical_column_with_identity(
                        "item_id", num_buckets=128),
                    weight_feature_key="frequency",
                ),
                dimension=dimension,
                initializer=tf.initializers.identity,
                combiner=combiner,
            )

            # Replace the real lookup with deterministic mock vectors.
            def _mock_gather_embedding(name, ids):
                return generate_vectors_fill_with_id_value(ids, dimension)

            item_id_embedding.set_lookup_embedding_func(_mock_gather_embedding)

            dense_features = tf.keras.layers.DenseFeatures([item_id_embedding])
            # Exercise both eager and tf.function (graph) execution paths.
            call_fns = [dense_features.call, tf.function(dense_features.call)]

            for call_fn in call_fns:
                with tf.GradientTape() as tape:
                    # The column watches its gathered embeddings on the tape
                    # so the gradient can be taken against batch_embedding.
                    item_id_embedding.set_tape(tape)
                    output = call_fn(inputs)
                    batch_embedding = item_id_embedding.embedding_and_ids[
                        0].batch_embedding
                    grads = tape.gradient(output, batch_embedding)

                    # Clear recorded state before the next call / combiner.
                    item_id_embedding.reset()

                    # grads is sparse (IndexedSlices-like): one value row per
                    # id occurrence, indexed into the unique-id list.
                    grad_indices = grads.indices
                    grad_values = grads.values

                    self.assertTrue(
                        np.array_equal(grad_indices.numpy(),
                                       item_id_idx.numpy()))
                    self.assertTrue(
                        np.isclose(grad_values.numpy(), expected_grads).all())