def transform(inputs):
    """Apply the generated feature-transform pipeline to ``inputs``.

    Walks ``FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY`` in order, writing each
    op's result into a working dict under the op's output name, and finally
    returns the tensors named in ``TRANSFORM_OUTPUTS`` as a tuple.

    Args:
        inputs: dict mapping feature name to tensor; it is copied, not
            mutated.

    Returns:
        tuple of transformed tensors, one per name in ``TRANSFORM_OUTPUTS``.
    """
    outputs = inputs.copy()

    for info in FEATURE_TRANSFORM_INFO_EXECUTE_ARRAY:
        op = info.op_type
        if op in (
            TransformOpType.HASH,
            TransformOpType.BUCKETIZE,
            TransformOpType.LOOKUP,
        ):
            # These three ops consume a sparse tensor: sparsify the input
            # column in place first, then apply the op-specific layer.
            sparse = ToSparse()(outputs[info.input])
            outputs[info.input] = sparse
            if op == TransformOpType.HASH:
                layer = Hashing(info.hash_bucket_size)
            elif op == TransformOpType.BUCKETIZE:
                layer = Discretization(info.boundaries)
            else:
                layer = IndexLookup(info.vocabulary_list)
            outputs[info.output] = layer(sparse)
        elif op == TransformOpType.CONCAT:
            # CONCAT takes a list of already-transformed features.
            to_concat = [outputs[name] for name in info.input]
            outputs[info.output] = ConcatenateWithOffset(info.id_offsets)(
                to_concat
            )
        elif op == TransformOpType.EMBEDDING:
            embedding = SparseEmbedding(
                input_dim=info.input_dim,
                output_dim=info.output_dim,
            )
            outputs[info.output] = embedding(outputs[info.input])
        elif op == TransformOpType.ARRAY:
            # ARRAY just bundles several features into a Python list.
            outputs[info.output] = [outputs[name] for name in info.input]

    return tuple(outputs[name] for name in TRANSFORM_OUTPUTS)
# Example no. 2 (extraction artifact from the source page: "Esempio n. 2", score 0)
    def test_model_with_to_sparse(self):
        """ToSparse inside a keras Model drops entries equal to ignore_value."""
        dense_input = tf.keras.Input(shape=(1, ), dtype=tf.int32)
        model = tf.keras.Model(
            inputs=dense_input,
            outputs=ToSparse(ignore_value=-1)(dense_input),
        )

        actual = model.call(tf.constant([[1], [-1], [2], [3]]))

        # The -1 at row 1 is the ignore value, so only three entries remain.
        expected = tf.SparseTensor(
            indices=tf.constant([[0, 0], [2, 0], [3, 0]], dtype=tf.int64),
            values=tf.constant([1, 2, 3], dtype=tf.int32),
            dense_shape=(4, 1),
        )
        self.assertTrue(sparse_tensor_equal(actual, expected))
# Example no. 3 (extraction artifact from the source page: "Esempio n. 3", score 0)
    def test_to_sparse(self):
        """ToSparse.call skips default ignore values.

        Both cases below drop one entry (the "" string and the -1 int —
        presumably the layer's per-dtype defaults) and keep the rest.
        """
        cases = [
            (
                tf.constant([["A", ""], ["B", "C"]], tf.string),
                np.array(["A", "B", "C"]),
            ),
            (
                tf.constant([[12, -1], [45, 78]], tf.int64),
                np.array([12, 45, 78]),
            ),
        ]
        for dense, expected_values in cases:
            output = ToSparse().call(dense)
            # In each case the ignored entry sits at [0, 1], so the
            # surviving indices and the dense shape are identical.
            expected = tf.SparseTensor(
                indices=np.array([[0, 0], [1, 0], [1, 1]]),
                values=expected_values,
                dense_shape=(2, 2),
            )
            self.assertTrue(sparse_tensor_equal(output, expected))
def transform_from_code_gen(source_inputs):
    """Code-generated feature pipeline for a wide-and-deep model.

    Hashes / looks up / bucketizes the raw census-style columns, concatenates
    them into three id groups, embeds each group, and returns the embedding
    lists feeding the wide and deep halves of the model.

    Args:
        source_inputs: dict mapping column name to input tensor; copied, not
            mutated.

    Returns:
        (wide_embeddings_out, deep_embeddings_out) — two lists of embedded
        group tensors.
    """
    inputs = source_inputs.copy()

    def hashed(column, info):
        # Sparsify a raw column, then hash it into info.hash_bucket_size ids.
        return Hashing(info.hash_bucket_size)(ToSparse()(inputs[column]))

    def looked_up(column, info):
        # Sparsify a raw column, then map values through its vocabulary.
        return IndexLookup(info.vocabulary_list)(ToSparse()(inputs[column]))

    def bucketized(column, info):
        # Sparsify a numeric column, then discretize by info.boundaries.
        return Discretization(info.boundaries)(ToSparse()(inputs[column]))

    education_hash_out = hashed("education", education_hash)
    occupation_hash_out = hashed("occupation", occupation_hash)
    native_country_hash_out = hashed("native_country", native_country_hash)

    workclass_lookup_out = looked_up("workclass", workclass_lookup)
    marital_status_lookup_out = looked_up(
        "marital_status", marital_status_lookup
    )
    relationship_lookup_out = looked_up("relationship", relationship_lookup)
    race_lookup_out = looked_up("race", race_lookup)
    sex_lookup_out = looked_up("sex", sex_lookup)

    age_bucketize_out = bucketized("age", age_bucketize)
    capital_gain_bucketize_out = bucketized(
        "capital_gain", capital_gain_bucketize
    )
    capital_loss_bucketize_out = bucketized(
        "capital_loss", capital_loss_bucketize
    )
    hours_per_week_bucketize_out = bucketized(
        "hours_per_week", hours_per_week_bucketize
    )

    # Concatenate the per-feature id spaces into three disjoint groups,
    # shifting each member by its configured id offset.
    group1_out = ConcatenateWithOffset(group1.id_offsets)([
        workclass_lookup_out,
        hours_per_week_bucketize_out,
        capital_gain_bucketize_out,
        capital_loss_bucketize_out,
    ])
    group2_out = ConcatenateWithOffset(group2.id_offsets)([
        education_hash_out,
        marital_status_lookup_out,
        relationship_lookup_out,
        occupation_hash_out,
    ])
    group3_out = ConcatenateWithOffset(group3.id_offsets)([
        age_bucketize_out,
        sex_lookup_out,
        race_lookup_out,
        native_country_hash_out,
    ])

    def embedded(group_out, info):
        # Embed a concatenated id group with its configured dimensions.
        return SparseEmbedding(
            input_dim=info.input_dim,
            output_dim=info.output_dim,
        )(group_out)

    # Wide part embeds groups 1 and 2; the deep part embeds all three.
    wide_embeddings_out = [
        embedded(group1_out, group1_embedding_wide),
        embedded(group2_out, group2_embedding_wide),
    ]
    deep_embeddings_out = [
        embedded(group1_out, group1_embedding_deep),
        embedded(group2_out, group2_embedding_deep),
        embedded(group3_out, group3_embedding_deep),
    ]

    return wide_embeddings_out, deep_embeddings_out