Example 1
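The three examples below rely on a few names that are never defined in this listing: fc_bm, NUM_REPEATS, tf_function, plus the usual TensorFlow/NumPy imports. A minimal preamble is sketched here under the assumption that fc_bm is the Keras feature-column benchmark helper module; the exact module path and the repeat count are assumptions and may differ in your tree.

import numpy as np
import tensorflow as tf
from tensorflow import keras

# Assumed location of the benchmark helpers (create_data, run_keras, run_fc);
# adjust the path to wherever feature_column_benchmark lives in your checkout.
from keras.layers.preprocessing.benchmarks import feature_column_benchmark as fc_bm

NUM_REPEATS = 10            # assumed repeat count; the real harness defines this
tf_function = tf.function   # assumed alias backing the @tf_function() decorators
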
def embedding_varlen(batch_size, max_length):
    """Benchmark a variable-length embedding."""
    # Data and constants.
    embedding_size = 32768
    data = fc_bm.create_data(max_length,
                             batch_size * NUM_REPEATS,
                             embedding_size - 1,
                             dtype=int)

    # Keras implementation
    model = keras.Sequential()
    model.add(keras.Input(shape=(None, ), name="data", dtype=tf.int64))
    model.add(keras.layers.Embedding(embedding_size, 256))
    model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))

    # FC implementation
    fc = tf.feature_column.embedding_column(
        tf.feature_column.categorical_column_with_identity(
            "data", num_buckets=embedding_size - 1),
        dimension=256)

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        fc.transform_feature(
            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
            None,
        )

    # Benchmark runs
    keras_data = {"data": data.to_tensor(default_value=0)}
    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)

    fc_data = {"data": data.to_tensor(default_value=0)}
    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time
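
The helper fc_bm.create_data is not shown here. The examples assume it returns a tf.RaggedTensor with batch_size * NUM_REPEATS rows, each row at most max_length entries long, with values below the third argument. A hypothetical stand-in with that contract (the real helper may differ in its details) could look like:

def create_data_standin(max_length, num_rows, max_value, dtype=int):
    # Random per-row lengths in [1, max_length], then random values in [0, max_value).
    row_lengths = np.random.randint(1, max_length + 1, size=num_rows)
    values = (np.random.random(size=row_lengths.sum()) * max_value).astype(dtype)
    return tf.RaggedTensor.from_row_lengths(values, row_lengths)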

Example 2
def embedding_varlen(batch_size, max_length):
    """Benchmark a variable-length embedding."""
    # Data and constants.
    max_value = 25.0
    bins = np.arange(1.0, max_value)
    data = fc_bm.create_data(
        max_length, batch_size * NUM_REPEATS, 100000, dtype=float
    )

    # Keras implementation
    model = keras.Sequential()
    model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.float32))
    model.add(keras.layers.Discretization(bins))

    # FC implementation
    fc = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column("data"), boundaries=list(bins)
    )

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        fc.transform_feature(
            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
            None,
        )

    # Benchmark runs
    keras_data = {"data": data.to_tensor(default_value=0.0)}
    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)

    fc_data = {"data": data.to_tensor(default_value=0.0)}
    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time

Example 3
def embedding_varlen(batch_size, max_length):
    """Benchmark a variable-length embedding."""
    # Data and constants.
    embedding_size = 32768
    data = fc_bm.create_data(max_length,
                             batch_size * NUM_REPEATS,
                             embedding_size - 1,
                             dtype=int)
    weight = tf.ones_like(data, dtype=tf.float32)

    # Keras implementation
    data_input = keras.Input(shape=(None, ),
                             ragged=True,
                             name="data",
                             dtype=tf.int64)
    weight_input = keras.Input(shape=(None, ),
                               ragged=True,
                               name="weight",
                               dtype=tf.float32)
    embedded_data = keras.layers.Embedding(embedding_size, 256)(data_input)
    weighted_embedding = tf.multiply(embedded_data,
                                     tf.expand_dims(weight_input, -1))
    reduced_embedding = tf.reduce_sum(weighted_embedding, axis=1)
    model = keras.Model([data_input, weight_input], reduced_embedding)

    # FC implementation
    fc = tf.feature_column.embedding_column(
        tf.feature_column.weighted_categorical_column(
            tf.feature_column.categorical_column_with_identity(
                "data", num_buckets=embedding_size - 1),
            weight_feature_key="weight",
        ),
        dimension=256,
    )

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        fc.transform_feature(
            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
            None,
        )

    # Benchmark runs
    keras_data = {"data": data, "weight": weight}
    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)

    fc_data = {"data": data.to_sparse(), "weight": weight.to_sparse()}
    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time
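
Each embedding_varlen function returns the average Keras and feature-column step times, so a minimal driver (hypothetical; the real benchmark harness that collects and reports results is not part of this listing) only needs to call it and compare the two numbers:

if __name__ == "__main__":
    k_time, fc_time = embedding_varlen(batch_size=32, max_length=256)
    print(f"Keras avg step time:          {k_time:.6f} s")
    print(f"Feature-column avg step time: {fc_time:.6f} s")
    print(f"Keras relative to FC:         {k_time / fc_time:.2f}x")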