Example #1 (score: 0)
    def test_get_sparse_tensors(self, inputs_args, expected_args):
        """Hash-bucket sequence column yields id tensors and no weight tensor."""
        raw = sparse_tensor.SparseTensorValue(**inputs_args)
        want = sparse_tensor.SparseTensorValue(**expected_args)
        hashed_column = sfc.sequence_categorical_column_with_hash_bucket(
            'aaa', hash_bucket_size=10)

        pair = _get_sparse_tensors(hashed_column, {'aaa': raw})

        # A plain categorical column carries no per-id weights.
        self.assertIsNone(pair.weight_tensor)
        _assert_sparse_tensor_indices_shape(
            self, want, self.evaluate(pair.id_tensor))
    def _build_feature_columns(self):
        """Return (context_columns, sequence_columns) for the test model."""
        int_ctx = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
        context_columns = [
            fc.embedding_column(int_ctx, dimension=10),
            fc.numeric_column('float_ctx'),
        ]

        # Sequence-valued inputs get sequence categorical columns, each
        # wrapped in its own embedding.
        sequence_columns = [
            fc.embedding_column(
                sfc.sequence_categorical_column_with_identity(
                    'int_list', num_buckets=10),
                dimension=10),
            fc.embedding_column(
                sfc.sequence_categorical_column_with_hash_bucket(
                    'bytes_list', hash_bucket_size=100),
                dimension=20),
        ]

        return context_columns, sequence_columns
  def _build_feature_columns(self):
    """Build the context and sequence feature-column lists.

    Returns:
      A tuple (context_columns, sequence_columns).
    """
    ctx_id = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
    context = [
        fc.embedding_column(ctx_id, dimension=10),
        fc.numeric_column('float_ctx'),
    ]

    seq_identity = sfc.sequence_categorical_column_with_identity(
        'int_list', num_buckets=10)
    seq_hashed = sfc.sequence_categorical_column_with_hash_bucket(
        'bytes_list', hash_bucket_size=100)
    sequence = [
        fc.embedding_column(seq_identity, dimension=10),
        fc.embedding_column(seq_hashed, dimension=20),
    ]

    return context, sequence
Example #4 (score: 0)
def _get_sequence_categorical_column(params: dict) -> fc.SequenceCategoricalColumn:
    """Build a sequence categorical column from a config dict.

    Exactly one of the following selector keys picks the column type:
      'vocabulary'  -> vocabulary-list column (default_value=0)
      'bucket_size' -> hash-bucket column
      'file'        -> vocabulary-file column (default_value=0)
      'num_buckets' -> identity column

    Args:
        params: Config dict. Must contain 'key' (the feature name) plus one
            of the selector keys above.

    Returns:
        The constructed sequence categorical column.

    Raises:
        ValueError: If no recognized selector key is present.
        KeyError: If 'key' is missing from ``params``.
    """
    key = params['key']
    # `in params` instead of `in params.keys()` — same membership test,
    # idiomatic form.
    if 'vocabulary' in params:
        return sfc.sequence_categorical_column_with_vocabulary_list(
            key,
            vocabulary_list=_parse_vocabulary(params['vocabulary']),
            default_value=0)
    if 'bucket_size' in params:
        return sfc.sequence_categorical_column_with_hash_bucket(
            key, hash_bucket_size=params['bucket_size'])
    if 'file' in params:
        return sfc.sequence_categorical_column_with_vocabulary_file(
            key, vocabulary_file=params['file'], default_value=0)
    if 'num_buckets' in params:
        return sfc.sequence_categorical_column_with_identity(
            key, num_buckets=params['num_buckets'])
    # ValueError subclasses Exception, so existing `except Exception`
    # handlers still catch this; the message now says what was wrong.
    raise ValueError(
        f"params for feature {key!r} must contain one of 'vocabulary', "
        "'bucket_size', 'file', or 'num_buckets'")
Example #5 (score: 0)
def embedding_varlen(batch_size, max_length):
    """Benchmark a variable-length embedding."""
    # Data and constants.
    num_buckets = 10000
    vocab = fc_bm.create_vocabulary(32768)
    data = fc_bm.create_string_data(
        max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0)

    # Keras implementation: hash strings into buckets via a Hashing layer.
    model = keras.Sequential()
    model.add(keras.Input(shape=(max_length,), name="data", dtype=dt.string))
    model.add(hashing.Hashing(num_buckets))

    # FC implementation of the same hashing.
    fc = sfc.sequence_categorical_column_with_hash_bucket("data", num_buckets)

    # Wrap the FC implementation in a tf.function for a fair comparison
    @tf_function()
    def fc_fn(tensors):
        fc.transform_feature(fcv2.FeatureTransformationCache(tensors), None)

    # Both paths consume the same dense view of the ragged string data.
    dense = data.to_tensor(default_value="", shape=(batch_size, max_length))

    # Benchmark runs.
    k_avg_time = fc_bm.run_keras(
        {"data": dense}, model, batch_size, NUM_REPEATS)
    fc_avg_time = fc_bm.run_fc(
        {"data": dense}, fc_fn, batch_size, NUM_REPEATS)

    return k_avg_time, fc_avg_time