Example #1
    def build_model(self, is_training=True):
        columns = get_feature_columns()
        item_columns = fc.input_layer(self.other_features, columns['item'])
        user_columns = fc.input_layer(self.other_features, columns['user'])
        # Look up the user's cluster features by the same ID
        self.user_findid = self.table1.lookup(
            self.other_features['FEA_CtxUid'])
        self.user_embedding = tf.gather(self.user_feature, self.user_findid)

        # Split the four user relation types and apply attention over them
        user_split = tf.split(self.user_embedding, 4, 1)
        rela = tf.stack(user_split, 1)
        rela_conv = tf.layers.conv1d(rela, 1, 1)
        coef = tf.expand_dims(user_columns, 1)
        coef = tf.layers.conv1d(coef, 1, 1)
        out = tf.multiply(coef, rela_conv)
        coefs = tf.nn.softmax(tf.nn.tanh(out), 1)
        res = tf.multiply(coefs, rela)
        res = tf.reduce_sum(res, 1)

        item = self.cus_nn(item_columns, None, [
                           EMBEDDING_NUM * 2, EMBEDDING_NUM], is_training)
        norm_item = tf.sqrt(tf.reduce_sum(tf.square(item), 1, True))
        item_emb = tf.truediv(item, norm_item)
        # for column in sorted(params['user_columns'], key=lambda x: x.name):
        #     print(column.name)

        # Produce the item embedding
        if args.mode == 'sample':
            self.prediction = {
                'vid': self.other_features['FEA_SrcItemId'],
                'item': item_emb,
            }
        else:
            user = self.cus_nn(user_columns, None, [
                               EMBEDDING_NUM * 4, EMBEDDING_NUM * 2, EMBEDDING_NUM], is_training)

            # Concatenate the attention output and feed it into a fully connected layer
            user_columns_out = tf.concat([user_columns, res], axis=1)
            regularizer = tf.contrib.layers.l2_regularizer(scale=0.1)
            user = tf.layers.dense(
                inputs=user_columns_out,
                units=EMBEDDING_NUM,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                activation=partial(tf.nn.leaky_relu, alpha=0.2),
                use_bias=True,
                kernel_regularizer=regularizer,
            )
            # Residual connection
            # user = user + net
            # user =  tf.concat([res, user],axis=1)
            # user = self.cus_nn(user, None, [EMBEDDING_NUM], is_training)

            norm_user = tf.sqrt(tf.reduce_sum(tf.square(user), 1, True))
            user_emb = tf.truediv(user, norm_user)
            self.cos_sim_raw = tf.reduce_sum(
                tf.multiply(user_emb, item_emb), 1, True)

            self.prob = tf.nn.sigmoid(self.cos_sim_raw)
Example #2
def test_bucketized_column():
    sample = {
        'price': [[5.], [16], [25], [36]],
        'time': [[2.], [6], [8], [15]]
    }
    price_column = feature_column.numeric_column('price')
    bucket_price = feature_column.bucketized_column(price_column,
                                                    [10, 20, 30, 40])
    price_bucket_tensor = feature_column.input_layer(sample, [bucket_price])

    time_column = feature_column.numeric_column('time')
    bucket_time = feature_column.bucketized_column(time_column, [5, 10, 12])
    time_bucket_tensor = feature_column.input_layer(sample, [bucket_time])
    with tf.Session() as session:
        print(session.run([price_bucket_tensor, time_bucket_tensor]))
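Bucketization is deterministic given the boundaries (4 boundaries produce 5 buckets for price, 3 produce 4 for time), so the run should print, up to formatting, something like:

# [array([[1., 0., 0., 0., 0.],
#         [0., 1., 0., 0., 0.],
#         [0., 0., 1., 0., 0.],
#         [0., 0., 0., 1., 0.]], dtype=float32),
#  array([[1., 0., 0., 0.],
#         [0., 1., 0., 0.],
#         [0., 1., 0., 0.],
#         [0., 0., 0., 1.]], dtype=float32)]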
def test_categorical_column_with_hash_bucket():
    # Source data
    color_data = {'color': [[2], [5], [-1], [0]]}  # 4 sample rows, shape=[4,1]
    builder = _LazyBuilder(color_data)

    # categorical_column
    color_column = feature_column.categorical_column_with_hash_bucket(
        'color', 7, dtype=tf.int32)

    # tensor
    color_column_tensor = color_column._get_sparse_tensors(builder)  # sparse representation
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))

    # indicator_column converts the sparse column to dense, i.e. one-hot form (multi-hot for repeated values)
    color_column_identy = feature_column.indicator_column(color_column)

    # input_layer connects the data source with the declared columns and produces a new tensor
    color_dense_tensor = feature_column.input_layer(color_data,
                                                    [color_column_identy])

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run([color_dense_tensor]))
def test_crossed_column():
    """ crossed column测试 """
    # Source data
    features = {
        'price': [['A'], ['B'], ['C']],  # 0,1,2
        'color': [['R'], ['G'], ['B']]  # 0,1,2
    }
    # categorical_column
    price = feature_column.categorical_column_with_vocabulary_list(
        'price', ['A', 'B', 'C', 'D'])
    color = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'])

    # crossed_column produces a sparse representation
    p_x_c = feature_column.crossed_column([price, color], 16)

    # Dense representation
    p_x_c_identity = feature_column.indicator_column(p_x_c)

    # Connect the crossed_column to the source data
    p_x_c_identity_dense_tensor = feature_column.input_layer(
        features, [p_x_c_identity])

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([p_x_c_identity_dense_tensor]))
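Note that crossed_column hashes the string combination of the paired values into hash_bucket_size=16 buckets, so which of the 16 indicator positions light up depends on the hash function and carries no meaning by itself.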
Example #5
def build_mode_norm_test(features, mode, params):
    # Build the hidden layers (sizes are hardcoded in this test variant).
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    fea_net = fc.input_layer(features, params['feature_columns'])
    fea_net = tf.layers.batch_normalization(fea_net, training=is_training)

    #x1 = tf.layers.dense(fea_net, units=256, activation=None, use_bias=False)
    #hidden1 = tf.nn.relu(tf.layers.batch_normalization(x1, training=is_training), name='hidden1')

    #x2 = tf.layers.dense(hidden1, units=128, activation=None, use_bias=False)
    #hidden2 = tf.nn.relu(tf.layers.batch_normalization(x2, training=is_training), name='hidden2')

    #net = tf.layers.dense(hidden2, units=64, activation=tf.tanh, name='user_vector_layer')

    hidden1 = tf.layers.dense(fea_net,
                              units=128,
                              activation=tf.nn.relu,
                              name='hidden1')
    #hidden2 = tf.layers.dense(hidden1, units=128, activation=tf.nn.relu, name='hidden2')
    net = tf.layers.dense(hidden1,
                          units=64,
                          activation=tf.nn.relu,
                          name='user_vector_layer')

    return net
Example #6
def build_mode_norm(features, mode, params):
    # Build the hidden layers, sized according to the 'hidden_units' param.
    use_batch_norm = params['use_batch_norm']
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    net = fc.input_layer(features, params['feature_columns'])
    if use_batch_norm:
        net = tf.layers.batch_normalization(net, training=is_training)

    for units in params['hidden_units']:
        if use_batch_norm:
            x = tf.layers.dense(net,
                                units=units,
                                activation=None,
                                use_bias=False)
            net = tf.nn.relu(
                tf.layers.batch_normalization(x, training=is_training))
        else:
            net = tf.layers.dense(net, units=units, activation=tf.nn.relu)

    if use_batch_norm:
        x = tf.layers.dense(net,
                            units=params['last_hidden_units'],
                            activation=None,
                            use_bias=False)
        net = tf.nn.elu(tf.layers.batch_normalization(x, training=is_training),
                        name='user_vector_layer')
    else:
        net = tf.layers.dense(net,
                              units=params['last_hidden_units'],
                              activation=tf.nn.relu,
                              name='user_vector_layer')

    return net
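One caveat when batch normalization is used inside an Estimator model_fn like this: the moving-average updates live in the UPDATE_OPS collection and must be attached to the train op, or the inference-time statistics never update. A minimal sketch (the AdamOptimizer choice and the loss variable are assumptions for illustration, not part of the original):

# Sketch: attach batch-norm update ops to the train op (TF1 style).
# loss is assumed to be computed elsewhere in the model_fn.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
        loss, global_step=tf.train.get_global_step())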
def test_embedding():
    tf.set_random_seed(1)
    # Source data
    color_data = {
        'color': [['R', 'G'], ['G', 'A'], ['B', 'B'], ['A', 'A']]
    }  # 4 sample rows
    builder = _LazyBuilder(color_data)

    # To use an embedding, first express the source column as a categorical_column;
    # this only declares the column, no data is attached yet
    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)
    # Express the data source as a tensor
    color_column_tensor = color_column._get_sparse_tensors(builder)

    # Create the embedding_column: the first argument is the categorical_column, the second is the dimension
    color_embedding_column = feature_column.embedding_column(color_column,
                                                             4,
                                                             combiner='sum')

    # input_layer(data, columns) connects the data source with the embedding_column and yields a tensor
    color_embedding_dense_tensor = feature_column.input_layer(
        color_data, [color_embedding_column])

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))
        print('embedding' + '_' * 40)
        print(session.run([color_embedding_dense_tensor]))
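With combiner='sum', each output row is the sum of the embeddings of that row's tokens. A minimal sketch of the equivalent computation (embedding_weights and sparse_ids are hypothetical stand-ins for what input_layer builds internally):

# Sketch: what embedding_column(..., combiner='sum') computes under the hood.
# embedding_weights: the [vocab_size, 4] variable created by input_layer;
# sparse_ids: the SparseTensor of vocabulary indices with OOV entries pruned.
summed = tf.nn.embedding_lookup_sparse(
    embedding_weights, sparse_ids, sp_weights=None, combiner='sum')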
Example #8
def test_categorical_column_with_hash_bucket():
    # 1. Input features
    color_data = {'color': [[2], [5], [-1], [0]]}
    builder = _LazyBuilder(color_data)
    # 2. Feature columns (Sparse)
    color_column = feature_column.categorical_column_with_hash_bucket(
        'color', 7, dtype=tf.int32)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))

    # 2. Feature columns (Dense)
    # Convert the Categorical Column to Dense Column
    color_column_identity = feature_column.indicator_column(color_column)
    # 3. Feature tensor
    color_dense_tensor = feature_column.input_layer(color_data,
                                                    [color_column_identity])

    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run([color_dense_tensor]))
def my_model(features, labels, mode, params):
    net = fc.input_layer(features, params['feature_columns'])
    # Build the hidden layers, sized according to the 'hidden_units' param.
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
            net = tf.layers.dropout(
                net,
                params['dropout_rate'],
                training=(mode == tf.estimator.ModeKeys.TRAIN))
    my_head = tf.contrib.estimator.binary_classification_head(thresholds=[0.5])
    # Compute logits (1 per class).
    logits = tf.layers.dense(net,
                             my_head.logits_dimension,
                             activation=None,
                             name="my_model_output_logits")
    optimizer = tf.train.AdagradOptimizer(
        learning_rate=params['learning_rate'])

    def _train_op_fn(loss):
        return optimizer.minimize(loss, global_step=tf.train.get_global_step())

    return my_head.create_estimator_spec(features=features,
                                         mode=mode,
                                         labels=labels,
                                         logits=logits,
                                         train_op_fn=_train_op_fn)
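A model_fn like this is normally handed to tf.estimator.Estimator. A minimal usage sketch (the feature column and the params values below are illustrative assumptions, not from the original):

# Sketch: wiring my_model into an Estimator.
feature_columns = [tf.feature_column.numeric_column('price')]  # hypothetical
estimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={
        'feature_columns': feature_columns,
        'hidden_units': [64, 32],
        'dropout_rate': 0.1,
        'learning_rate': 0.01,
    })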
Example #10
def test_weighted_categorical_column():
    # 1. Input features
    color_data = {
        'color': [['R'], ['G'], ['B'], ['A']],
        'weight': [[1.0], [2.0], [4.0], [8.0]]
    }
    # 2. Feature columns (Sparse)
    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)
    # 2. Feature columns (Sparse)
    color_weight_categorical_column \
        = feature_column.weighted_categorical_column(color_column, 'weight')
    builder = _LazyBuilder(color_data)
    id_tensor, weight = color_weight_categorical_column._get_sparse_tensors(
        builder)

    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('weighted categorical' + '-' * 40)
        print(session.run([id_tensor]))
        print('-' * 40)
        print(session.run([weight]))

    # 2. Feature columns (Dense)
    weighted_column = feature_column.indicator_column(
        color_weight_categorical_column)
    # 3. Feature tensor
    weighted_column_dense_tensor = feature_column.input_layer(
        color_data, [weighted_column])
    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run([weighted_column_dense_tensor]))
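The weighted indicator output is deterministic here: each row contributes its single token's weight, and the OOV value 'A' maps to nothing, so the final print should be roughly:

# [array([[1., 0., 0.],
#         [0., 2., 0.],
#         [0., 0., 4.],
#         [0., 0., 0.]], dtype=float32)]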
Example #11
def test_shared_embedding_column_with_hash_bucket():
    # 1. Input features
    color_data = {
        'range': [[2, 2], [5, 5], [0, -1], [0, 0]],
        'id': [[2], [5], [-1], [0]]
    }
    builder = _LazyBuilder(color_data)
    # 2. Feature columns (Sparse)
    color_column = feature_column.categorical_column_with_hash_bucket(
        'range', 7, dtype=tf.int32)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    # 2. Feature columns (Sparse)
    color_column2 = feature_column.categorical_column_with_hash_bucket(
        'id', 7, dtype=tf.int32)
    color_column_tensor2 = color_column2._get_sparse_tensors(builder)
    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('not use input_layer' + '_' * 40)
        print(session.run([color_column_tensor.id_tensor]))
        print(session.run([color_column_tensor2.id_tensor]))

    # 2. Feature columns (Dense)
    color_column_embed = feature_column.shared_embedding_columns(
        [color_column2, color_column], 3, combiner='sum')
    print(type(color_column_embed))
    # 3. Feature tensor
    color_dense_tensor = feature_column.input_layer(color_data,
                                                    color_column_embed)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run(color_dense_tensor))
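Since shared_embedding_columns returns two columns backed by a single shared [7, 3] embedding matrix, input_layer concatenates their outputs and the dense tensor should come out with shape [4, 6]; equal ids in 'range' and 'id' reuse the same embedding rows.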
Example #12
def test_embedding():
    tf.set_random_seed(1)
    # 1. Input features
    color_data = {'color': [['R', 'G'], ['G', 'A'], ['B', 'B'], ['A', 'A']]}
    builder = _LazyBuilder(color_data)
    # 2. Feature columns (Sparse)
    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    with tf.Session() as session:
        #session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))

    # 2. Feature columns (Dense)
    color_embedding = feature_column.embedding_column(color_column,
                                                      4,
                                                      combiner='sum')
    # 3. Feature tensor
    color_embedding_dense_tensor = feature_column.input_layer(
        color_data, [color_embedding])

    with tf.Session() as session:
        # Embedding needs variables (weights) to do the embedding
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('embedding' + '_' * 40)
        print(session.run([color_embedding_dense_tensor]))
Example #13
def test_categorical_column_with_vocabulary_list():
    color_data = {
        'color': [['R', 'R'], ['G', 'R'], ['B', 'G'], ['A', 'A']]
    }  # 4 sample rows

    builder = _LazyBuilder(color_data)

    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)

    color_column_tensor = color_column._get_sparse_tensors(builder)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))

    # Convert the sparse column to dense, i.e. one-hot form (multi-hot for repeated values)
    color_column_identity = feature_column.indicator_column(color_column)

    color_dense_tensor = feature_column.input_layer(color_data,
                                                    [color_column_identity])

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run([color_dense_tensor]))
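The multi-hot output is deterministic for this vocabulary: duplicates are counted, and the all-OOV row becomes all zeros, so the final print should be close to:

# [array([[2., 0., 0.],
#         [1., 1., 0.],
#         [0., 1., 1.],
#         [0., 0., 0.]], dtype=float32)]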
Example #14
def test_multi_value_embedding():
    color_data = {
        'color': [['G', 'G'], ['G', 'B'], ['B', 'B'], ['G', 'R'], ['R', 'R'],
                  ['B', 'R']]
    }

    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)

    color_embedding = feature_column.embedding_column(color_column, 7)
    color_embedding_dense_tensor = feature_column.input_layer(
        color_data, [color_embedding])
    builder = _LazyBuilder(color_data)
    color_column_tensor = color_column._get_sparse_tensors(builder)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('embedding' + '-' * 40)
        print(session.run([color_embedding_dense_tensor]))
Example #15
def test_weighted_cate_column():
    # !!! id='' means missing; its weight must be 0, otherwise the id and weight
    # !!! lengths become inconsistent and an error is raised; weight must also
    # !!! be float, passing int raises an error
    x_values = {
        'id': [[b'a', b'z', b'a', b'c'], [b'b', b'', b'd', b'b']],
        'weight': [[1.0, 2.0, -3.0, 4.0], [5.0, 0.0, 7.0, -8.0]]
    }
    builder = _LazyBuilder(x_values)  # lazy representation of input

    # ================== define ops
    sparse_id_featcol = feature_column.categorical_column_with_vocabulary_list(
        'id', ['a', 'b', 'c'], dtype=tf.string, default_value=-1)
    sparse_featcol = feature_column.weighted_categorical_column(
        categorical_column=sparse_id_featcol, weight_feature_key='weight')
    x_sparse_tensor = sparse_featcol._get_sparse_tensors(builder)

    # indicator_column converts the sparse tensor to dense MHE (multi-hot) format, shape=[batch_size, #tokens];
    # each entry is the sum of all weights with which that token occurs
    dense_featcol = feature_column.indicator_column(sparse_featcol)
    x_dense_tensor = feature_column.input_layer(x_values, [dense_featcol])

    # ================== run
    with tf.Session() as sess:
        # The lookup tables must be initialized, otherwise an error is raised
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        id_sparse_value, weight_sparse_value = sess.run(
            [x_sparse_tensor.id_tensor, x_sparse_tensor.weight_tensor])

        print("************************* sparse id tensor")
        # The sparse tensor's id_tensor keeps the same shape as the raw input, [batch_size, max_tokens_per_example]=[2,4]
        # SparseTensorValue(indices=array(
        #       [[0, 0],
        #        [0, 1],
        #        [0, 2],
        #        [0, 3],
        #        [1, 0],
        #        [1, 2],
        #        [1, 3]]), values=array([ 0, -1,  0,  2,  1, -1,  1]), dense_shape=array([2, 4]))
        print(id_sparse_value)

        print("************************* sparse weight tensor")
        # The sparse tensor's weight_tensor keeps the same shape as the raw input, [batch_size, max_tokens_per_example]=[2,4]
        # SparseTensorValue(indices=array(
        #       [[0, 0],
        #        [0, 1],
        #        [0, 2],
        #        [0, 3],
        #        [1, 0],
        #        [1, 2],
        #        [1, 3]]), values=array([ 1.,  2., -3.,  4.,  5.,  7., -8.], dtype=float32), dense_shape=array([2, 4]))
        print(weight_sparse_value)

        print("************************* dense MHE tensor")
        # indicator_column converts the sparse tensor to a dense MHE tensor of shape=[batch_size, total_tokens_in_vocab];
        # each value is the sum of all weights with which that token occurs
        # [[-2.  0.  4.]
        #  [ 0. -3.  0.]]
        print(sess.run(x_dense_tensor))
Example #16
def test_shared_embedding_column_with_hash_bucket():
    color_data = {
        'color': [[2, 2], [5, 5], [0, -1], [0, 0]],
        'color2': [[2], [5], [-1], [0]]
    }  # 4 sample rows
    builder = _LazyBuilder(color_data)
    color_column = feature_column.categorical_column_with_hash_bucket(
        'color', 7, dtype=tf.int32)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    color_column2 = feature_column.categorical_column_with_hash_bucket(
        'color2', 7, dtype=tf.int32)
    color_column_tensor2 = color_column2._get_sparse_tensors(builder)
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('not use input_layer' + '_' * 40)
        print(session.run([color_column_tensor.id_tensor]))
        print(session.run([color_column_tensor2.id_tensor]))

    # Embed both sparse columns with a single shared embedding matrix
    color_column_embed = feature_column.shared_embedding_columns(
        [color_column2, color_column], 3, combiner='sum')
    print(type(color_column_embed))
    color_dense_tensor = feature_column.input_layer(color_data,
                                                    color_column_embed)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('use input_layer' + '_' * 40)
        print(session.run(color_dense_tensor))
Example #17
def practise():
    d = {'x': [[32], [16], [38], [98]]}
    cd = feature_column.numeric_column('x')
    bcd = feature_column.bucketized_column(cd, [10, 20, 40, 60])
    fcd = feature_column.input_layer(d, [bcd])

    with tf.Session() as sess:
        print(sess.run(fcd))
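Given boundaries [10, 20, 40, 60], the bucket assignment is deterministic (32→2, 16→1, 38→2, 98→4 across 5 buckets), so the print should be roughly:

# [[0. 0. 1. 0. 0.]
#  [0. 1. 0. 0. 0.]
#  [0. 0. 1. 0. 0.]
#  [0. 0. 0. 0. 1.]]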
def test_bucketized_column():
    price = {'price': [[5.], [15.], [25.], [35.]]}  # 4 sample rows, shape=[4,1]
    price_column = feature_column.numeric_column('price')
    bucket_price = feature_column.bucketized_column(price_column,
                                                    [10, 20, 30, 40])
    price_bucket_tensor = feature_column.input_layer(price, [bucket_price])
    with tf.Session() as session:
        print(session.run([price_bucket_tensor]))
Example #19
def test_weighted_categorical_feature_embedding():
    color_data = {
        'color': [['R', 'R'], ['G', 'G'], ['B', 'B'], ['G', 'R'], ['G', 'B'],
                  ['B', 'R']],
        'weight': [[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.3, 0.2], [0.4, 0.3],
                   [0.4, 0.6]]
    }  # 6 sample rows

    color_column = feature_column.categorical_column_with_vocabulary_list(
        'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1)

    color_embedding = feature_column.embedding_column(color_column,
                                                      7,
                                                      combiner="sum")
    color_embedding_dense_tensor = feature_column.input_layer(
        color_data, [color_embedding])

    color_weight_categorical_column = feature_column.weighted_categorical_column(
        color_column, 'weight')
    color_embedding_weighted = feature_column.embedding_column(
        color_weight_categorical_column, 7, combiner="sum")
    color_embedding_dense_tensor_2 = feature_column.input_layer(
        color_data, [color_embedding_weighted])

    builder = _LazyBuilder(color_data)
    color_column_tensor = color_column._get_sparse_tensors(builder)
    color_weighted_tensor = color_weight_categorical_column._get_sparse_tensors(
        builder)  # a pair (id_tensor, weight_tensor)

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print(session.run([color_column_tensor.id_tensor]))
        print("color column weight:")
        print(color_column_tensor.weight_tensor)
        print("color column weighted categorical,  weight:")
        print(session.run([color_weighted_tensor.id_tensor]))
        print(session.run([color_weighted_tensor.weight_tensor]))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        print('embedding' + '-' * 40)
        print(session.run([color_embedding_dense_tensor]))
        print('embedding weighted categorical column')
        print(session.run([color_embedding_dense_tensor_2]))
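A quick sanity check on the output: the first three rows all carry weights [0.5, 0.5], so each of their weighted embeddings should equal exactly half of the corresponding unweighted combiner="sum" embedding (the sum of 0.5 times each token's embedding).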
Example #20
File: model.py  Project: yujun001/tf-ncf
def build_item_model(features, mode, params):
    with tf.variable_scope(
            "item_side",
            partitioner=tf.fixed_size_partitioner(
                len(FLAGS.ps_hosts.split(",")), axis=0)):
        item_uuid_embed = fc.input_layer(
            features, params["feature_configs"].all_columns["itemID"])
        # Normalize each item embedding to unit L2 norm (per example, axis=1)
        item_dense = tf.nn.l2_normalize(item_uuid_embed, axis=1)
        return item_dense
Example #21
def build_mode(features, mode, params, columns):
    net = fc.input_layer(features, columns)
    # Build the hidden layers, sized according to the 'hidden_units' param.
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
            net = tf.layers.dropout(
                net,
                params['dropout_rate'],
                training=(mode == tf.estimator.ModeKeys.TRAIN))
    return net
Example #22
def dupn_model_fn(features, labels, mode, params):
    behvr_emb, property_emb, item_emb = get_behavior_embedding(
        params, features)
    print("behvr_emb shape:", behvr_emb.shape)
    print("property_emb shape:", property_emb.shape)
    print("item_emb shape:", item_emb.shape)

    inputs = tf.concat([behvr_emb, property_emb], -1)
    print("lstm inputs shape:", inputs.shape)
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=params["num_units"])
    #initial_state = lstm_cell.zero_state(params["batch_size"], tf.float32)
    outputs, state = tf.nn.dynamic_rnn(lstm_cell, inputs, dtype=tf.float32)
    print("lstm output shape:", outputs.shape)

    masks = tf.cast(features["behaviorPids"] >= 0, tf.float32)
    user = fc.input_layer(features, params["user_feature_columns"])
    context = tf.concat([user, item_emb], -1)
    print("attention context shape:", context.shape)
    sequence = attention(outputs, context, params, masks)
    print("sequence embedding shape:", sequence.shape)

    other = fc.input_layer(features, params["other_feature_columns"])
    net = tf.concat([sequence, item_emb, other], -1)
    # Build the hidden layers, sized according to the 'hidden_units' param.
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=int(units), activation=tf.nn.relu)
        if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
            net = tf.layers.dropout(
                net,
                params['dropout_rate'],
                training=(mode == tf.estimator.ModeKeys.TRAIN))
    # Compute logits
    logits = tf.layers.dense(net, 1, activation=None)

    optimizer = optimizers.get_optimizer_instance(params["optimizer"],
                                                  params["learning_rate"])
    my_head = tf.contrib.estimator.binary_classification_head(thresholds=[0.5])
    return my_head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        logits=logits,
        train_op_fn=lambda loss: optimizer.minimize(
            loss, global_step=tf.train.get_global_step()))
Example #23
File: esmm.py  Project: lh0730/esmm-1
def build_mode(features, mode, params):
  net = fc.input_layer(features, params['feature_columns'])
  # Build the hidden layers, sized according to the 'hidden_units' param.
  for units in params['hidden_units']:
    net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
      net = tf.layers.dropout(net, params['dropout_rate'], training=(mode == tf.estimator.ModeKeys.TRAIN))
  # Compute logits
  logits = tf.layers.dense(net, 1, activation=None)
  return logits
def practise():
    fx = {'x': [['a', 'a'], ['b', 'c'], ['c', 'e'], ['d', ''], ['e', 'f']]}
    # Use a local name that does not shadow the fc module alias used elsewhere
    hash_col = feature_column.categorical_column_with_hash_bucket('x', 5)
    fic = feature_column.indicator_column(hash_col)
    t2 = hash_col._get_sparse_tensors(_LazyBuilder(fx)).id_tensor
    tsor = feature_column.input_layer(fx, fic)

    with tf.Session() as sess:
        print(sess.run(t2))
        print(sess.run(tsor))
Example #25
def test_identity_feature_column():
    sample = {'price': [[1], [2], [3], [0]]}
    # price_column = feature_column.numeric_column('price')
    price_column = feature_column.categorical_column_with_identity(
        key='price', num_buckets=4)
    indicator = feature_column.indicator_column(price_column)
    price_column_tensor = feature_column.input_layer(sample, [indicator])

    with tf.Session() as session:
        print(session.run([price_column_tensor]))
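categorical_column_with_identity maps each integer directly to its own bucket, so the indicator output is deterministic:

# [array([[0., 1., 0., 0.],
#         [0., 0., 1., 0.],
#         [0., 0., 0., 1.],
#         [1., 0., 0., 0.]], dtype=float32)]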
Example #26
  def inference(self, feats):
    # embedding_columns, order_columns, spacetime_columns, user_columns = make_columns()
    embedding_columns, order_columns, spacetime_columns, user_columns = make_columns_with_normalizer()

    # TODO: split feats by column group
    with tf.name_scope('embedding_columns'):
      embedding_tensor = fc.input_layer(feats, embedding_columns)
    with tf.name_scope('order_columns'):
      order_tensor = fc.input_layer(feats, order_columns)
    with tf.name_scope('spacetime_columns'):
      spacetime_tensor = fc.input_layer(feats, spacetime_columns)

    input_tensor = tf.concat([embedding_tensor, order_tensor, spacetime_tensor], axis=1, name='input_concat')

    eta_d = tf.concat([net('eta_d_%d'%ix, input_tensor, self.layer_units) for ix in range(self.d_k)], axis=1)
    eta_c = tf.concat([net('eta_c_%d'%ix, input_tensor, self.layer_units) for ix in range(self.c_k)], axis=1)

    d_softmax_logits = net('logits_d', input_tensor, self.layer_units, self.d_k)
    c_softmax_logits = net('logits_c', input_tensor, self.layer_units, self.c_k)

    return eta_d, eta_c, d_softmax_logits, c_softmax_logits
Example #27
def test_bucketized_column():
    # 1. Input features
    price = {'price': [[15.], [5.], [35.], [25.]]}
    # 2. Feature columns (Dense)
    price_column = feature_column.numeric_column('price')
    # 2. Feature columns (Dense): bucketized_column is both Dense and
    # Categorical
    bucket_price = feature_column.bucketized_column(price_column, [10, 20, 30])
    # 3. Feature tensor
    price_bucket_tensor = feature_column.input_layer(price, [bucket_price])
    with tf.Session() as session:
        print(session.run([price_bucket_tensor]))
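Note that bucketized_column is unusual in being both a dense and a categorical column: it can be fed straight into input_layer as shown here, used in a linear model, or wrapped in embedding_column or crossed_column like any other categorical column.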
Example #28
def test_cate_featcol_with_vocablist():
    # ================== prepare input
    # 1. Why the b prefix: strings read in through input_fn carry a b prefix,
    #    and we test whether a vocab list of plain str still matches those byte strings
    # 2. '' represents missing, feature_column treats them "ignored in sparse tensor, and 0 in dense tensor"
    # 3. 'z' represents OOV, feature_column treats them "-1 in sparse tensor, and 0 in dense tensor"
    # 4. duplicates should be merged in dense tensor by summing up their occurrence
    x_values = {'x': [[b'a', b'z', b'a', b'c'], [b'b', b'', b'd', b'b']]}
    builder = _LazyBuilder(x_values)  # lazy representation of input

    # ================== define ops
    sparse_featcol = feature_column.categorical_column_with_vocabulary_list(
        'x', ['a', 'b', 'c'], dtype=tf.string, default_value=-1)
    x_sparse_tensor = sparse_featcol._get_sparse_tensors(builder)
    # Duplicates within a row are not merged, so there is no weight tensor at all;
    # they merely produce repeated ids in id_tensor, which are then summed during embedding_lookup_sparse
    assert x_sparse_tensor.weight_tensor is None

    # indicator_column converts the sparse tensor to dense MHE format; row 1 has duplicates, so the result should be multi-hot
    dense_featcol = feature_column.indicator_column(sparse_featcol)
    x_dense_tensor = feature_column.input_layer(x_values, [dense_featcol])

    # ================== run
    with tf.Session() as sess:
        # The lookup tables must be initialized, otherwise an error is raised
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        print("************************* sparse tensor")
        # The results show:
        # 1. byte strings in the input match the plain str values in the vocab list
        # 2. '' is ignored in the sparse tensor, so row 2 ends up with one entry fewer
        # 3. 'z' and 'd' are OOV and are mapped to -1 in the sparse tensor
        # 4. the sparse tensor's dense_shape matches the shape of the raw input
        # [SparseTensorValue(indices=array([[0, 0],
        #                                   [0, 1],
        #                                   [0, 2],
        #                                   [0, 3],
        #                                   [1, 0],
        #                                   [1, 2],
        #                                   [1, 3]]), values=array([0, -1, 0, 2, 1, -1, 1]), dense_shape=array([2, 4]))]
        print(sess.run([x_sparse_tensor.id_tensor]))

        print("************************* dense MHE tensor")
        # The results show:
        # 1. in the dense representation, duplicate occurrences are summed (MHE)
        # 2. neither missing values (perhaps introduced by padding) nor OOV values appear in the dense result
        # 3. the dense tensor's shape is [batch_size, vocab_size]
        # [[2. 0. 1.]
        #  [0. 2. 0.]]
        print(sess.run(x_dense_tensor))
Example #29
def build_model_net(features, mode, params):
    net = fc.input_layer(features, params['feature_columns'])
    # net = tf.layers.batch_normalization(net, training=(mode == tf.estimator.ModeKeys.TRAIN))
    # Build the hidden layers, sized according to the 'hidden_units' param.
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
        if 'dropout_rate' in params and params['dropout_rate'] > 0.0:
            net = tf.layers.dropout(
                net,
                params['dropout_rate'],
                training=(mode == tf.estimator.ModeKeys.TRAIN))
            print("net node count", net.shape[-1].value)
    logits = tf.layers.dense(net, units=1)
    return logits
Example #30
def emb():
    xb = {'x': [['a', 'b'], ['a', 'c'], ['b', 'c']]}
    x = {
        'x': [['a', 'b'], ['b', 'c'], ['c', ''], ['', '']]
    }  # multi-valued features are allowed; a good way to handle e.g. the words of a document
    fx = feature_column.categorical_column_with_vocabulary_list(
        'x', ['a', 'b', 'c', 'd'], dtype=tf.string, default_value=0)
    fex = feature_column.embedding_column(fx, 4, 'mean')

    t = feature_column.input_layer(x, [fex])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())
        print(sess.run(t))
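Note: empty strings in a dense string input are treated as missing and dropped when the input is converted to a sparse tensor, so the all-empty last row should come back as a zero vector, and the resulting tensor should have shape [4, 4] (batch size by embedding dimension).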