Example #1
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    multi_cate_feats = cate_feats[:, params.cate_field_size:params.cate_field_size + params.multi_feats_size]
    attention_cate_feats = cate_feats[:, params.cate_field_size + params.multi_feats_size:]

    # init_embedding
    feats_emb = my_layer.emb_init(name='feats_emb', feat_num=params.cate_feats_size, embedding_size=params.embedding_size)
    # single_category -> embedding
    single_cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
    single_cate_emb = tf.reshape(single_cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])
    # attention
    attention_emb = attention_alg(params, feats_emb, multi_cate_feats, single_cate_feats, attention_cate_feats)
    # multi_category -> embedding
    multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range, feats_emb, multi_cate_feats)
    # deep input dense
    if len(params.multi_feats_range) > 0:
        dense = tf.concat([cont_feats, vector_feats, single_cate_emb, multi_cate_emb, attention_emb], axis=1, name='dense_vector')
    else:
        dense = tf.concat([cont_feats, vector_feats, single_cate_emb, attention_emb], axis=1, name='dense_vector')
    # deep
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        dense = tf.layers.dense(inputs=dense, units=params.hidden_units[i], activation=tf.nn.relu)
    out = tf.layers.dense(inputs=dense, units=1)
    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)
    return model_estimator_spec
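Every example on this page assumes a `my_layer.emb_init` helper that builds the shared embedding table; its implementation is not shown. A minimal sketch of what such a helper typically looks like follows (hypothetical, not the original code; the `zero_first_row` flag mirrors the one visible in Example #11, and zeroing row 0 makes padding ids contribute nothing to lookups):

# Hypothetical sketch of my_layer.emb_init (not the original implementation).
# Builds a [feat_num, embedding_size] table; row 0 can be zeroed so that
# padding id 0 maps to a zero vector.
def emb_init(name, feat_num, embedding_size=1, initializer=None,
             zero_first_row=True):
    initializer = initializer or tf.glorot_normal_initializer()
    emb = tf.get_variable(name=name,
                          shape=[feat_num, embedding_size],
                          dtype=tf.float32,
                          initializer=initializer)
    if zero_first_row:
        # Mask out the first row so padded ids stay at zero.
        mask = tf.concat([tf.zeros([1, embedding_size]),
                          tf.ones([feat_num - 1, embedding_size])], axis=0)
        emb = emb * mask
    return emb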
Example #2
def model_fn(features, labels, mode, params):
    tf.set_random_seed(2019)

    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    feats_emb = my_layer.emb_init(name='feats_emb',
                                  feat_num=params.cate_feats_size,
                                  embedding_size=params.embedding_size)
    # category -> Embedding
    cate_emb = tf.nn.embedding_lookup(feats_emb, ids=cate_feats)
    cate_emb = tf.reshape(
        cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])

    # deep input dense
    dense = tf.concat([vector_feats, cate_emb], axis=1, name='dense_vector')

    # deep
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        dense = tf.layers.dense(inputs=dense,
                                units=params.hidden_units[i],
                                activation=tf.nn.relu)
    out = tf.layers.dense(inputs=dense, units=1)

    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)

    return model_estimator_spec
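Note that some examples declare `model_fn(labels, features, mode, params)` and others `model_fn(features, labels, mode, params)`. `tf.estimator.Estimator` inspects the signature and passes these arguments by name, so both orders work. A typical wiring looks like the sketch below (the `hparams` values and input functions are hypothetical placeholders, not from the original code):

# Hypothetical wiring of one of the model_fns above into an Estimator.
import tensorflow as tf

hparams = tf.contrib.training.HParams(cate_feats_size=100000,  # made-up sizes
                                      cate_field_size=20,
                                      embedding_size=8,
                                      hidden_units=[256, 128, 64],
                                      learning_rate=0.001)

estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   model_dir='./model',
                                   params=hparams)
# estimator.train(input_fn=train_input_fn)    # input_fns not shown here
# estimator.evaluate(input_fn=eval_input_fn)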
Example #3
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2020)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]

    cont_feats_index = tf.Variable(
        [[i for i in range(params.cont_field_count)]],
        trainable=False,
        dtype=tf.int64,
        name="cont_feats_index")
    cont_feats_index = tf.add(cont_feats_index, params.cate_emb_space_size)

    feats_size = params.cont_field_count + params.cate_emb_space_size
    feats_emb = my_layer.emb_init(name='feats_emb',
                                  feat_num=feats_size,
                                  embedding_size=params.embedding_size)

    cont_emb = tf.nn.embedding_lookup(
        feats_emb, ids=cont_feats_index)  # None * F * embedding_size
    cont_value = tf.reshape(cont_feats, shape=[-1, params.cont_field_count, 1])
    embeddings = tf.multiply(cont_emb, cont_value)
    cate_emb = tf.nn.embedding_lookup(feats_emb, ids=cate_feats)
    embeddings = tf.concat([embeddings, cate_emb], axis=1)
    if params.multi_feats_type == 'dense':
        for name_topN in params.multi_cate_field_list:
            dense_embedding = tf.nn.embedding_lookup(
                feats_emb,
                ids=features[name_topN[0]])  # None, topN, embedding_size
            dense_embedding = tf.reduce_sum(dense_embedding, axis=1)  # None, embedding_size
            dense_embedding = tf.reshape(dense_embedding,
                                         shape=[-1, 1, params.embedding_size])
            embeddings = tf.concat([embeddings, dense_embedding], axis=1)
    else:  # sparse
        for name_topN in params.multi_cate_field_list:
            sparse_embedding = tf.nn.embedding_lookup_sparse(
                feats_emb,
                sp_ids=features[name_topN[0]],
                sp_weights=None,
                combiner='sum')  # None * embedding_size
            sparse_embedding = tf.reshape(sparse_embedding,
                                          shape=[-1, 1, params.embedding_size])
            embeddings = tf.concat([embeddings, sparse_embedding], axis=1)

    deep_emb = tf.reshape(
        embeddings,
        shape=[-1, params.total_field_count * params.embedding_size])
    # deep
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        deep_emb = tf.layers.dense(inputs=deep_emb,
                                   units=params.hidden_units[i],
                                   activation=tf.nn.relu)
    out = tf.layers.dense(inputs=deep_emb, units=1)
    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)
    return model_estimator_spec
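The 'sparse' branch above feeds `tf.nn.embedding_lookup_sparse` a `tf.SparseTensor` per multi-category field, which sums a variable number of ids per row without padding. A small illustration of the expected input shape (field contents are made up):

# Illustrative only: what features[name_topN[0]] is expected to hold in the
# sparse branch. Row 0 carries two ids, row 1 carries one.
user_tags = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                            values=[17, 42, 8],   # embedding ids
                            dense_shape=[2, 5])   # batch=2, up to 5 ids/row
tag_emb = tf.nn.embedding_lookup_sparse(feats_emb,
                                        sp_ids=user_tags,
                                        sp_weights=None,
                                        combiner='sum')  # [2, embedding_size]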
Example #4
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2020)
    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]

    cont_feats_index = tf.Variable([[i for i in range(params.cont_field_count)]], trainable=False, dtype=tf.int64, name="cont_feats_index")
    cont_feats_index = tf.add(cont_feats_index, params.cate_emb_space_size)

    feats_size = params.cont_field_count + params.cate_emb_space_size
    feats_emb = my_layer.emb_init(name='feats_emb', feat_num=feats_size, embedding_size=params.embedding_size)

    # cont_feats
    cont_emb = tf.nn.embedding_lookup(feats_emb, ids=cont_feats_index)  # None * F * embedding_size
    cont_value = tf.reshape(cont_feats, shape=[-1, params.cont_field_count, 1])
    embeddings = tf.multiply(cont_emb, cont_value)
    # cate_feats
    cate_emb = tf.nn.embedding_lookup(feats_emb, ids=cate_feats)
    embeddings = tf.concat([embeddings, cate_emb], axis=1)
    # multi_cate_feats
    for name_topN in params.multi_cate_field_list:
        multi_cate_emb = tf.nn.embedding_lookup(feats_emb, ids=features[name_topN[0]])  # None, topN, embedding_size
        multi_cate_emb = tf.reduce_sum(multi_cate_emb, axis=1)  # None, embedding_size
        multi_cate_emb = tf.reshape(multi_cate_emb, shape=[-1, 1, params.embedding_size])
        embeddings = tf.concat([embeddings, multi_cate_emb], axis=1)
    # target-attention 1vN (e.g. item_cat1 & user_click_cat1)
    for k_v in params.target_att_1vN_list:
        item_feat = tf.split(cate_feats, params.cate_field_count, axis=1)[k_v[1]]
        user_feat = features[k_v[0]]
        nonzero_len = tf.count_nonzero(user_feat, axis=1)
        item_emb = tf.nn.embedding_lookup(feats_emb, ids=item_feat)  # [B, 1, H]
        item_emb = tf.reshape(item_emb, shape=[-1, params.embedding_size])  # [B, H]
        user_emb = tf.nn.embedding_lookup(feats_emb, ids=user_feat)  # [B, T, H]
        att_1vN_emb = my_layer.attention(item_emb, user_emb, nonzero_len)  # [B, 1, H]
        embeddings = tf.concat([embeddings, att_1vN_emb], axis=1)
    # target-attention NvN (e.g. item_tags & user_click_tags)
    for k_v in params.target_att_NvN_list:
        item_feat = features[k_v[1]]
        user_feat = features[k_v[0]]
        nonzero_len = tf.count_nonzero(user_feat, axis=1)
        item_emb = tf.nn.embedding_lookup(feats_emb, ids=item_feat)  # [B, N, H]
        user_emb = tf.nn.embedding_lookup(feats_emb, ids=user_feat)  # [B, T, H]
        att_NvN_emb = my_layer.attention_multi(item_emb, user_emb, nonzero_len)  # [B, 1, H]
        embeddings = tf.concat([embeddings, att_NvN_emb], axis=1)

    # deep
    embeddings = tf.layers.flatten(embeddings)
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        embeddings = tf.layers.dense(inputs=embeddings, units=params.hidden_units[i], activation=tf.nn.relu)
    out = tf.layers.dense(inputs=embeddings, units=1)
    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)
    return model_estimator_spec
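`my_layer.attention(item_emb, user_emb, nonzero_len)` is not shown on this page. A common DIN-style implementation scores each clicked item against the target item and masks positions beyond the true sequence length; a hedged single-head sketch (an assumption, not the original helper) is:

# Hypothetical DIN-style target attention.
# queries: [B, H], keys: [B, T, H], keys_length: [B] -> output [B, 1, H].
def attention(queries, keys, keys_length):
    T = tf.shape(keys)[1]
    queries = tf.tile(tf.expand_dims(queries, 1), [1, T, 1])       # [B, T, H]
    din_all = tf.concat([queries, keys, queries - keys,
                         queries * keys], axis=-1)                 # [B, T, 4H]
    d = tf.layers.dense(din_all, 32, activation=tf.nn.sigmoid)
    d = tf.layers.dense(d, 1, activation=None)                     # [B, T, 1]
    scores = tf.transpose(d, [0, 2, 1])                            # [B, 1, T]
    key_masks = tf.expand_dims(tf.sequence_mask(keys_length, T), 1)  # [B, 1, T]
    paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
    scores = tf.nn.softmax(tf.where(key_masks, scores, paddings))  # [B, 1, T]
    return tf.matmul(scores, keys)                                 # [B, 1, H]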
Example #5
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    # multi_cate_feats = cate_feats[:, params.cate_field_size:]

    feats_emb = my_layer.emb_init(name='feats_emb',
                                  feat_num=params.cate_feats_size,
                                  embedding_size=params.embedding_size)
    # single_category -> embedding
    cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
    cate_emb = tf.reshape(
        cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])
    # aid -> attention: the last 112 vector dims hold a 16-dim target-ad
    # embedding followed by 6 clicked-ad embeddings of 16 dims each
    aid = vector_feats[:, -112:-96]
    user_click_aids = vector_feats[:, -96:]
    user_click_aids = tf.reshape(user_click_aids, shape=[-1, 6, 16])
    # Fixed history length of 6; tf.fill handles the dynamic batch size
    # (tf.constant cannot take the unknown batch dimension as a shape).
    user_click_aids_len = tf.fill([tf.shape(user_click_aids)[0]], 6)
    attention_emb = attention(aid, user_click_aids, user_click_aids_len)
    attention_emb = tf.reshape(attention_emb, shape=[-1, 16])

    # deep input dense
    dense = tf.concat([cont_feats, vector_feats, cate_emb, attention_emb],
                      axis=1,
                      name='dense_vector')

    # deep
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        dense = tf.layers.dense(inputs=dense,
                                units=params.hidden_units[i],
                                activation=tf.nn.relu)
    out = tf.layers.dense(inputs=dense, units=1)
    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)
    return model_estimator_spec
Example #6
def model_fn(features, labels, mode, params):
    use_deep = True
    use_fm = True
    tf.set_random_seed(2019)

    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    index_max_size = params.cont_field_size + params.cate_feats_size
    fm_first_order_emb = my_layer.emb_init(name='fm_first_order_emb', feat_num=index_max_size, embedding_size=1)
    feats_emb = my_layer.emb_init(name='feats_emb', feat_num=index_max_size, embedding_size=params.embedding_size)

    with tf.name_scope('fm_part'):
        # Only categorical features feed the FM first-order part in this
        # variant, so its width is cate_field_size (cont and multi fields
        # do not enter the FM concat below).
        input_field_size = params.cate_field_size

        # FM_first_order [?, input_field_size]
        # cate
        first_cate_emb = tf.nn.embedding_lookup(fm_first_order_emb, ids=cate_feats)
        first_cate_emb = tf.reshape(first_cate_emb, shape=[-1, params.cate_field_size])
        first_order = tf.nn.dropout(first_cate_emb, params.dropout_keep_fm[0])

        # FM_second_order [?, embedding_size]
        # cate
        second_order_emb = tf.nn.embedding_lookup(feats_emb, ids=cate_feats)
        sum_emb = tf.reduce_sum(second_order_emb, 1)
        sum_square_emb = tf.square(sum_emb)
        square_emb = tf.square(second_order_emb)
        square_sum_emb = tf.reduce_sum(square_emb, 1)
        second_order = 0.5 * tf.subtract(sum_square_emb, square_sum_emb)
        second_order = tf.nn.dropout(second_order, params.dropout_keep_fm[1])
        print("fm_second_order:", second_order)

        # FM_res [?, self.input_field_size + embedding_size]
        fm_res = tf.concat([first_order, second_order], axis=1)

    with tf.name_scope('deep_part'):
        # category -> Embedding
        cate_emb = tf.nn.embedding_lookup(feats_emb, ids=cate_feats)
        cate_emb = tf.reshape(cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])
        # dense input
        deep_res = tf.concat([vector_feats, cate_emb], axis=1, name='dense_vector')
        # deep
        len_layers = len(params.hidden_units)
        for i in range(0, len_layers):
            deep_res = tf.layers.dense(inputs=deep_res, units=params.hidden_units[i], activation=tf.nn.relu)

    with tf.name_scope('deep_fm'):
        if use_fm and use_deep:
            feats_input = tf.concat([fm_res, deep_res], axis=1)
            feats_input_size = input_field_size + params.embedding_size + params.hidden_units[-1]
        elif use_fm:
            feats_input = fm_res
            feats_input_size = input_field_size + params.embedding_size
        elif use_deep:
            feats_input = deep_res
            feats_input_size = params.hidden_units[-1]

        glorot = np.sqrt(2.0 / (feats_input_size + 1))
        deep_fm_weight = tf.Variable(
            np.random.normal(loc=0, scale=glorot, size=(feats_input_size, 1)), dtype=np.float32)
        deep_fm_bias = tf.Variable(tf.random_normal([1]))

        out = tf.add(tf.matmul(feats_input, deep_fm_weight), deep_fm_bias)

    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)

    return model_estimator_spec
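The `sum_square_emb - square_sum_emb` step in both DeepFM examples is the standard O(n·k) rewrite of the pairwise FM interaction: sum_{i<j} <v_i, v_j> = 0.5 * sum_k [(sum_i v_ik)^2 - sum_i v_ik^2], with feature values folded into the v_i where applicable. The code keeps the result per embedding dimension instead of summing over k, so `second_order` stays [?, embedding_size]. A quick numpy check of the identity (illustrative only):

import numpy as np

np.random.seed(0)
V = np.random.randn(5, 8)  # 5 fields, embedding size 8
pairwise = sum(V[i].dot(V[j]) for i in range(5) for j in range(i + 1, 5))
fast = 0.5 * np.sum(V.sum(axis=0) ** 2 - (V ** 2).sum(axis=0))
assert np.allclose(pairwise, fast)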
Example #7
def model_fn(labels, features, mode, params):
    use_deep = True
    use_fm = True
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    multi_cate_feats = cate_feats[:, params.cate_field_size:]
    cont_feats_index = tf.Variable([[i for i in range(params.cont_field_size)]], trainable=False, dtype=tf.int64, name="cont_feats_index")

    index_max_size = params.cont_field_size + params.cate_feats_size
    fm_first_order_emb = my_layer.emb_init(name='fm_first_order_emb', feat_num=index_max_size, embedding_size=1)
    feats_emb = my_layer.emb_init(name='feats_emb', feat_num=index_max_size, embedding_size=params.embedding_size)

    with tf.name_scope('fm_part'):
        input_field_size = params.cont_field_size + params.cate_field_size + len(params.multi_feats_range)
        cont_index_add = tf.add(cont_feats_index, params.cate_feats_size)

        # FM_first_order [?, input_field_size]
        # cont
        first_cont_emb = tf.nn.embedding_lookup(fm_first_order_emb, ids=cont_index_add)
        first_cont_emb = tf.reshape(first_cont_emb, shape=[-1, params.cont_field_size])
        first_cont_mul = tf.multiply(first_cont_emb, cont_feats)
        # single_cate
        first_single_cate_emb = tf.nn.embedding_lookup(fm_first_order_emb, ids=single_cate_feats)
        first_single_cate_emb = tf.reshape(first_single_cate_emb, shape=[-1, params.cate_field_size])
        # multi_cate
        first_multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range, fm_first_order_emb, multi_cate_feats)
        # concat cont & single_cate & multi_cate
        first_order_emb = tf.concat([first_cont_mul, first_single_cate_emb, first_multi_cate_emb], axis=1)
        first_order = tf.nn.dropout(first_order_emb, params.dropout_keep_fm[0])

        # FM_second_order [?, embedding_size]
        # cont
        second_cont_emb = tf.nn.embedding_lookup(feats_emb, ids=cont_index_add)
        second_cont_value = tf.reshape(cont_feats, shape=[-1, params.cont_field_size, 1])
        second_cont_emb = tf.multiply(second_cont_emb, second_cont_value)
        # single_cate
        second_single_cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
        # multi_cate
        second_multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range, feats_emb, multi_cate_feats)
        second_multi_cate_emb = tf.reshape(second_multi_cate_emb,
                                           shape=[-1, len(params.multi_feats_range), params.embedding_size])
        # concat cont & single_cate & multi_cate
        second_order_emb = tf.concat([second_cont_emb, second_single_cate_emb, second_multi_cate_emb], axis=1)

        sum_emb = tf.reduce_sum(second_order_emb, 1)
        sum_square_emb = tf.square(sum_emb)
        square_emb = tf.square(second_order_emb)
        square_sum_emb = tf.reduce_sum(square_emb, 1)
        second_order = 0.5 * tf.subtract(sum_square_emb, square_sum_emb)
        second_order = tf.nn.dropout(second_order, params.dropout_keep_fm[1])

        # FM_res [?, self.input_field_size + embedding_size]
        fm_res = tf.concat([first_order, second_order], axis=1)

    with tf.name_scope('deep_part'):
        # single_cate
        single_cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
        single_cate_emb = tf.reshape(single_cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])
        # multi_cate
        multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range, feats_emb, multi_cate_feats)

        deep_res = tf.concat([cont_feats, vector_feats, single_cate_emb, multi_cate_emb], axis=1, name='dense_vector')
        # deep
        len_layers = len(params.hidden_units)
        for i in range(0, len_layers):
            deep_res = tf.layers.dense(inputs=deep_res, units=params.hidden_units[i], activation=tf.nn.relu)

    with tf.name_scope('deep_fm'):
        if use_fm and use_deep:
            feats_input = tf.concat([fm_res, deep_res], axis=1)
            feats_input_size = input_field_size + params.embedding_size + params.hidden_units[-1]
        elif use_fm:
            feats_input = fm_res
            feats_input_size = input_field_size + params.embedding_size
        elif use_deep:
            feats_input = deep_res
            feats_input_size = params.hidden_units[-1]

        glorot = np.sqrt(2.0 / (feats_input_size + 1))
        deep_fm_weight = tf.Variable(
            np.random.normal(loc=0, scale=glorot, size=(feats_input_size, 1)), dtype=np.float32)
        deep_fm_bias = tf.Variable(tf.random_normal([1]))

        out = tf.add(tf.matmul(feats_input, deep_fm_weight), deep_fm_bias)

    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)

    return model_estimator_spec
Example #8
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    multi_cate_feats = cate_feats[:, params.cate_field_size:]
    cont_feats_index = tf.Variable([[i
                                     for i in range(params.cont_field_size)]],
                                   trainable=False,
                                   dtype=tf.int64,
                                   name="cont_feats_index")

    cont_index_add = tf.add(cont_feats_index, params.cate_feats_size)

    index_max_size = params.cont_field_size + params.cate_feats_size
    feats_emb = my_layer.emb_init(name='feats_emb',
                                  feat_num=index_max_size,
                                  embedding_size=params.embedding_size)

    # cont_feats -> Embedding
    with tf.name_scope("cont_feat_emb"):
        ori_cont_emb = tf.nn.embedding_lookup(feats_emb,
                                              ids=cont_index_add,
                                              name="ori_cont_emb")
        cont_value = tf.reshape(cont_feats,
                                shape=[-1, params.cont_field_size, 1],
                                name="cont_value")
        cont_emb = tf.multiply(ori_cont_emb, cont_value)
        cont_emb = tf.reshape(
            cont_emb,
            shape=[-1, params.cont_field_size * params.embedding_size],
            name="cont_emb")

    # single_category -> Embedding
    with tf.name_scope("single_cate_emb"):
        cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
        cate_emb = tf.reshape(
            cate_emb,
            shape=[-1, params.cate_field_size * params.embedding_size])

    # multi_category -> Embedding
    with tf.name_scope("multi_cate_emb"):
        multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range,
                                                 feats_emb, multi_cate_feats)

    # deep input dense
    dense_input = tf.concat([cont_emb, vector_feats, cate_emb, multi_cate_emb],
                            axis=1,
                            name='dense_vector')

    # experts
    experts_weight = tf.get_variable(
        name='experts_weight',
        dtype=tf.float32,
        shape=(dense_input.get_shape()[1], params.experts_units,
               params.experts_num),
        initializer=tf.contrib.layers.xavier_initializer())
    experts_bias = tf.get_variable(
        name='expert_bias',
        dtype=tf.float32,
        shape=(params.experts_units, params.experts_num),
        initializer=tf.contrib.layers.xavier_initializer())

    # gates
    gate1_weight = tf.get_variable(
        name='gate1_weight',
        dtype=tf.float32,
        shape=(dense_input.get_shape()[1], params.experts_num),
        initializer=tf.contrib.layers.xavier_initializer())
    gate1_bias = tf.get_variable(
        name='gate1_bias',
        dtype=tf.float32,
        shape=(params.experts_num, ),
        initializer=tf.contrib.layers.xavier_initializer())
    gate2_weight = tf.get_variable(
        name='gate2_weight',
        dtype=tf.float32,
        shape=(dense_input.get_shape()[1], params.experts_num),
        initializer=tf.contrib.layers.xavier_initializer())
    gate2_bias = tf.get_variable(
        name='gate2_bias',
        dtype=tf.float32,
        shape=(params.experts_num, ),
        initializer=tf.contrib.layers.xavier_initializer())

    # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
    experts_output = tf.tensordot(dense_input, experts_weight, axes=1)
    use_experts_bias = True
    if use_experts_bias:
        experts_output = tf.add(experts_output, experts_bias)
    experts_output = tf.nn.relu(experts_output)

    # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
    gate1_output = tf.matmul(dense_input, gate1_weight)
    gate2_output = tf.matmul(dense_input, gate2_weight)
    use_gate_bias = True
    if use_gate_bias:
        gate1_output = tf.add(gate1_output, gate1_bias)
        gate2_output = tf.add(gate2_output, gate2_bias)
    gate1_output = tf.nn.softmax(gate1_output)
    gate2_output = tf.nn.softmax(gate2_output)

    # f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
    label1_input = tf.multiply(experts_output,
                               tf.expand_dims(gate1_output, axis=1))
    label1_input = tf.reduce_sum(label1_input, axis=2)
    label1_input = tf.reshape(label1_input, [-1, params.experts_units])
    label2_input = tf.multiply(experts_output,
                               tf.expand_dims(gate2_output, axis=1))
    label2_input = tf.reduce_sum(label2_input, axis=2)
    label2_input = tf.reshape(label2_input, [-1, params.experts_units])

    len_layers = len(params.hidden_units)
    with tf.variable_scope('ctr_deep'):
        dense_ctr = tf.layers.dense(inputs=label1_input,
                                    units=params.hidden_units[0],
                                    activation=tf.nn.relu)
        for i in range(1, len_layers):
            dense_ctr = tf.layers.dense(inputs=dense_ctr,
                                        units=params.hidden_units[i],
                                        activation=tf.nn.relu)
        ctr_out = tf.layers.dense(inputs=dense_ctr, units=1)
    with tf.variable_scope('cvr_deep'):
        dense_cvr = tf.layers.dense(inputs=label2_input,
                                    units=params.hidden_units[0],
                                    activation=tf.nn.relu)
        for i in range(1, len_layers):
            dense_cvr = tf.layers.dense(inputs=dense_cvr,
                                        units=params.hidden_units[i],
                                        activation=tf.nn.relu)
        cvr_out = tf.layers.dense(inputs=dense_cvr, units=1)

    ctr_score = tf.identity(tf.nn.sigmoid(ctr_out), name='ctr_score')
    cvr_score = tf.identity(tf.nn.sigmoid(cvr_out), name='cvr_score')
    ctcvr_score = ctr_score * cvr_score
    ctcvr_score = tf.identity(ctcvr_score, name='ctcvr_score')

    score = tf.add(ctr_score * params.label1_weight,
                   cvr_score * params.label2_weight)
    score = tf.identity(score, name='score')

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=score)

    else:
        ctr_labels = tf.identity(labels['label'], name='ctr_labels')
        ctcvr_labels = tf.identity(labels['label2'], name='ctcvr_labels')
        ctr_auc = tf.metrics.auc(labels=ctr_labels,
                                 predictions=ctr_score,
                                 name='auc')
        ctcvr_auc = tf.metrics.auc(labels=ctcvr_labels,
                                   predictions=ctcvr_score,
                                   name='auc')
        metrics = {'ctr_auc': ctr_auc, 'ctcvr_auc': ctcvr_auc}
        # ctr_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=ctr_labels, logits=ctr_out))
        ctr_loss = tf.reduce_mean(
            tf.losses.log_loss(labels=ctr_labels, predictions=ctr_score))
        ctcvr_loss = tf.reduce_mean(
            tf.losses.log_loss(labels=ctcvr_labels, predictions=ctcvr_score))
        loss = ctr_loss + ctcvr_loss

        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer(params.learning_rate)
            train_op = optimizer.minimize(
                loss=loss, global_step=tf.train.get_global_step())
        else:
            train_op = None

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=metrics,
                                      train_op=train_op)
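In the MMoE block above, `tf.tensordot(dense_input, experts_weight, axes=1)` contracts the feature axis, so shapes evolve as [B, D] x [D, U, E] -> [B, U, E]; each gate produces a softmax over the E experts, which is expanded to [B, 1, E] and broadcast across the expert outputs. A shape walk-through with toy sizes (names and sizes are hypothetical):

# B=batch, D=input dim, U=experts_units, E=experts_num.
import tensorflow as tf

B, D, U, E = 4, 10, 16, 3
x = tf.zeros([B, D])
w = tf.zeros([D, U, E])
experts = tf.tensordot(x, w, axes=1)        # [B, U, E]
gate = tf.nn.softmax(tf.zeros([B, E]))      # [B, E], one weight per expert
mixed = experts * tf.expand_dims(gate, 1)   # [B, U, E] via broadcasting
task_input = tf.reduce_sum(mixed, axis=2)   # [B, U], fed to one task tower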
Example #9
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    multi_cate_feats = cate_feats[:, params.cate_field_size:]

    feats_emb = my_layer.emb_init(name='feats_emb', feat_num=params.cate_feats_size,
                                  embedding_size=params.embedding_size)
    # single_category -> Embedding
    cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
    cate_emb = tf.reshape(cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])
    # multi_category -> Embedding
    multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range, feats_emb, multi_cate_feats)

    # deep input dense
    dense_input = tf.concat([cont_feats, vector_feats, cate_emb, multi_cate_emb], axis=1, name='dense_vector')

    len_layers = len(params.hidden_units)
    with tf.variable_scope('ctr_deep'):
        dense_ctr = tf.layers.dense(inputs=dense_input, units=params.hidden_units[0], activation=tf.nn.relu)
        for i in range(1, len_layers):
            dense_ctr = tf.layers.dense(inputs=dense_ctr, units=params.hidden_units[i], activation=tf.nn.relu)
        ctr_out = tf.layers.dense(inputs=dense_ctr, units=1)
    with tf.variable_scope('cvr_deep'):
        dense_cvr = tf.layers.dense(inputs=dense_input, units=params.hidden_units[0], activation=tf.nn.relu)
        for i in range(1, len_layers):
            dense_cvr = tf.layers.dense(inputs=dense_cvr, units=params.hidden_units[i], activation=tf.nn.relu)
        cvr_out = tf.layers.dense(inputs=dense_cvr, units=1)

    ctr_score = tf.identity(tf.nn.sigmoid(ctr_out), name='ctr_score')
    cvr_score = tf.identity(tf.nn.sigmoid(cvr_out), name='cvr_score')
    ctcvr_score = ctr_score * cvr_score
    ctcvr_score = tf.identity(ctcvr_score, name='ctcvr_score')

    # Relative weighting of CTR vs. CVR in the final ranking score.
    ctr_pow = 0.5
    cvr_pow = 1
    score = tf.multiply(tf.pow(ctr_score, ctr_pow), tf.pow(cvr_score, cvr_pow))
    score = tf.identity(score, name='score')

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=score)

    else:
        ctr_labels = tf.identity(labels['label'], name='ctr_labels')
        ctcvr_labels = tf.identity(labels['label2'], name='ctcvr_labels')
        ctr_auc = tf.metrics.auc(labels=ctr_labels, predictions=ctr_score, name='auc')
        ctcvr_auc = tf.metrics.auc(labels=ctcvr_labels, predictions=ctcvr_score, name='auc')
        metrics = {
            'ctr_auc': ctr_auc,
            'ctcvr_auc': ctcvr_auc
        }
        # ctr_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=ctr_labels, logits=ctr_out))
        ctr_loss = tf.reduce_mean(tf.losses.log_loss(labels=ctr_labels, predictions=ctr_score))
        ctcvr_loss = tf.reduce_mean(tf.losses.log_loss(labels=ctcvr_labels, predictions=ctcvr_score))
        loss = ctr_loss + ctcvr_loss

        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer(params.learning_rate)
            train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        else:
            train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metric_ops=metrics,
        train_op=train_op)
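Both multi-task examples (Examples #8 and #9) use the ESMM formulation: pCTR is supervised with click labels and pCTCVR = pCTR * pCVR with conversion labels, both over the full impression space. This follows from the chain rule

    p(click, convert | x) = p(click | x) * p(convert | click, x)

so the post-click pCVR tower is trained without ever sampling on click-filtered data, avoiding the sample-selection bias of training CVR on clicks only.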
Example #10
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    multi_cate_feats = cate_feats[:, params.cate_field_size:]
    cont_feats_index = tf.Variable([[i
                                     for i in range(params.cont_field_size)]],
                                   trainable=False,
                                   dtype=tf.int64,
                                   name="cont_feats_index")

    cont_index_add = tf.add(cont_feats_index, params.cate_feats_size)

    index_max_size = params.cont_field_size + params.cate_feats_size
    feats_emb = my_layer.emb_init(name='feats_emb',
                                  feat_num=index_max_size,
                                  embedding_size=params.embedding_size)

    # cont_feats -> Embedding
    with tf.name_scope("cont_feat_emb"):
        ori_cont_emb = tf.nn.embedding_lookup(feats_emb,
                                              ids=cont_index_add,
                                              name="ori_cont_emb")
        cont_value = tf.reshape(cont_feats,
                                shape=[-1, params.cont_field_size, 1],
                                name="cont_value")
        cont_emb = tf.multiply(ori_cont_emb, cont_value)
        autoint_cont = cont_emb
        cont_emb = tf.reshape(
            cont_emb,
            shape=[-1, params.cont_field_size * params.embedding_size],
            name="cont_emb")

    # single_category -> Embedding
    with tf.name_scope("single_cate_emb"):
        cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
        autoint_cate = cate_emb
        cate_emb = tf.reshape(
            cate_emb,
            shape=[-1, params.cate_field_size * params.embedding_size])

    # multi_category -> Embedding
    with tf.name_scope("multi_cate_emb"):
        multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range,
                                                 feats_emb, multi_cate_feats)
        autoint_multi_cate = tf.reshape(
            multi_cate_emb,
            shape=[-1,
                   len(params.multi_feats_range), params.embedding_size])

    # AutoInteracting
    with tf.name_scope("AutoInt"):
        autoint_input = tf.concat(
            [autoint_cont, autoint_cate, autoint_multi_cate], axis=1)
        for i in range(params.autoint_layer_num):
            autoint_input = my_layer.InteractingLayer(
                num_layer=i,
                att_emb_size=params.autoint_emb_size,
                seed=2020,
                head_num=params.autoint_head_num,
                use_res=params.autoint_use_res)(autoint_input)
        autoint_output = tf.layers.Flatten()(autoint_input)

    # deep input dense
    dense = tf.concat([cont_emb, vector_feats, cate_emb, multi_cate_emb],
                      axis=1,
                      name='dense_vector')

    # deep
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        dense = tf.layers.dense(inputs=dense,
                                units=params.hidden_units[i],
                                activation=tf.nn.relu)

    final_input = tf.concat([autoint_output, dense], axis=1)
    out = tf.layers.dense(inputs=final_input, units=1)
    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)
    return model_estimator_spec
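`my_layer.InteractingLayer` is not shown above; AutoInt's interacting layer is multi-head self-attention over the field axis. A compressed single-head sketch (hypothetical, not the original class) conveys the idea:

# Hypothetical single-head AutoInt interacting layer.
# inputs: [B, F, H_in] -> output: [B, F, att_emb_size].
def interacting_layer(inputs, att_emb_size, use_res=True, scope='autoint'):
    with tf.variable_scope(scope):
        h_in = inputs.get_shape().as_list()[-1]
        w_q = tf.get_variable('w_q', [h_in, att_emb_size])
        w_k = tf.get_variable('w_k', [h_in, att_emb_size])
        w_v = tf.get_variable('w_v', [h_in, att_emb_size])
        q = tf.tensordot(inputs, w_q, axes=1)                       # [B, F, A]
        k = tf.tensordot(inputs, w_k, axes=1)                       # [B, F, A]
        v = tf.tensordot(inputs, w_v, axes=1)                       # [B, F, A]
        scores = tf.nn.softmax(tf.matmul(q, k, transpose_b=True))   # [B, F, F]
        out = tf.matmul(scores, v)                                  # [B, F, A]
        if use_res:
            w_res = tf.get_variable('w_res', [h_in, att_emb_size])
            out += tf.tensordot(inputs, w_res, axes=1)  # residual projection
        return tf.nn.relu(out)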
Example #11
def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    # ------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    # batch_norm_decay = params["batch_norm_decay"]
    # optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    # ------build weights------
    FM_B = tf.get_variable(name='fm_bias',
                           shape=[1],
                           initializer=tf.constant_initializer(0.0))
    # FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    FM_W = emb_init(name='fm_w',
                    feat_num=feature_size,
                    initializer=tf.glorot_normal_initializer(),
                    zero_first_row=False)
    # FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size],
    #                        initializer=tf.glorot_normal_initializer())
    FM_V = emb_init(name='fm_v',
                    feat_num=feature_size,
                    embedding_size=embedding_size,
                    initializer=tf.glorot_normal_initializer(),
                    zero_first_row=False)

    # ------build feature------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    # params is a plain dict in this example, so use key access rather than
    # attribute access. These slices are not used further below.
    single_feat = feat_ids[:, 0:params["multi_start_idx"]]
    multi_feat = feat_ids[:, params["multi_start_idx"]:]

    # ------build f(x)------
    with tf.variable_scope("First-order"):
        feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids)  # None * F * 1
        y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope("Second-order"):
        embeddings = tf.nn.embedding_lookup(FM_V, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # vij*xi
        sum_square = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum = tf.reduce_sum(tf.square(embeddings), 1)
        y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum),
                                  1)  # None * 1

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
            else:
                train_phase = False
        else:
            normalizer_fn = None
            normalizer_params = None

        deep_inputs = tf.reshape(embeddings,
                                 shape=[-1, field_size * embedding_size
                                        ])  # None * (F*K)
        for i in range(len(layers)):
            deep_inputs = tf.layers.dense(
                inputs=deep_inputs,
                units=layers[i],
                # kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                activation=tf.nn.relu,
                name='mlp%d' % i)
            if FLAGS.batch_norm:
                # Apply BN after the ReLU:
                # https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
                deep_inputs = batch_norm_layer(deep_inputs,
                                               train_phase=train_phase,
                                               scope_bn='bn_%d' % i)
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Apply Dropout after all BN layers and set dropout=0.8(drop_ratio=0.2)
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])
                # deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode ==
                # tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.layers.dense(
            inputs=deep_inputs,
            units=1,
            activation=tf.identity,
            # kernel_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
            name='deep_out')
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("DeepFM-out"):
        # y_bias = FM_B * tf.ones_like(labels, dtype=tf.float32)  # None * 1
        # Warning: do not reference `labels` here, or predict/export will fail
        # while train/evaluate still work; the Estimator apparently skips
        # feeding labels when it detects they are unused.
        y_bias = FM_B * tf.ones_like(y_d, dtype=tf.float32)  # None * 1
        y = y_bias + y_w + y_v + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
        tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    # ------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
           l2_reg * tf.nn.l2_loss(FM_W) + \
           l2_reg * tf.nn.l2_loss(FM_V)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {"auc": tf.metrics.auc(labels, pred)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    # ------build optimizer------

    if FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                              initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)
    else:  # default: Adam
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9,
                                           beta2=0.999,
                                           epsilon=1e-8)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op)
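`batch_norm_layer` is referenced above but not defined in this excerpt. A common TF1 pattern it likely follows (a sketch, assuming `tf.contrib.layers.batch_norm` with variables shared between the train and inference branches):

# Hypothetical sketch of batch_norm_layer: one BN scope, built once for
# training (updates moving stats) and reused for inference (reads them).
def batch_norm_layer(x, train_phase, scope_bn):
    bn_train = tf.contrib.layers.batch_norm(
        x, decay=0.9, center=True, scale=True,
        updates_collections=None, is_training=True,
        reuse=None, scope=scope_bn)
    bn_infer = tf.contrib.layers.batch_norm(
        x, decay=0.9, center=True, scale=True,
        updates_collections=None, is_training=False,
        reuse=True, scope=scope_bn)
    # train_phase is a Python bool in the examples above, so a plain
    # conditional suffices (no tf.cond needed).
    return bn_train if train_phase else bn_infer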
Example #12
def model_fn(labels, features, mode, params):
    tf.set_random_seed(2019)

    cont_feats = features["cont_feats"]
    cate_feats = features["cate_feats"]
    vector_feats = features["vector_feats"]

    single_cate_feats = cate_feats[:, 0:params.cate_field_size]
    multi_cate_feats = cate_feats[:, params.cate_field_size:]
    cont_feats_index = tf.Variable([[i
                                     for i in range(params.cont_field_size)]],
                                   trainable=False,
                                   dtype=tf.int64,
                                   name="cont_feats_index")

    cont_index_add = tf.add(cont_feats_index, params.cate_feats_size)

    index_max_size = params.cont_field_size + params.cate_feats_size
    feats_emb = my_layer.emb_init(name='feats_emb',
                                  feat_num=index_max_size,
                                  embedding_size=params.embedding_size)

    # cont_feats -> Embedding
    ori_cont_emb = tf.nn.embedding_lookup(feats_emb,
                                          ids=cont_index_add,
                                          name="ori_cont_emb")
    cont_value = tf.reshape(cont_feats,
                            shape=[-1, params.cont_field_size, 1],
                            name="cont_value")
    cont_emb = tf.multiply(ori_cont_emb, cont_value)
    cont_emb = tf.reshape(
        cont_emb,
        shape=[-1, params.cont_field_size * params.embedding_size],
        name="cont_emb")

    # single_category -> Embedding
    cate_emb = tf.nn.embedding_lookup(feats_emb, ids=single_cate_feats)
    cate_emb = tf.reshape(
        cate_emb, shape=[-1, params.cate_field_size * params.embedding_size])

    # multi_category -> Embedding
    multi_cate_emb = my_layer.multi_cate_emb(params.multi_feats_range,
                                             feats_emb, multi_cate_feats)

    # deep input dense
    dense = tf.concat([cont_emb, vector_feats, cate_emb, multi_cate_emb],
                      axis=1,
                      name='dense_vector')

    # deep
    len_layers = len(params.hidden_units)
    for i in range(0, len_layers):
        dense = tf.layers.dense(inputs=dense,
                                units=params.hidden_units[i],
                                activation=tf.nn.relu)
    out = tf.layers.dense(inputs=dense, units=1)
    score = tf.identity(tf.nn.sigmoid(out), name='score')
    model_estimator_spec = op.model_optimizer(params, mode, labels, score)
    return model_estimator_spec
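Almost every example delegates loss, metric, and train-op construction to `op.model_optimizer(params, mode, labels, score)`, which is not shown on this page. Given that `score` is already a sigmoid output, a plausible sketch (hypothetical, not the original module) is:

# Hypothetical sketch of op.model_optimizer: log-loss on the sigmoid score,
# AUC metric, Adam for training.
def model_optimizer(params, mode, labels, score):
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=score)
    loss = tf.reduce_mean(tf.losses.log_loss(labels=labels, predictions=score))
    metrics = {'auc': tf.metrics.auc(labels=labels, predictions=score)}
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        train_op = optimizer.minimize(loss,
                                      global_step=tf.train.get_global_step())
    else:
        train_op = None
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      eval_metric_ops=metrics,
                                      train_op=train_op)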