Example #1
0
def get_linear_logit(features, feature_columns, units=1, l2_reg=0, init_std=0.0001, seed=1024, prefix='linear'):
    """Build the linear-part logit tensor for ``feature_columns``.

    For each of the ``units`` outputs a separate size-1 embedding lookup is
    requested (name prefix ``prefix + str(i)``); the dense inputs are fetched
    once with the plain ``prefix``. Each unit's logit is produced by a
    ``Linear`` layer whose ``mode`` depends on which inputs exist:
    2 for sparse + dense, 0 for sparse only, 1 for dense only.

    :param features: input feature mapping passed to ``input_from_feature_columns``.
    :param feature_columns: feature column definitions used by the linear part.
    :param units: number of linear logits to build.
    :param l2_reg: regularisation strength passed to the lookups and ``Linear``.
    :param init_std: embedding initialiser std passed to the lookups.
    :param seed: random seed passed to the lookups.
    :param prefix: name prefix for the created embeddings.
    :return: ``concat_fun`` of the per-unit logits.
    :raises NotImplementedError: if there are neither sparse nor dense inputs.
    """
    per_unit_embeddings = []
    for unit_idx in range(units):
        lookup = input_from_feature_columns(features, feature_columns, 1, l2_reg, init_std, seed,
                                            prefix=prefix + str(unit_idx))
        per_unit_embeddings.append(lookup[0])

    _ignored_head, dense_inputs, _ignored_tail = input_from_feature_columns(
        features, feature_columns, 1, l2_reg, init_std, seed, prefix=prefix)

    logits = []
    # One entry per unit, so this loop runs exactly ``units`` times.
    for unit_embeddings in per_unit_embeddings:
        # Availability is judged on the first unit's lookup, as every unit
        # is built from the same feature columns.
        has_sparse = len(per_unit_embeddings[0]) > 0
        has_dense = len(dense_inputs) > 0
        if not has_sparse and not has_dense:
            raise NotImplementedError

        if has_sparse:
            sparse_part = concat_fun(unit_embeddings)
        if has_dense:
            dense_part = concat_fun(dense_inputs)

        if has_sparse and has_dense:
            unit_logit = Linear(l2_reg, mode=2)([sparse_part, dense_part])
        elif has_sparse:
            unit_logit = Linear(l2_reg, mode=0)(sparse_part)
        else:
            unit_logit = Linear(l2_reg, mode=1)(dense_part)
        logits.append(unit_logit)

    return concat_fun(logits)
Example #2
0
def combined_dnn_input(sparse_embedding_list, dense_value_list):
    """Flatten and merge sparse embeddings and dense values into one DNN input.

    Each non-empty list is concatenated with ``concat_fun`` and flattened;
    when both are present the two flattened tensors are concatenated again.

    :param sparse_embedding_list: list of sparse embedding tensors (may be empty).
    :param dense_value_list: list of dense value tensors (may be empty).
    :return: the flattened (and, if both exist, concatenated) input tensor.
    :raises NotImplementedError: if both lists are empty.
    """
    has_sparse = len(sparse_embedding_list) > 0
    has_dense = len(dense_value_list) > 0
    if not (has_sparse or has_dense):
        raise NotImplementedError

    flattened_parts = []
    if has_sparse:
        flattened_parts.append(Flatten()(concat_fun(sparse_embedding_list)))
    if has_dense:
        flattened_parts.append(Flatten()(concat_fun(dense_value_list)))

    # A single part is returned as-is; two parts are joined sparse-first.
    if len(flattened_parts) == 1:
        return flattened_parts[0]
    return concat_fun(flattened_parts)
Example #3
0
def myAutoInt(
    feature_dim_dict,
    embedding_size=8,
    att_layer_num=3,
    att_embedding_size=8,
    att_head_num=4,
    att_res=True,
    hidden_size=(256, 256),
    activation='relu',
    l2_reg_deep=0,
    l2_reg_embedding=1e-5,
    use_bn=False,
    keep_prob=1.0,
    init_std=0.0001,
    seed=1024,
    final_activation='sigmoid',
):
    """Build a two-output ("finish" / "like") AutoInt-style Keras model.

    The embeddings go through ``att_layer_num`` stacked ``InteractingLayer``s
    and, in parallel, through an ``MLP``. The flattened attention output and
    the MLP output are concatenated, and each task gets its own bias-free
    ``Dense(1)`` logit followed by a ``PredictionLayer``.

    :param feature_dim_dict: feature configuration, checked by ``check_feature_config_dict``.
    :param embedding_size: embedding size passed to ``preprocess_input_embedding``.
    :param att_layer_num: number of interacting layers to stack.
    :param att_embedding_size: first argument of each ``InteractingLayer``.
    :param att_head_num: second argument of each ``InteractingLayer``.
    :param att_res: third argument of each ``InteractingLayer``.
    :param hidden_size: layer sizes passed to ``MLP``.
    :param activation: activation passed to ``MLP``.
    :param l2_reg_deep: regularisation strength passed to ``MLP``.
    :param l2_reg_embedding: regularisation strength for the embeddings.
    :param use_bn: batch-norm flag passed to ``MLP``.
    :param keep_prob: keep probability passed to ``MLP``.
    :param init_std: embedding initialiser std.
    :param seed: random seed.
    :param final_activation: activation passed to both ``PredictionLayer``s.
    :return: a Keras model with outputs ``[finish, like]``.
    :raises ValueError: if both ``hidden_size`` is empty and ``att_layer_num`` <= 0.
    """
    if len(hidden_size) <= 0 and att_layer_num <= 0:
        raise ValueError("Either hidden_layer or att_layer_num must > 0")
    check_feature_config_dict(feature_dim_dict)

    embeddings, _, model_inputs = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed,
        False)

    # Attention branch: stacked interacting layers over the embeddings
    # concatenated along axis 1.
    interacted = concat_fun(embeddings, axis=1)
    for _layer in range(att_layer_num):
        interacting = InteractingLayer(att_embedding_size, att_head_num,
                                       att_res)
        interacted = interacting(interacted)
    att_output = tf.keras.layers.Flatten()(interacted)

    # Deep branch: MLP over the flattened embeddings.
    flat_embeddings = tf.keras.layers.Flatten()(concat_fun(embeddings))
    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                   seed)(flat_embeddings)

    def _task_logit():
        # Each task owns its Concatenate and Dense layers (no weight sharing).
        merged = tf.keras.layers.Concatenate()([att_output, deep_out])
        return tf.keras.layers.Dense(1, use_bias=False,
                                     activation=None)(merged)

    finish_logit = _task_logit()
    like_logit = _task_logit()

    output_finish = PredictionLayer(final_activation,
                                    name='finish')(finish_logit)
    output_like = PredictionLayer(final_activation, name='like')(like_logit)

    return tf.keras.models.Model(inputs=model_inputs,
                                 outputs=[output_finish, output_like])
Example #4
0
def xDeepFM_MTL(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """Build a two-output ("finish" / "like") xDeepFM-style Keras model.

    The embeddings feed an optional CIN branch and a shared ``MLP``. Each
    task then has its own ``MLP`` tower and bias-free ``Dense(1)`` logit.
    A task's final logit is the sum of the linear logit, the task logit and,
    when ``cin_layer_size`` is non-empty, the CIN logit.

    :param feature_dim_dict: feature configuration, checked by ``check_feature_config_dict``.
    :param embedding_size: embedding size passed to ``preprocess_input_embedding``.
    :param hidden_size: layer sizes of the shared ``MLP``.
    :param cin_layer_size: layer sizes of the CIN; empty disables the CIN branch.
    :param cin_split_half: ``split_half`` argument passed to ``CIN``.
    :param task_net_size: layer sizes of each task tower; must be non-empty.
    :param l2_reg_linear: regularisation strength for the linear part.
    :param l2_reg_embedding: regularisation strength for the embeddings.
    :param seed: random seed.
    :return: a Keras model with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')
    print("[xdeepfm] feature_dim_dict: {}".format(feature_dim_dict))

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)
    print("[xdeepfm] deep_emb_list:", deep_emb_list)
    print("[xdeepfm] linear_logit:", linear_logit)
    print("[xdeepfm] inputs_list:", inputs_list)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # The CIN branch is optional. Its logit is collected in a list so the
    # final sums work with or without it; previously an empty
    # ``cin_layer_size`` left ``exFM_logit`` undefined and raised NameError.
    cin_logits = []
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)
        cin_logits.append(exFM_logit)

    # Shared deep representation.
    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    # Task-specific towers.
    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit] + cin_logits)
    like_logit = tf.keras.layers.add([linear_logit, like_logit] + cin_logits)

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
Example #5
0
def sess_interest_division(sparse_embedding_dict,
                           user_behavior_input_dict,
                           sparse_fg_list,
                           sess_feture_list,
                           sess_max_count,
                           bias_encoding=True):
    """Build one concatenated embedding tensor per session.

    For each session index ``i`` in ``range(sess_max_count)`` the inputs under
    key ``"sess_<i>"`` are embedded with ``get_embedding_vec_list`` and joined
    with ``concat_fun``.

    :param sparse_embedding_dict: embedding layers passed to ``get_embedding_vec_list``.
    :param user_behavior_input_dict: mapping with one entry per ``"sess_<i>"`` key.
    :param sparse_fg_list: sparse feature definitions passed to ``get_embedding_vec_list``.
    :param sess_feture_list: session feature names; passed as both the 4th
        and 5th positional argument of ``get_embedding_vec_list``.
    :param sess_max_count: number of sessions to process.
    :param bias_encoding: when truthy, run the list through ``BiasEncoding``.
    :return: the list of per-session tensors, or the ``BiasEncoding`` output.
    """
    session_inputs = [
        concat_fun(
            get_embedding_vec_list(
                sparse_embedding_dict,
                user_behavior_input_dict["sess_" + str(sess_idx)],
                sparse_fg_list, sess_feture_list, sess_feture_list))
        for sess_idx in range(sess_max_count)
    ]
    if not bias_encoding:
        return session_inputs
    return BiasEncoding(sess_max_count)(session_inputs)
Example #6
0
def SVD(feature_columns,
        embedding_size=100,
        l2_reg_embedding=1e-5,
        l2_reg_linear=1e-5,
        l2_reg_dnn=0,
        init_std=0.0001,
        seed=1024,
        bi_dropout=0,
        dnn_dropout=0):
    """Build a Keras model whose output is an ``FM`` layer over the sparse embeddings.

    :param feature_columns: feature column definitions used to build the
        inputs and the embeddings.
    :param embedding_size: embedding size passed to ``input_from_feature_columns``.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: accepted but not used by this function.
    :param l2_reg_dnn: accepted but not used by this function.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param bi_dropout: accepted but not used by this function.
    :param dnn_dropout: accepted but not used by this function.
    :return: A Keras model instance.
    """
    input_map = build_input_features(feature_columns)
    model_inputs = list(input_map.values())

    # Only the sparse embeddings are used; the second return value is dropped.
    embeddings, _unused = input_from_feature_columns(
        input_map, feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    stacked_embeddings = concat_fun(embeddings, axis=1)
    fm_logit = FM()(stacked_embeddings)

    return tf.keras.models.Model(inputs=model_inputs, outputs=fm_logit)
Example #7
0
def MT_xDeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidden_units=(256, 256),
            cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001,
            l2_reg_embedding=0.00001, l2_reg_dnn=0, l2_reg_cin=0, init_std=0.0001, seed=1024, dnn_dropout=0,
            dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates a multi-task xDeepFM variant gated by ``u_region_id``.

    The CIN and DNN branches each emit 4 logits. After the prediction layer,
    columns 0/1 are the "finish" predictions and columns 2/3 the "like"
    predictions; within each pair the ``u_region_id`` input feature selects
    the first column when it is 0 and the second when it is 1.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param cin_layer_size: list,list of positive integer or empty list, the feature maps  in each hidden layer of Compressed Interaction Network
    :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to output unit
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: L2 regularizer strength applied to deep net
    :param l2_reg_cin: L2 regularizer strength applied to CIN.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance with outputs ``[finish, like]``.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                         embedding_size,
                                                                         l2_reg_embedding, init_std,
                                                                         seed)

    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation,
                       cin_split_half, l2_reg_cin, seed)(fm_input)
        # 4 logits: two gated candidates for each of the two tasks.
        exFM_logit = tf.keras.layers.Dense(4, activation=None, )(exFM_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)
    deep_logit = tf.keras.layers.Dense(
        4, use_bias=False, activation=None)(deep_out)

    # NOTE(review): in the linear-only case final_logit comes from
    # get_linear_logit with its default units=1, yet four columns are indexed
    # below -- confirm this configuration is never used.
    if len(dnn_hidden_units) == 0 and len(cin_layer_size) == 0:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and len(cin_layer_size) > 0:  # linear + CIN
        final_logit = tf.keras.layers.add([linear_logit, exFM_logit])
    elif len(dnn_hidden_units) > 0 and len(cin_layer_size) == 0:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit])
    elif len(dnn_hidden_units) > 0 and len(cin_layer_size) > 0:  # linear + CIN + Deep
        final_logit = tf.keras.layers.add(
            [linear_logit, deep_logit, exFM_logit])
    else:
        raise NotImplementedError

    output_units = PredictionLayer(task)(final_logit)

    # Gate each task's pair of predictions with the u_region_id flag.
    # NOTE(review): assumes u_region_id is a 0/1 indicator whose shape
    # broadcasts element-wise against output_units[:, k] -- confirm.
    region = features['u_region_id']
    in_region = tf.cast(region, dtype=tf.float32)
    not_in_region = tf.cast(1 - region, dtype=tf.float32)

    finish = not_in_region * output_units[:, 0] + in_region * output_units[:, 1]
    # Fixed copy-paste bug: column 3 was also weighted by (1 - u_region_id),
    # so "like" was always 0 for u_region_id == 1. It now mirrors "finish".
    like = not_in_region * output_units[:, 2] + in_region * output_units[:, 3]

    model = tf.keras.models.Model(inputs=inputs_list, outputs=[finish, like])
    return model
Example #8
0
def MTL_with_Title(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """Build a two-output xDeepFM-style model with an extra title-text branch.

    On top of the linear, optional CIN and per-task tower logits, a title
    branch (Embedding -> LSTM -> global average pooling -> two Dense layers)
    contributes one value that is added to both task logits. The title input
    is appended to the model's input list.

    :param feature_dim_dict: feature configuration, checked by ``check_feature_config_dict``.
    :param embedding_size: embedding size passed to ``preprocess_input_embedding``.
    :param hidden_size: layer sizes of the shared ``MLP``.
    :param cin_layer_size: layer sizes of the CIN; empty disables the CIN branch.
    :param cin_split_half: ``split_half`` argument passed to ``CIN``.
    :param task_net_size: layer sizes of each task tower; must be non-empty.
    :param l2_reg_linear: regularisation strength for the linear part.
    :param l2_reg_embedding: regularisation strength for the embeddings.
    :param seed: random seed.
    :return: a Keras model with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # xDeepFM part
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # The CIN branch is optional. Its logit is collected in a list so the
    # final sums work with or without it; previously an empty
    # ``cin_layer_size`` left ``exFM_logit`` undefined and raised NameError.
    cin_logits = []
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)
        cin_logits.append(exFM_logit)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # Title branch: int32 token-id sequences of length 35, with a hard-coded
    # embedding input_dim of 134545.
    title_input = Input(shape=(35, ), dtype='int32', name='title_input')
    title_embedding = Embedding(output_dim=32,
                                input_dim=134545,
                                input_length=35)(title_input)
    lstm_out = LSTM(units=32, return_sequences=True)(title_embedding)
    avg_out = GlobalAveragePooling1D()(lstm_out)
    dense1 = Dense(32, activation='relu')(avg_out)
    dense2 = Dense(1, activation='relu')(dense1)

    # Sum every available logit component for each task.
    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit] + cin_logits + [dense2])
    like_logit = tf.keras.layers.add(
        [linear_logit, like_logit] + cin_logits + [dense2])

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    print(str(inputs_list))
    inputs_list.append(title_input)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
Example #9
0
def xDeepFM_MTL(
    linear_feature_columns,
    dnn_feature_columns,
    gate_feature_columns,
    embedding_size=8,
    dnn_hidden_units=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    init_std=0.0001,
    l2_reg_dnn=0,
    dnn_dropout=0,
    dnn_activation='relu',
    dnn_use_bn=False,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """Build a gated two-output ("finish" / "like") xDeepFM-style Keras model.

    Each task has two towers over the shared DNN output. Their logits are
    blended as ``gate * logit1 + (1 - gate) * logit2``, where ``gate`` is the
    first dense input built from ``gate_feature_columns``. The blended logit
    is summed with the linear logit and, when ``cin_layer_size`` is
    non-empty, the CIN logit.

    :param linear_feature_columns: feature columns for the linear part.
    :param dnn_feature_columns: feature columns for the embedding / deep part.
    :param gate_feature_columns: feature columns providing the gate value.
    :param embedding_size: embedding size passed to ``input_from_feature_columns``.
    :param dnn_hidden_units: layer sizes of the shared ``DNN``.
    :param cin_layer_size: layer sizes of the CIN; empty disables the CIN branch.
    :param cin_split_half: ``split_half`` argument passed to ``CIN``.
    :param init_std: embedding initialiser std.
    :param l2_reg_dnn: regularisation strength passed to the shared ``DNN``.
    :param dnn_dropout: dropout rate passed to the shared ``DNN``.
    :param dnn_activation: activation passed to the shared ``DNN``.
    :param dnn_use_bn: batch-norm flag passed to the shared ``DNN``.
    :param task_net_size: layer sizes of each task tower; must be non-empty.
    :param l2_reg_linear: regularisation strength for the linear part.
    :param l2_reg_embedding: regularisation strength for the embeddings.
    :param seed: random seed.
    :return: a Keras model with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns + gate_feature_columns)

    inputs_list = list(features.values())

    # Only the sparse embeddings are used; the dense values are dropped.
    sparse_embedding_list, _ = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)
    gate = get_dense_input(features, gate_feature_columns)[0]

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    # The CIN branch is optional. Its logit is collected in a list so the
    # final sums work with or without it; previously an empty
    # ``cin_layer_size`` left ``exFM_logit`` undefined and raised NameError.
    cin_logits = []
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, 0,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)
        cin_logits.append(exFM_logit)

    dnn_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)

    # Two towers per task; the gate decides how they are blended.
    finish_out1 = DNN(task_net_size)(deep_out)
    finish_logit1 = tf.keras.layers.Dense(1, use_bias=False,
                                          activation=None)(finish_out1)

    like_out1 = DNN(task_net_size)(deep_out)
    like_logit1 = tf.keras.layers.Dense(1, use_bias=False,
                                        activation=None)(like_out1)

    finish_out2 = DNN(task_net_size)(deep_out)
    finish_logit2 = tf.keras.layers.Dense(1, use_bias=False,
                                          activation=None)(finish_out2)

    like_out2 = DNN(task_net_size)(deep_out)
    like_logit2 = tf.keras.layers.Dense(1, use_bias=False,
                                        activation=None)(like_out2)

    finish_logit = gate * finish_logit1 + (1.0 - gate) * finish_logit2
    like_logit = gate * like_logit1 + (1.0 - gate) * like_logit2

    print(np.shape(like_logit))

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit] + cin_logits)
    like_logit = tf.keras.layers.add([linear_logit, like_logit] + cin_logits)

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)

    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
Example #10
0
def DeepFM(feature_dim_dict,
           attention_feature_name=None,
           embedding_size=8,
           use_fm=True,
           dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001,
           l2_reg_dnn=0,
           init_std=0.0001,
           seed=1024,
           dnn_dropout=0,
           dnn_activation='relu',
           dnn_use_bn=False,
           task='binary'):
    """Instantiates the DeepFM Network architecture.

    The final logit is the linear logit plus, depending on the
    configuration, the FM output and/or the DNN logit.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param attention_feature_name: passed to ``preprocess_input_embedding``; a truthy value also sets ``use_var_attention=True``.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_fm: bool,use FM part or not
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises NotImplementedError: if ``use_fm`` equals neither ``True`` nor ``False``.
    """
    check_feature_config_dict(feature_dim_dict)

    embedding_outputs = preprocess_input_embedding(
        feature_dim_dict,
        embedding_size,
        l2_reg_embedding,
        l2_reg_linear, init_std,
        seed,
        create_linear_weight=True,
        use_var_attention=bool(attention_feature_name),
        attention_feature_name=attention_feature_name)
    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = embedding_outputs

    linear_logit = get_linear_logit(linear_emb_list, dense_input_dict,
                                    l2_reg_linear)

    # The FM and DNN branches are always built; whether they contribute to
    # the final logit is decided below.
    fm_input = concat_fun(deep_emb_list, axis=1)
    deep_input = tf.keras.layers.Flatten()(fm_input)
    fm_out = FM()(fm_input)
    dnn_layer = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                    dnn_use_bn, seed)
    deep_out = dnn_layer(deep_input)
    deep_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(deep_out)

    has_dnn = len(dnn_hidden_units) > 0
    # Explicit equality is kept on purpose: a value equal to neither True
    # nor False must end in NotImplementedError.
    if use_fm == True:  # noqa: E712
        fm_terms = [fm_out]
    elif use_fm == False:  # noqa: E712
        fm_terms = []
    else:
        raise NotImplementedError

    logit_terms = [linear_logit] + fm_terms
    if has_dnn:
        logit_terms.append(deep_logit)

    if len(logit_terms) == 1:  # only linear
        final_logit = linear_logit
    else:  # linear + FM and/or Deep
        final_logit = tf.keras.layers.add(logit_terms)

    output = PredictionLayer(task)(final_logit)
    return tf.keras.models.Model(inputs=inputs_list, outputs=output)
Example #11
0
    l2_reg=0.00001,
    init_std=0.0001,
    seed=1024)

# Merge the sparse embeddings and dense values (produced by the call that
# ends just above) into a single flattened DNN input.
dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
# print('test_model_input info')
# print(len(test_model_input))
# print(type(test_model_input[0]))
# print(len(test_model_input[0]))
# print('num_features:',len(test_model_input[0]) )

# First-level MMoE configured with 8 experts and 8 tasks.
# Presumably it returns one tensor per task -- confirm against the MMoE layer.
mmoe_layers = MMoE(units=16, num_experts=8, num_tasks=8)(dnn_input)

# Debug marker printed once the first MMoE layer has been built.
print('passed')
mmoe_cat_layer = concat_fun(mmoe_layers)

# Second-level MMoE with 2 tasks; it receives both the concatenated
# first-level outputs and the original DNN input.
mmoe_high_layers = MMoEdiffGate(units=16, num_experts=8,
                                num_tasks=2)([mmoe_cat_layer, dnn_input])

output_layers = []

# Build one tower per second-level output: Dense(128, relu) followed by a
# sigmoid Dense(1) named after the task. output_info has two names, so the
# loop supports at most two task outputs.
output_info = ['finish', 'like']
for index, task_layer in enumerate(mmoe_high_layers):
    tower_layer = tf.keras.layers.Dense(units=128,
                                        activation='relu')(task_layer)
    output_layer = tf.keras.layers.Dense(units=1,
                                         name=output_info[index],
                                         activation='sigmoid')(tower_layer)
    output_layers.append(output_layer)
Example #12
0
def xDeepFM_MTL(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.000001,
    l2_reg_embedding=0.000001,
    seed=1024,
):
    """Build a two-output ("finish" / "like") xDeepFM-style Keras model.

    The embeddings feed an optional CIN branch and a shared ``MLP``. Each
    task then has its own ``MLP`` tower and bias-free ``Dense(1)`` logit.
    A task's final logit is the sum of the linear logit, the task logit and,
    when ``cin_layer_size`` is non-empty, the CIN logit.

    :param feature_dim_dict: feature configuration, checked by ``check_feature_config_dict``.
    :param embedding_size: embedding size passed to ``preprocess_input_embedding``.
    :param hidden_size: layer sizes of the shared ``MLP``.
    :param cin_layer_size: layer sizes of the CIN; empty disables the CIN branch.
    :param cin_split_half: ``split_half`` argument passed to ``CIN``.
    :param task_net_size: layer sizes of each task tower; must be non-empty.
    :param l2_reg_linear: regularisation strength for the linear part.
    :param l2_reg_embedding: regularisation strength for the embeddings.
    :param seed: random seed.
    :return: a Keras model with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    # Model input: the embeddings concatenated along axis 1.
    fm_input = concat_fun(deep_emb_list, axis=1)

    # Build the CIN with relu activation. Per the original author's notes,
    # its input is (batch_size, field_size, embedding_size), its output is
    # (batch_size, feature_num), and with split_half=True only half of each
    # hidden layer's feature maps connect to the output unit.
    #
    # The branch is optional. Its logit is collected in a list so the final
    # sums work with or without it; previously an empty ``cin_layer_size``
    # left ``exFM_logit`` undefined and raised NameError.
    cin_logits = []
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        # Fully connected projection of the CIN output to a single logit.
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)
        cin_logits.append(exFM_logit)

    # Flatten every dimension except the batch one and feed the embedding
    # features directly into the shared MLP.
    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    # Pass deep_out through a task MLP and a dense layer to get the "finish"
    # logit; the same is done for the "like" logit.
    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # Each final logit is the sum of linear_logit, the task logit
    # (finish_logit / like_logit) and, if present, the CIN logit.
    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit] + cin_logits)
    like_logit = tf.keras.layers.add([linear_logit, like_logit] + cin_logits)

    # Turn the logits into probabilities with a sigmoid and build the model
    # from its inputs and outputs.
    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
Example #13
0
def CapsuleNet(feature_dim_dict,
               seq_feature_list,
               embedding_size=8,
               hist_len_max=50,
               use_bn=False,
               dnn_hidden_units=(200, 80),
               dnn_activation='sigmoid',
               num_capsule=8,
               dim_capsule=2,
               routing_iterations=3,
               att_hidden_size=(64, 16),
               att_activation="dice",
               att_weight_normalization=True,
               att_embedding_size=1,
               att_head_num=8,
               l2_reg_dnn=0,
               l2_reg_embedding=1e-6,
               dnn_dropout=0,
               init_std=0.0001,
               alpha=1e-6,
               seed=1024,
               task='binary'):
    """Build a Keras model combining attention scores, self-attention and a capsule layer over a behaviour sequence.

    Pipeline, as wired below: sparse features are embedded; attention scores
    are computed between the query and the behaviour-sequence embeddings; the
    sequence embeddings go through a ``Transformer`` layer and then a
    ``Capsule`` layer that also receives the scores; the reshaped capsule
    output is concatenated with the other embeddings (and any dense inputs)
    and fed to a ``DNN``, a bias-free ``Dense(1)`` and a ``PredictionLayer``.
    ``alpha * get_disp_loss(capsules)`` is added to the model as an extra loss.

    :param feature_dim_dict: feature configuration, checked by ``check_feature_config_dict``; its ``"sparse"`` entry is iterated for ``.name`` / ``.dimension``.
    :param seq_feature_list: names of the sequence features; their embeddings use ``mask_zero=True``.
    :param embedding_size: output size of every sparse embedding.
    :param hist_len_max: passed to ``get_input`` (presumably the max sequence length -- confirm).
    :param use_bn: batch-norm flag passed to ``DNN``.
    :param dnn_hidden_units: layer sizes passed to ``DNN``.
    :param dnn_activation: activation passed to ``DNN``.
    :param num_capsule: number of capsules of the ``Capsule`` layer.
    :param dim_capsule: dimension of each capsule.
    :param routing_iterations: ``routings`` argument of the ``Capsule`` layer.
    :param att_hidden_size: ``att_hidden_units`` of ``AttentionSequencePoolingLayer``.
    :param att_activation: ``att_activation`` of ``AttentionSequencePoolingLayer``.
    :param att_weight_normalization: ``weight_normalization`` of ``AttentionSequencePoolingLayer``.
    :param att_embedding_size: first positional argument of ``Transformer``.
    :param att_head_num: second positional argument of ``Transformer``.
    :param l2_reg_dnn: regularisation strength passed to ``DNN``.
    :param l2_reg_embedding: L2 regulariser strength of the embeddings.
    :param dnn_dropout: dropout rate passed to ``DNN``.
    :param init_std: std of the embedding initialiser.
    :param alpha: weight of the extra loss term.
    :param seed: random seed.
    :param task: passed to ``PredictionLayer``.
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding layer per sparse feature, keyed by feature name.
    sparse_embedding_dict = {
        feat.name:
        Embedding(feat.dimension,
                  embedding_size,
                  embeddings_initializer=RandomNormal(mean=0.0,
                                                      stddev=init_std,
                                                      seed=seed),
                  embeddings_regularizer=l2(l2_reg_embedding),
                  name='sparse_emb_' + str(i) + '-' + feat.name,
                  mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    # Query: sequence features looked up from the regular sparse inputs.
    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    # Keys: the same features looked up from the behaviour-sequence inputs.
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    # Embeddings of the sparse inputs for the deep part (no feature filter).
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    # return_score=True: the layer's output is used as scores for the
    # capsule layer below rather than as a pooled vector.
    scores = AttentionSequencePoolingLayer(
        att_hidden_units=att_hidden_size,
        att_activation=att_activation,
        weight_normalization=att_weight_normalization,
        return_score=True)([query_emb, keys_emb, user_behavior_length])

    Self_Attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=True,
                                 use_positional_encoding=True,
                                 seed=seed,
                                 supports_masking=False,
                                 blinding=True)

    # Self-attention: the keys act as both inputs, with the behaviour
    # length passed for each.
    keys_emb = Self_Attention(
        [keys_emb, keys_emb, user_behavior_length, user_behavior_length])

    cap = Capsule(num_capsule=num_capsule,
                  dim_capsule=dim_capsule,
                  routings=routing_iterations,
                  share_weights=True,
                  supports_masking=True)
    hist_cap = cap(keys_emb, scores=scores)
    # The extra loss is computed on the capsule output before it is reshaped.
    disp_loss = get_disp_loss(hist_cap)
    # Collapse all capsules into one row of num_capsule * dim_capsule values.
    hist_cap = Reshape([1, num_capsule * dim_capsule])(NoMask()(hist_cap))
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Concatenate()([deep_input_emb, hist_cap])

    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    # Append the dense inputs, if any, to the flattened embeddings.
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])

    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    # Add the extra loss term, weighted by alpha.
    model.add_loss(alpha * disp_loss)
    # Runs the global variable initializer in the Keras backend session.
    # These are TF1-style APIs (get_session / global_variables_initializer).
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
Example #14
0
def BST(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16, use_bn=False, dnn_hidden_units=(200, 80),
        dnn_activation='relu', att_embedding_size=1, att_head_num=8,
        l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the BST architecture (self-attention over behavior sequences feeding a DNN).

    Every behavior-sequence embedding is passed through one shared ``Transformer``
    layer; the attended sequences are concatenated with the embeddings of the
    regular sparse inputs, flattened, optionally joined with the dense inputs and
    fed to a DNN followed by a single-unit prediction head.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list,to indicate  sequence sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.

    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding table per sparse feature. ``input_dim`` must be a plain
    # Python integer (the vocabulary size), so coerce it with ``int`` rather
    # than handing Keras a tensor. Sequence features mask the padding id 0.
    sparse_embedding_dict = {
        feat.name: Embedding(int(feat.dimension), embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])}

    # Embeddings of the behavior sequences (one tensor per sequence feature)
    # and of every regular sparse input.
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'])
    deep_input_emb = concat_fun(deep_input_emb_list)

    # A single Transformer layer (shared weights) attends over each sequence
    # feature separately; the sequence is used as both query and key.
    self_attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=True, seed=seed, supports_masking=False,
                                 blinding=True)
    hist = concat_fun([
        self_attention([key_emb, key_emb, user_behavior_length, user_behavior_length])
        for key_emb in keys_emb_list])

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])

    # The sequence-length input always comes last.
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)

    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
Example #15
0
def DSIN(
    feature_dim_dict,
    sess_feature_list,
    embedding_size=8,
    sess_max_count=5,
    sess_len_max=10,
    att_embedding_size=1,
    att_head_num=8,
    dnn_hidden_units=(200, 80),
    dnn_activation='sigmoid',
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    task='binary',
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
    encoding='bias',
):
    """Instantiates a session-interest model: per-session self-attention, Bi-LSTM and attention pooling feeding a DNN.

    :param feature_dim_dict: dict whose ``"sparse"`` entry is an iterable of feature descriptors exposing ``name`` and ``dimension``
    :param sess_feature_list: list, names of the sparse features that make up the session sequences
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param sess_max_count: positive int, number of sessions; inputs are looked up under the keys ``"sess_0"`` .. ``"sess_<sess_max_count-1>"``
    :param sess_len_max: positive int, forwarded to ``get_input`` as the per-session length
    :param att_embedding_size: positive int, forwarded to ``Transformer`` as the per-head embedding size
    :param att_head_num: positive int, forwarded to ``Transformer`` as the number of heads
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param task: str, forwarded to ``PredictionLayer``
    :param dnn_dropout: float, dropout rate forwarded to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param encoding: str, ``'bias'`` enables bias encoding in ``sess_interest_division`` and turns the
        Transformer's positional encoding off; any other value does the opposite.
    :return: A Keras model instance.

    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    # One embedding table per sparse feature; session features mask id 0.
    sparse_embedding_dict = {
        feat.name:
        Embedding(feat.dimension,
                  embedding_size,
                  embeddings_initializer=RandomNormal(mean=0.0,
                                                      stddev=init_std,
                                                      seed=seed),
                  embeddings_regularizer=l2(l2_reg_embedding),
                  name='sparse_emb_' + str(i) + '-' + feat.name,
                  mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list,
                                            sess_feature_list)

    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict,
        sparse_input,
        feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    # Bias encoding and the Transformer's positional encoding are mutually
    # exclusive: exactly one of them is enabled.
    use_bias_encoding = encoding == 'bias'
    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list,
                                      sess_max_count,
                                      bias_encoding=use_bias_encoding)

    self_attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=False,
                                 use_positional_encoding=(not use_bias_encoding),
                                 seed=seed,
                                 supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       self_attention)

    # Attention pooling over the raw session features ...
    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    # ... and over the Bi-LSTM outputs computed from the same session features.
    lstm_outputs = BiLSTM(
        len(sess_feature_list) * embedding_size,
        layers=2,
        res_layers=0,
        dropout_rate=0.2,
    )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    # Batch normalisation in the DNN is always disabled in this variant.
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 False, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    # Model inputs: sparse, dense, then every session's inputs in session
    # order, and finally the session-count input.
    sess_input_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    model = Model(inputs=model_input_list, outputs=output)

    return model
Example #16
0
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False, dnn_hidden_units=(200, 80),
         dnn_activation='relu',
         att_hidden_units=(64, 16), att_activation="dice", att_weight_normalization=True,
         l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Build the Deep Interest Evolution Network as a Keras model.

    :param feature_dim_dict: dict describing the sparse fields (only sparse features are supported), e.g. {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list of sequence sparse fields; has to be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, size of every sparse feature embedding.
    :param hist_len_max: positive int, maximum length of the sequence input
    :param gru_type: str, one of GRU, AIGRU, AUGRU, AGRU
    :param use_negsampling: bool, enable negative sampling (adds the ``neg_seq_*`` inputs and the auxiliary loss)
    :param alpha: float, weight applied to the auxiliary loss
    :param use_bn: bool, whether the deep net uses BatchNormalization before activation
    :param dnn_hidden_units: list of positive integers (may be empty), units per DNN layer
    :param dnn_activation: activation function of the DNN
    :param att_hidden_units: list of positive integers, units per layer of the attention net
    :param att_activation: activation function of the attention net
    :param att_weight_normalization: bool, whether the attention scores of the local activation unit are normalized
    :param l2_reg_dnn: float, L2 regularizer strength for the DNN
    :param l2_reg_embedding: float, L2 regularizer strength for the embedding vectors
    :param dnn_dropout: float in [0,1), probability of dropping a DNN coordinate
    :param init_std: float, standard deviation used to initialize the embedding vectors
    :param seed: integer, random seed
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.

    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding table per sparse feature, keyed by feature name.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name)

    # Look up the embeddings first, then concatenate each group.
    query_vectors = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        return_feat_list=seq_feature_list)
    key_vectors = get_embedding_vec_list(
        sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'],
        return_feat_list=seq_feature_list)
    deep_vectors = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'])

    query_emb = concat_fun(query_vectors)
    keys_emb = concat_fun(key_vectors)
    deep_input_emb = concat_fun(deep_vectors)

    # Negative behavior sequences exist only when negative sampling is on.
    neg_concat_behavior = None
    if use_negsampling:
        neg_user_behavior_input = OrderedDict(
            (feat, Input(shape=(hist_len_max,), name='neg_seq_' + str(idx) + '-' + feat))
            for idx, feat in enumerate(seq_feature_list))
        neg_vectors = get_embedding_vec_list(
            sparse_embedding_dict, neg_user_behavior_input,
            feature_dim_dict["sparse"], seq_feature_list)
        neg_concat_behavior = concat_fun(neg_vectors)

    hist, aux_loss_1 = interest_evolution(
        keys_emb,
        query_emb,
        user_behavior_length,
        gru_type=gru_type,
        use_neg=use_negsampling,
        neg_concat_behavior=neg_concat_behavior,
        embedding_size=embedding_size,
        att_hidden_size=att_hidden_units,
        att_activation=att_activation,
        att_weight_normalization=att_weight_normalization)

    merged = Concatenate()([deep_input_emb, hist])
    dnn_input = tf.keras.layers.Flatten()(merged)
    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        dnn_input = Concatenate()([dnn_input] + dense_tensors)

    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, use_bn, seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(dnn_output)
    prediction = PredictionLayer(task)(final_logit)

    # Input order: regular inputs, optional negative sequences, then length.
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list.extend(neg_user_behavior_input.values())
    model_input_list.append(user_behavior_length)

    model = tf.keras.models.Model(inputs=model_input_list, outputs=prediction)

    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
Example #17
0
def DeepFM(linear_feature_columns,
           dnn_feature_columns,
           embedding_size=8,
           use_fm=True,
           dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001,
           l2_reg_dnn=0,
           init_std=0.0001,
           seed=1024,
           dnn_dropout=0,
           dnn_activation='relu',
           dnn_use_bn=False,
           task='binary',
           att=False,
           seq_len=None,
           cate_feats=None,
           cate2nunique=None):
    """Instantiates the DeepFM Network architecture, extended with an LSTM branch over a sequence input.

    Besides the inputs derived from the feature columns, the model takes one
    extra input named ``'lstm_input'`` of shape ``(seq_len, 1 + len(cate_feats))``.
    Channel 0 is used as-is; channel ``i + 1`` holds the ids of ``cate_feats[i]``
    and is embedded. The LSTM output is concatenated to the DNN input.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_fm: bool,use FM part or not
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :param att: bool. If True the LSTM returns the full sequence, ``attention_3d_block`` is applied to it and the
        result is summed over axis 1; otherwise only the last LSTM output is used.
    :param seq_len: length of the time axis of ``'lstm_input'``.
    :param cate_feats: list of categorical feature names carried in channels ``1..`` of ``'lstm_input'``
        (``None`` means no categorical channels).
    :param cate2nunique: dict mapping a categorical feature name to the ``input_dim`` of the embedding that is
        created when ``embedding_dict`` has no entry for that feature (``None`` means empty).
    :return: A Keras model instance.
    """
    # Normalise the optional containers here instead of using mutable
    # defaults, which would be shared between calls.
    cate_feats = [] if cate_feats is None else cate_feats
    cate2nunique = {} if cate2nunique is None else cate2nunique

    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list, embedding_dict = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Sequence branch: channel 0 stays numeric, the remaining channels are
    # categorical ids that get embedded and concatenated on the last axis.
    input_lstm = Input(shape=(seq_len, 1 + len(cate_feats)), name='lstm_input')
    input_lstm_gap = Lambda(lambda x: x[:, :, 0:1])(input_lstm)
    concate_list = [input_lstm_gap]
    for i, cate in enumerate(cate_feats):
        # Bind the channel index as a default argument. A plain closure over
        # ``i`` is evaluated lazily, so whenever the layer is executed again
        # after the loop has finished every Lambda would slice the *last*
        # channel.
        input_cate = Lambda(lambda x, idx=i + 1: x[:, :, idx])(input_lstm)
        # Reuse the embedding of the matching feature column when there is
        # one, otherwise create a fresh table sized from ``cate2nunique``.
        emb = embedding_dict.get(cate)
        if emb is None:
            # NOTE(review): output_dim is fixed at 8 here rather than
            # following ``embedding_size`` - confirm this is intentional.
            emb = Embedding(output_dim=8, input_dim=cate2nunique[cate])
        concate_list.append(emb(input_cate))
    input_lstm_concat = Concatenate(axis=-1)(concate_list)
    if att:
        lstm_out = LSTM(units=128, return_sequences=True)(input_lstm_concat)
        attention_mul = attention_3d_block(lstm_out, seq_len)
        lstm_out = Lambda(lambda x: K.sum(x, axis=1))(attention_mul)
    else:
        lstm_out = LSTM(units=128, return_sequences=False)(input_lstm_concat)

    dnn_input = concat_fun([dnn_input, lstm_out])
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_out)

    # The linear part is always present; FM and the deep part are optional.
    logit_parts = [linear_logit]
    if use_fm:
        logit_parts.append(fm_logit)
    if len(dnn_hidden_units) > 0:
        logit_parts.append(dnn_logit)

    if len(logit_parts) == 1:  # only linear
        final_logit = linear_logit
    else:
        final_logit = tf.keras.layers.add(logit_parts)

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list + [input_lstm],
                                  outputs=output)
    return model
Example #18
0
File: dsin.py Project: zwcdp/DSIN
def DSIN(
    feature_dim_dict,
    sess_feature_list,
    embedding_size=8,
    sess_max_count=5,
    sess_len_max=10,
    bias_encoding=False,
    att_embedding_size=1,
    att_head_num=8,
    dnn_hidden_units=(200, 80),
    dnn_activation='sigmoid',
    dnn_dropout=0,
    dnn_use_bn=False,
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    init_std=0.0001,
    seed=1024,
    task='binary',
):
    """Build the Deep Session Interest Network as a Keras model.

    :param feature_dim_dict: dict describing the sparse fields (only sparse features are supported), e.g. {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param sess_feature_list: list of session sparse fields; has to be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, size of every sparse feature embedding.
    :param sess_max_count: positive int, maximum number of sessions
    :param sess_len_max: positive int, maximum length of each session
    :param bias_encoding: bool, use bias encoding (True) or positional encoding (False)
    :param att_embedding_size: positive int, embedding size of each attention head
    :param att_head_num: positive int, number of attention heads
    :param dnn_hidden_units: list of positive integers (may be empty), units per layer of the deep net
    :param dnn_activation: activation function of the deep net
    :param dnn_dropout: float in [0,1), probability of dropping a DNN coordinate
    :param dnn_use_bn: bool, whether the deep net uses BatchNormalization before activation
    :param l2_reg_dnn: float, L2 regularizer strength for the DNN
    :param l2_reg_embedding: float, L2 regularizer strength for the embedding vectors
    :param init_std: float, standard deviation used to initialize the embedding vectors
    :param seed: integer, random seed
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``att_embedding_size * att_head_num`` differs from ``len(sess_feature_list) * embedding_size``

    """
    check_feature_config_dict(feature_dim_dict)

    # Width of the concatenated session-feature embedding; the attention
    # output width has to match it.
    sess_emb_width = len(sess_feature_list) * embedding_size
    if att_embedding_size * att_head_num != sess_emb_width:
        raise ValueError(
            "len(session_feature_lsit) * embedding_size must equal to att_embedding_size * att_head_num ,got %d * %d != %d *%d"
            % (len(sess_feature_list), embedding_size, att_embedding_size,
               att_head_num))

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    # One embedding table per sparse feature; session features mask id 0.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in sess_feature_list))

    query_emb = concat_fun(
        get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                               feature_dim_dict["sparse"], sess_feature_list,
                               sess_feature_list))

    deep_input_emb = concat_fun(
        get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                               feature_dim_dict["sparse"],
                               mask_feat_list=sess_feature_list))
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    tr_input = sess_interest_division(
        sparse_embedding_dict, user_behavior_input_dict,
        feature_dim_dict['sparse'], sess_feature_list, sess_max_count,
        bias_encoding=bias_encoding)

    # Positional encoding is used only when bias encoding is switched off.
    self_attention = Transformer(
        att_embedding_size, att_head_num,
        dropout_rate=0,
        use_layer_norm=False,
        use_positional_encoding=(not bias_encoding),
        seed=seed,
        supports_masking=True,
        blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count, self_attention)

    interest_pooling = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)
    interest_attention_layer = interest_pooling(
        [query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(sess_emb_width,
                          layers=2,
                          res_layers=0,
                          dropout_rate=0.2)(sess_fea)
    lstm_pooling = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True)
    lstm_attention_layer = lstm_pooling(
        [query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer),
    ])
    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 dnn_use_bn, seed)(deep_input_emb)
    logit = Dense(1, use_bias=False, activation=None)(hidden)
    output = PredictionLayer(task)(logit)

    # Model inputs: sparse, dense, each session's inputs in session order,
    # then the session-count input.
    sess_input_list = []
    for sess_idx in range(sess_max_count):
        sess_inputs = user_behavior_input_dict["sess_" + str(sess_idx)]
        sess_input_list.extend(get_inputs_list([sess_inputs]))

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    return Model(inputs=model_input_list, outputs=output)
Example #19
0
def xDeepFM_MTL(
    linear_feature_columns,
    dnn_feature_columns,
    embedding_size=8,
    dnn_hidden_units=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    init_std=0.0001,
    l2_reg_dnn=0,
    dnn_dropout=0,
    dnn_activation='relu',
    dnn_use_bn=False,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """Instantiates an xDeepFM-style network (linear + CIN + DNN) with a single binary output named ``'like'``.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of the shared DNN
    :param cin_layer_size: list of positive integers forwarded to ``CIN``; an empty list disables the CIN branch
    :param cin_split_half: bool, forwarded to ``CIN``
    :param init_std: float,to use as the initialize std of embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param dnn_dropout: float, dropout rate forwarded to the shared DNN
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_net_size: list of positive integers, layers of the task-specific DNN stacked on the shared DNN; needs at least one layer
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    inputs_list = list(features.values())

    # Only the first two results are needed. Slicing keeps this working
    # whether the helper returns (sparse, dense) or, as other call sites in
    # this file expect, (sparse, dense, embedding_dict).
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)[:2]

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    # The CIN branch is optional; without it there is no exFM logit to add.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        fm_input = concat_fun(sparse_embedding_list, axis=1)
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, 0,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)

    # Task-specific tower on top of the shared deep representation.
    like_out = DNN(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    logit_parts = [linear_logit, like_logit]
    if exFM_logit is not None:
        logit_parts.append(exFM_logit)
    like_logit = tf.keras.layers.add(logit_parts)

    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output_like)
    return model
Example #20
0
def sess_interest_extractor(tr_input, sess_max_count, TR):
    """Run ``TR`` over each session input and join the results along axis 1.

    :param tr_input: indexable collection holding one input per session.
    :param sess_max_count: number of sessions to process (indices ``0 .. sess_max_count - 1``).
    :param TR: callable invoked once per session as ``TR([x, x])``, i.e. with the session input in both positions.
    :return: ``concat_fun`` of the per-session outputs with ``axis=1``.
    """
    per_session_out = [
        TR([tr_input[idx], tr_input[idx]]) for idx in range(sess_max_count)
    ]
    return concat_fun(per_session_out, axis=1)
Example #21
0
File: din.py Project: zwcdp/DSIN
def DIN(feature_dim_dict,
        seq_feature_list,
        embedding_size=8,
        hist_len_max=16,
        dnn_use_bn=False,
        dnn_hidden_units=(200, 80),
        dnn_activation='relu',
        att_hidden_size=(80, 40),
        att_activation="dice",
        att_weight_normalization=False,
        l2_reg_dnn=0,
        l2_reg_embedding=1e-6,
        dnn_dropout=0,
        init_std=0.0001,
        seed=1024,
        task='binary'):
    """Build a Deep Interest Network (DIN) and return it as a Keras model.

    :param feature_dim_dict: dict describing the feature fields (only sparse
        features are supported for now), e.g.
        ``{'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2}, 'dense': []}``.
    :param seq_feature_list: list of sequence sparse fields; must be a subset
        of ``feature_dim_dict["sparse"]``.
    :param embedding_size: positive integer, size of each sparse embedding.
    :param hist_len_max: positive int, maximum length of the behaviour
        sequence input.
    :param dnn_use_bn: bool, whether the deep net applies BatchNormalization
        before its activation.
    :param dnn_hidden_units: list of positive integers (or empty list), the
        number of layers and units per layer of the deep net.
    :param dnn_activation: activation function of the deep net.
    :param att_hidden_size: list of positive integers, the number of layers
        and units per layer of the attention net.
    :param att_activation: activation function of the attention net.
    :param att_weight_normalization: bool, whether the attention scores of
        the local activation unit are normalized.
    :param l2_reg_dnn: float, L2 regularization strength for the deep net.
    :param l2_reg_embedding: float, L2 regularization strength for the
        embedding vectors.
    :param dnn_dropout: float in [0, 1), dropout probability inside the
        deep net.
    :param init_std: float, standard deviation used to initialize the
        embedding vectors.
    :param seed: integer random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"``
        for regression loss.
    :return: a Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    sparse_feats = feature_dim_dict["sparse"]

    # One embedding table per sparse field; index 0 is masked only for the
    # fields that are listed as sequence features.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(sparse_feats):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0,
                                                stddev=init_std,
                                                seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in seq_feature_list))

    # Lookups are issued in the order query -> keys -> deep input.
    query_vecs = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                        sparse_feats, seq_feature_list,
                                        seq_feature_list)
    key_vecs = get_embedding_vec_list(sparse_embedding_dict,
                                      user_behavior_input, sparse_feats,
                                      seq_feature_list, seq_feature_list)
    deep_vecs = get_embedding_vec_list(sparse_embedding_dict,
                                       sparse_input,
                                       sparse_feats,
                                       mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(key_vecs)
    deep_input_emb = concat_fun(deep_vecs)
    query_emb = concat_fun(query_vecs)

    # Pool the behaviour sequence (keys) with attention driven by the query.
    attention_pooling = AttentionSequencePoolingLayer(
        att_hidden_size,
        att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    merge_layer = Concatenate()
    unmasked_emb = NoMask()(deep_input_emb)
    dnn_features = Flatten()(merge_layer([unmasked_emb, hist]))
    if dense_input:
        # Dense inputs are appended after the flattened embeddings.
        dnn_features = Concatenate()([dnn_features] +
                                     list(dense_input.values()))

    dnn_layer = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                    dnn_use_bn, seed)
    hidden = dnn_layer(dnn_features)
    final_logit = Dense(1, use_bias=False)(hidden)
    prediction = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    return Model(inputs=model_input_list, outputs=prediction)
Example #22
0
def DeepFM(linear_feature_columns,
           dnn_feature_columns,
           embedding_size=8,
           use_fm=True,
           only_dnn=False,
           dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001,
           l2_reg_dnn=0,
           init_std=0.0001,
           seed=1024,
           dnn_dropout=0,
           dnn_activation='relu',
           dnn_use_bn=False,
           task='binary'):
    """Build a two-task (``finish`` / ``like``) DeepFM model with an MMoE layer.

    The shared DNN output is fed to an ``MMoE`` layer that yields one tensor
    per task. Each task then runs through two 128-unit ``Dense`` layers and a
    bias-free 1-unit ``Dense`` layer; the resulting task logit is summed with
    the shared linear logit and FM logit before the prediction layer.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: accepted for signature compatibility; not read by this
        function (the FM logit is always added).
    :param only_dnn: accepted for signature compatibility; not read by this
        function.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN and
        to the kernels of the per-task ``Dense`` towers.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN and in the
        per-task ``Dense`` towers.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: accepted for signature compatibility; not read by this
        function (both prediction layers are built with ``'binary'``).
    :return: A Keras model instance whose outputs are
        ``[output_finish, output_like]``.
    """
    # Create one Input per feature: features maps feature name -> Input.
    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    # [Input1, Input2, ...]
    inputs_list = list(features.values())

    # sparse_embedding_list: embedding outputs, each fed by its feature's
    # Input; dense_value_list: the Inputs of the dense features.
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    # Linear transformation part, no activation function.
    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    # Sparse embeddings concatenated together.
    fm_input = concat_fun(sparse_embedding_list, axis=1)

    # Second-order FM term only; excludes the first-order term and the bias.
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)
    mmoe_out = MMoE(units=16, num_experts=8, num_tasks=2)(dnn_out)

    # One MMoE output per task, in the order (finish, like).
    [finish_in, like_in] = mmoe_out

    finish_out_1 = Dense(128,
                         dnn_activation,
                         kernel_regularizer=l2(l2_reg_dnn))(finish_in)
    finish_out = Dense(128, dnn_activation,
                       kernel_regularizer=l2(l2_reg_dnn))(finish_out_1)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out_1 = Dense(128, dnn_activation,
                       kernel_regularizer=l2(l2_reg_dnn))(like_in)
    like_out = Dense(128, dnn_activation,
                     kernel_regularizer=l2(l2_reg_dnn))(like_out_1)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # NOTE: a former shared ``dnn_logit`` head on ``dnn_out`` was never
    # connected to either output, so it is no longer built.

    # Both tasks share the same linear and FM terms.
    finish_logit = tf.keras.layers.add([linear_logit, fm_logit, finish_logit])
    like_logit = tf.keras.layers.add([linear_logit, fm_logit, like_logit])

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
Example #23
0
def xDeepFM_MTL(feature_dim_dict, embedding_size=8, hidden_size=(256, 256), cin_layer_size=(256, 256,),
                cin_split_half=True,
                task_net_size=(128,), l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
                seed=1024, ):
    """Build a two-task (``finish`` / ``like``) xDeepFM model.

    A shared MLP feeds two task-specific MLP towers. Each task logit is the
    sum of the shared linear logit, the tower logit and, when
    ``cin_layer_size`` is non-empty, the shared CIN logit.

    :param feature_dim_dict: feature dictionary, including feature names and
        feature lists. If it has a ``'txt'`` key, one extra ``Input`` of shape
        ``(feat.dimension,)`` is appended to the model inputs per entry; these
        inputs are not wired into any layer here.
    :param embedding_size: forwarded to ``preprocess_input_embedding``.
    :param hidden_size: layer sizes of the shared MLP.
    :param cin_layer_size: layer sizes of the CIN; when empty, the CIN branch
        is skipped and its logit is left out of the per-task sums.
    :param cin_split_half: forwarded to ``CIN``.
    :param task_net_size: layer sizes of each task-specific MLP tower; must
        contain at least one layer.
    :param l2_reg_linear: forwarded to ``preprocess_input_embedding``.
    :param l2_reg_embedding: forwarded to ``preprocess_input_embedding``.
    :param seed: random seed forwarded to the embedding preprocessing and CIN.
    :return: a Keras model whose outputs are ``[output_finish, output_like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    # Check that the sparse and dense feature structure is valid.
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # Todo, add text sequence embedding
    # The hard-coded 0.0001 is presumably the embedding init std -- confirm
    # against preprocess_input_embedding.
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, 0.0001, seed)

    # TODO, add other feature
    if 'txt' in feature_dim_dict:
        for i, feat in enumerate(feature_dim_dict["txt"]):
            txt_input = tf.keras.layers.Input(
                shape=(feat.dimension,), name='txt_' + str(i) + '-' + feat.name)
            inputs_list.append(txt_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # The CIN branch is optional. Track its logit explicitly so that an empty
    # cin_layer_size no longer triggers an unbound-variable error below.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu',
                       cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None, )(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(like_out)

    # Terms shared by both tasks that follow the task-specific logit.
    cin_terms = [] if exFM_logit is None else [exFM_logit]

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit] + cin_terms)
    like_logit = tf.keras.layers.add(
        [linear_logit, like_logit] + cin_terms)

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=[
                                  output_finish, output_like])
    return model
Example #24
0
def DeepFM(linear_feature_columns,
           dnn_feature_columns,
           embedding_size=8,
           use_fm=True,
           use_only_dnn=False,
           dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001,
           l2_reg_dnn=0,
           init_std=0.0001,
           seed=1024,
           dnn_dropout=0,
           dnn_activation='relu',
           dnn_use_bn=False,
           task='binary'):
    """Instantiates the DeepFM Network architecture.

    The final logit is assembled from up to three parts: the linear logit
    (always, unless ``use_only_dnn`` is set), the FM logit (when ``use_fm``
    is truthy) and the DNN logit (when ``dnn_hidden_units`` is non-empty).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param use_only_dnn: bool, when truthy the DNN logit alone is used as the
        final logit and the linear / FM logits are ignored.
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_out)

    if use_only_dnn:
        final_logit = dnn_logit
    else:
        # Collect the enabled parts in the fixed order linear, FM, deep.
        logit_terms = [linear_logit]
        if use_fm:
            logit_terms.append(fm_logit)
        if len(dnn_hidden_units) > 0:
            logit_terms.append(dnn_logit)

        if len(logit_terms) == 1:  # only linear: no Add layer is needed
            final_logit = linear_logit
        else:
            final_logit = tf.keras.layers.add(logit_terms)

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
Example #25
0
def xDeepFM(feature_dim_dict,
            embedding_size=8,
            seed=1024,
            init_std=0.0001,
            l2_reg_linear=0.00001,
            l2_reg_embedding=0.00001,
            cin_layer_size=(256, 256),
            cin_split_half=True,
            cin_activation='relu',
            hidden_size=(256, 256),
            activation='relu',
            keep_prob=1,
            use_bn=False,
            l2_reg_deep=0,
            final_activation='sigmoid',
            use_video=False,
            use_audio=False):
    """Build an xDeepFM model with optional video / audio vector inputs.

    The final logit is the sum of the linear logit, the MLP logit and, when
    ``cin_layer_size`` is non-empty, the CIN logit.

    :param feature_dim_dict: feature configuration, validated by
        ``check_feature_config_dict``.
    :param embedding_size: embedding size; also the width the video / audio
        vectors are projected to.
    :param seed: random seed forwarded to the embedding preprocessing, CIN
        and MLP.
    :param init_std: forwarded to ``preprocess_input_embedding``.
    :param l2_reg_linear: forwarded to ``preprocess_input_embedding``.
    :param l2_reg_embedding: forwarded to ``preprocess_input_embedding`` and
        used as L2 kernel regularizer of the video / audio projections.
    :param cin_layer_size: layer sizes of the CIN; when empty, the CIN branch
        is skipped and its logit is left out of the final sum.
    :param cin_split_half: forwarded to ``CIN``.
    :param cin_activation: forwarded to ``CIN``.
    :param hidden_size: forwarded to ``MLP`` (first argument).
    :param activation: forwarded to ``MLP``.
    :param keep_prob: forwarded to ``MLP``.
    :param use_bn: forwarded to ``MLP``.
    :param l2_reg_deep: forwarded to ``MLP``.
    :param final_activation: forwarded to ``PredictionLayer``.
    :param use_video: when truthy, add a 128-wide input named ``'video'``.
    :param use_audio: when truthy, add a 128-wide input named ``'audio'``.
    :return: a Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)
    # The trailing positional True is passed through as-is; its meaning is
    # defined by preprocess_input_embedding -- TODO confirm.
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        init_std, seed, True)

    # Each enabled modality contributes a 128-wide input that is projected to
    # embedding_size and reshaped to (1, embedding_size), then appended to the
    # embedding list. Video is handled before audio so layers are created in
    # a stable order.
    for modality, enabled in (('video', use_video), ('audio', use_audio)):
        if not enabled:
            continue
        modality_input = tf.keras.layers.Input(shape=(128, ), name=modality)
        modality_emb = tf.keras.layers.Dense(
            embedding_size,
            use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(modality_input)
        modality_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size, ))(modality_emb)
        deep_emb_list.append(modality_emb)
        inputs_list.append(modality_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # The CIN branch is optional. Track its logit explicitly so that an empty
    # cin_layer_size no longer triggers an unbound-variable error below.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)

    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                   seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(deep_out)

    logit_terms = [linear_logit, deep_logit]
    if exFM_logit is not None:
        logit_terms.append(exFM_logit)
    final_logit = tf.keras.layers.add(logit_terms)
    output = PredictionLayer(final_activation, name='output')(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model