# Example #1
def xDeepFM_MTL(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """Multi-task xDeepFM with 'finish' and 'like' sigmoid heads.

    :param feature_dim_dict: feature configuration dict (validated by
        ``check_feature_config_dict``).
    :param embedding_size: width of each feature embedding.
    :param hidden_size: layer widths of the shared deep (MLP) tower.
    :param cin_layer_size: layer widths of the CIN branch; an empty
        tuple disables the CIN branch.
    :param cin_split_half: if True, only half of each CIN feature map
        is connected onward (forwarded to ``CIN``).
    :param task_net_size: layer widths of each task-specific MLP head;
        must contain at least one layer.
    :param l2_reg_linear: L2 regularization strength for the linear part.
    :param l2_reg_embedding: L2 regularization strength for embeddings.
    :param seed: random seed forwarded to the layers.
    :return: a ``tf.keras.models.Model`` with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')
    # NOTE(review): removed leftover debug print() calls.

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # CIN branch. Fix: the original referenced exFM_logit unconditionally
    # below even though it was only defined when len(cin_layer_size) > 0,
    # which would raise NameError for an empty cin_layer_size.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    # Task-specific heads share the deep tower output.
    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # Sum the linear, task and (optional) CIN logits per task.
    finish_terms = [linear_logit, finish_logit]
    like_terms = [linear_logit, like_logit]
    if exFM_logit is not None:
        finish_terms.append(exFM_logit)
        like_terms.append(exFM_logit)
    finish_logit = tf.keras.layers.add(finish_terms)
    like_logit = tf.keras.layers.add(like_terms)

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
# Example #2
def myAutoInt(
    feature_dim_dict,
    embedding_size=8,
    att_layer_num=3,
    att_embedding_size=8,
    att_head_num=4,
    att_res=True,
    hidden_size=(256, 256),
    activation='relu',
    l2_reg_deep=0,
    l2_reg_embedding=1e-5,
    use_bn=False,
    keep_prob=1.0,
    init_std=0.0001,
    seed=1024,
    final_activation='sigmoid',
):
    """AutoInt-style multi-task model with 'finish' and 'like' heads.

    Builds a self-attention (InteractingLayer) tower and a plain MLP
    tower over the shared feature embeddings, concatenates them per
    task, and emits one prediction per task.

    :param feature_dim_dict: feature configuration dict (validated by
        ``check_feature_config_dict``).
    :param embedding_size: width of each feature embedding.
    :param att_layer_num: number of stacked InteractingLayer blocks.
    :param att_embedding_size: attention embedding width per head.
    :param att_head_num: number of attention heads.
    :param att_res: whether InteractingLayer uses a residual connection.
    :param hidden_size: layer widths of the MLP tower.
    :param activation: activation for the MLP tower.
    :param l2_reg_deep: L2 regularization for the MLP tower.
    :param l2_reg_embedding: L2 regularization for the embeddings.
    :param use_bn: whether the MLP tower uses batch normalization.
    :param keep_prob: dropout keep probability in the MLP tower.
    :param init_std: stddev for embedding initialization.
    :param seed: random seed forwarded to the layers.
    :param final_activation: activation of both prediction heads.
    :return: a ``tf.keras.models.Model`` with outputs ``[finish, like]``.
    :raises ValueError: if both towers are disabled.
    """
    if len(hidden_size) <= 0 and att_layer_num <= 0:
        raise ValueError("Either hidden_layer or att_layer_num must > 0")
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, _, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed,
        False)

    # Attention tower: stack interacting layers over the embeddings.
    att_x = concat_fun(deep_emb_list, axis=1)
    for _ in range(att_layer_num):
        att_x = InteractingLayer(att_embedding_size, att_head_num,
                                 att_res)(att_x)
    att_flat = tf.keras.layers.Flatten()(att_x)

    # Deep tower: plain MLP over the flattened embeddings.
    dnn_in = tf.keras.layers.Flatten()(concat_fun(deep_emb_list))
    dnn_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                  seed)(dnn_in)

    # One linear logit per task over the concatenated towers.
    finish_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(
        tf.keras.layers.Concatenate()([att_flat, dnn_out]))
    like_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(
        tf.keras.layers.Concatenate()([att_flat, dnn_out]))

    output_finish = PredictionLayer(final_activation,
                                    name='finish')(finish_logit)
    output_like = PredictionLayer(final_activation, name='like')(like_logit)

    return tf.keras.models.Model(inputs=inputs_list,
                                 outputs=[output_finish, output_like])
def MTL_with_Title(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    """xDeepFM multi-task model extended with a title-text LSTM branch.

    :param feature_dim_dict: feature configuration dict (validated by
        ``check_feature_config_dict``).
    :param embedding_size: width of each feature embedding.
    :param hidden_size: layer widths of the shared deep (MLP) tower.
    :param cin_layer_size: layer widths of the CIN branch; an empty
        tuple disables the CIN branch.
    :param cin_split_half: if True, only half of each CIN feature map
        is connected onward (forwarded to ``CIN``).
    :param task_net_size: layer widths of each task-specific MLP head;
        must contain at least one layer.
    :param l2_reg_linear: L2 regularization strength for the linear part.
    :param l2_reg_embedding: L2 regularization strength for embeddings.
    :param seed: random seed forwarded to the layers.
    :return: a ``tf.keras.models.Model`` with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # xDeepFM Model

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # CIN branch. Fix: exFM_logit was referenced unconditionally below
    # even though it was only defined when len(cin_layer_size) > 0,
    # which would raise NameError for an empty cin_layer_size.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # Title branch: embed the 35-token title sequence, run an LSTM,
    # average-pool over time and project down to a single logit.
    # NOTE(review): the final Dense uses 'relu', so this term is
    # non-negative — confirm this is intended for a logit contribution.
    title_input = Input(shape=(35, ), dtype='int32', name='title_input')
    title_embedding = Embedding(output_dim=32,
                                input_dim=134545,
                                input_length=35)(title_input)
    lstm_out = LSTM(units=32, return_sequences=True)(title_embedding)
    avg_out = GlobalAveragePooling1D()(lstm_out)
    dense1 = Dense(32, activation='relu')(avg_out)
    dense2 = Dense(1, activation='relu')(dense1)

    # Sum linear, task, title and (optional) CIN logits per task.
    finish_terms = [linear_logit, finish_logit, dense2]
    like_terms = [linear_logit, like_logit, dense2]
    if exFM_logit is not None:
        finish_terms.append(exFM_logit)
        like_terms.append(exFM_logit)
    finish_logit = tf.keras.layers.add(finish_terms)
    like_logit = tf.keras.layers.add(like_terms)

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    # Removed leftover debug print of inputs_list.
    inputs_list.append(title_input)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
# Example #4
def xDeepFM_MTL(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.000001,
    l2_reg_embedding=0.000001,
    seed=1024,
):
    """Multi-task xDeepFM with 'finish' and 'like' sigmoid heads.

    :param feature_dim_dict: feature configuration dict (validated by
        ``check_feature_config_dict``).
    :param embedding_size: width of each feature embedding.
    :param hidden_size: layer widths of the shared deep (MLP) tower.
    :param cin_layer_size: layer widths of the CIN branch; an empty
        tuple disables the CIN branch.
    :param cin_split_half: if True, only half of each hidden CIN feature
        map is connected to the output unit.
    :param task_net_size: layer widths of each task-specific MLP head;
        must contain at least one layer.
    :param l2_reg_linear: L2 regularization strength for the linear part.
    :param l2_reg_embedding: L2 regularization strength for embeddings.
    :param seed: random seed forwarded to the layers.
    :return: a ``tf.keras.models.Model`` with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    # Model input: embeddings concatenated along the field axis,
    # shape (batch_size, field_size, embedding_size).
    fm_input = concat_fun(deep_emb_list, axis=1)

    # CIN branch: input (batch_size, field_size, embedding_size),
    # output (batch_size, feature_num); with split_half=True only half
    # of each hidden feature map connects to the output unit.
    # Fix: exFM_logit was referenced unconditionally below even though
    # it was only defined when len(cin_layer_size) > 0, which would
    # raise NameError for an empty cin_layer_size.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        # Fully connected projection to a single output unit.
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    # Flatten all non-batch dims to (batch_size, sum_size) and feed the
    # raw embedding features straight into the shared MLP tower.
    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    # Pass deep_out through a per-task MLP and project each to a logit.
    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # The final logit of each task is the sum of linear_logit, the
    # task logit and (when present) exFM_logit.
    finish_terms = [linear_logit, finish_logit]
    like_terms = [linear_logit, like_logit]
    if exFM_logit is not None:
        finish_terms.append(exFM_logit)
        like_terms.append(exFM_logit)
    finish_logit = tf.keras.layers.add(finish_terms)
    like_logit = tf.keras.layers.add(like_terms)

    # Convert logits to probabilities with sigmoid and assemble the model.
    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
# Example #5
def xDeepFM_MTL(feature_dim_dict, embedding_size=8, hidden_size=(256, 256), cin_layer_size=(256, 256,),
                cin_split_half=True,
                task_net_size=(128,), l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
                seed=1024, ):
    """Multi-task xDeepFM with 'finish' and 'like' sigmoid heads.

    :param feature_dim_dict: feature dictionary with feature names and
        feature lists (validated by ``check_feature_config_dict``).
    :param embedding_size: width of each feature embedding.
    :param hidden_size: layer widths of the shared deep (MLP) tower.
    :param cin_layer_size: layer widths of the CIN branch; an empty
        tuple disables the CIN branch.
    :param cin_split_half: forwarded to ``CIN``.
    :param task_net_size: layer widths of each task-specific MLP head;
        must contain at least one layer.
    :param l2_reg_linear: L2 regularization strength for the linear part.
    :param l2_reg_embedding: L2 regularization strength for embeddings.
    :param seed: random seed forwarded to the layers.
    :return: a ``tf.keras.models.Model`` with outputs ``[finish, like]``.
    :raises ValueError: if ``task_net_size`` is empty.
    """
    # Validate that the sparse/dense feature structure is well-formed.
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # TODO: add text sequence embedding.
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, 0.0001, seed)

    # TODO: add other features.
    # NOTE(review): these txt inputs are appended to inputs_list but
    # never connected to the model graph — confirm this is intentional;
    # Keras may reject or warn about unconnected inputs.
    if 'txt' in feature_dim_dict:
        for i, feat in enumerate(feature_dim_dict["txt"]):
            txt_input = tf.keras.layers.Input(
                shape=(feat.dimension,), name='txt_' + str(i) + '-' + feat.name)
            inputs_list.append(txt_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # CIN branch. Fix: exFM_logit was referenced unconditionally below
    # even though it was only defined when len(cin_layer_size) > 0,
    # which would raise NameError for an empty cin_layer_size.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu',
                       cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None, )(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    # Task-specific heads share the deep tower output.
    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(like_out)

    # Sum the linear, task and (optional) CIN logits per task.
    finish_terms = [linear_logit, finish_logit]
    like_terms = [linear_logit, like_logit]
    if exFM_logit is not None:
        finish_terms.append(exFM_logit)
        like_terms.append(exFM_logit)
    finish_logit = tf.keras.layers.add(finish_terms)
    like_logit = tf.keras.layers.add(like_terms)

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=[
                                  output_finish, output_like])
    return model
# Example #6
def xDeepFM(feature_dim_dict,
            embedding_size=8,
            seed=1024,
            init_std=0.0001,
            l2_reg_linear=0.00001,
            l2_reg_embedding=0.00001,
            cin_layer_size=(256, 256),
            cin_split_half=True,
            cin_activation='relu',
            hidden_size=(256, 256),
            activation='relu',
            keep_prob=1,
            use_bn=False,
            l2_reg_deep=0,
            final_activation='sigmoid',
            use_video=False,
            use_audio=False):
    """Single-task xDeepFM with optional video/audio embedding inputs.

    :param feature_dim_dict: feature configuration dict (validated by
        ``check_feature_config_dict``).
    :param embedding_size: width of each feature embedding.
    :param seed: random seed forwarded to the layers.
    :param init_std: stddev for embedding initialization.
    :param l2_reg_linear: L2 regularization strength for the linear part.
    :param l2_reg_embedding: L2 regularization strength for embeddings.
    :param cin_layer_size: layer widths of the CIN branch; an empty
        tuple disables the CIN branch.
    :param cin_split_half: forwarded to ``CIN``.
    :param cin_activation: activation used inside the CIN branch.
    :param hidden_size: layer widths of the deep (MLP) tower.
    :param activation: activation for the MLP tower.
    :param keep_prob: dropout keep probability in the MLP tower.
    :param use_bn: whether the MLP tower uses batch normalization.
    :param l2_reg_deep: L2 regularization for the MLP tower.
    :param final_activation: activation of the prediction head.
    :param use_video: if True, add a 128-dim 'video' input projected to
        an extra embedding field.
    :param use_audio: if True, add a 128-dim 'audio' input projected to
        an extra embedding field.
    :return: a ``tf.keras.models.Model`` with a single output.
    """
    check_feature_config_dict(feature_dim_dict)
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        init_std, seed, True)

    # Project the raw 128-dim video vector to embedding_size and treat
    # it as one additional embedding field.
    if use_video:
        video_input = tf.keras.layers.Input(shape=(128, ), name='video')
        video_emb = tf.keras.layers.Dense(
            embedding_size,
            use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(video_input)
        video_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size, ))(video_emb)
        deep_emb_list.append(video_emb)
        inputs_list.append(video_input)

    # Same treatment for the audio vector.
    if use_audio:
        audio_input = tf.keras.layers.Input(shape=(128, ), name='audio')
        audio_emb = tf.keras.layers.Dense(
            embedding_size,
            use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(audio_input)
        audio_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size, ))(audio_emb)
        deep_emb_list.append(audio_emb)
        inputs_list.append(audio_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    # CIN branch. Fix: exFM_logit was referenced unconditionally below
    # even though it was only defined when len(cin_layer_size) > 0,
    # which would raise NameError for an empty cin_layer_size.
    exFM_logit = None
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(
            1,
            activation=None,
        )(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)

    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                   seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(deep_out)

    # Sum linear, deep and (optional) CIN logits into the final logit.
    logit_terms = [linear_logit, deep_logit]
    if exFM_logit is not None:
        logit_terms.append(exFM_logit)
    final_logit = tf.keras.layers.add(logit_terms)
    output = PredictionLayer(final_activation, name='output')(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model