Example #1
0
def SDM(user_feature_columns,
        item_feature_columns,
        history_feature_list,
        num_sampled=5,
        units=64,
        rnn_layers=2,
        dropout_rate=0.2,
        rnn_num_res=1,
        num_head=4,
        l2_reg_embedding=1e-6,
        dnn_activation='tanh',
        init_std=0.0001,
        seed=1024):
    """Instantiates the Sequential Deep Matching Model architecture.

    User feature columns whose names are ``"prefer_" + <history feature>`` feed
    the long-term (preference) branch, those named ``"short_" + <history feature>``
    feed the short-term branch, and every other variable-length column is pooled
    into the user profile embedding. ``features`` must also contain the inputs
    ``'prefer_sess_length'`` and ``'short_sess_length'``.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param history_feature_list: list, to indicate short and prefer sequence sparse field
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param units: int, dimension for each output layer
    :param rnn_layers: int, layer number of rnn
    :param dropout_rate: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param rnn_num_res: int. The number of residual layers in rnn layers
    :param num_head: int, the number of attention head
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_activation: Activation function to use in deep net
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :return: A Keras model instance. The attributes ``user_input``,
        ``user_embedding``, ``item_input`` and ``item_embedding`` are attached
        to the returned model.
    :raises ValueError: if more than one item feature column is supplied, or
        if any user feature column is a ``DenseFeat``.

    """

    if len(item_feature_columns) > 1:
        # This message used to name MIND (copy-paste slip); the model is SDM.
        raise ValueError("Now SDM only support 1 item feature like item_id")
    item_feature_column = item_feature_columns[0]
    item_feature_name = item_feature_column.name
    item_vocabulary_size = item_feature_column.vocabulary_size

    features = build_input_features(user_feature_columns)
    user_inputs_list = list(features.values())

    # Split the user columns by kind; a falsy argument yields empty lists.
    user_columns = user_feature_columns or []
    sparse_feature_columns = [
        fc for fc in user_columns if isinstance(fc, SparseFeat)
    ]
    dense_feature_columns = [
        fc for fc in user_columns if isinstance(fc, DenseFeat)
    ]
    if dense_feature_columns:
        raise ValueError("Now SDM don't support dense feature")
    varlen_sparse_feature_columns = [
        fc for fc in user_columns if isinstance(fc, VarLenSparseFeat)
    ]

    # Route each variable-length column to the branch selected by its name.
    prefer_fc_names = ["prefer_" + name for name in history_feature_list]
    short_fc_names = ["short_" + name for name in history_feature_list]

    sparse_varlen_feature_columns = []
    prefer_history_columns = []
    short_history_columns = []
    for fc in varlen_sparse_feature_columns:
        if fc.name in prefer_fc_names:
            prefer_history_columns.append(fc)
        elif fc.name in short_fc_names:
            short_history_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    # One embedding table set shared by the user and the item side.
    embedding_matrix_dict = create_embedding_matrix(user_feature_columns +
                                                    item_feature_columns,
                                                    l2_reg_embedding,
                                                    init_std,
                                                    seed,
                                                    prefix="")

    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())

    prefer_emb_list = embedding_lookup(embedding_matrix_dict,
                                       features,
                                       prefer_history_columns,
                                       prefer_fc_names,
                                       prefer_fc_names,
                                       to_list=True)  # L^u
    short_emb_list = embedding_lookup(embedding_matrix_dict,
                                      features,
                                      short_history_columns,
                                      short_fc_names,
                                      short_fc_names,
                                      to_list=True)  # S^u
    user_emb_list = embedding_lookup(embedding_matrix_dict,
                                     features,
                                     sparse_feature_columns,
                                     to_list=True)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_matrix_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)
    user_emb_list += sequence_embed_list  # e^u
    user_emb = concat_func(user_emb_list)
    user_emb_output = Dense(units,
                            activation=dnn_activation,
                            name="user_emb_output")(user_emb)

    # Long-term branch: attend over every preference sequence with the user
    # profile embedding as query, then project the concatenated results.
    prefer_sess_length = features['prefer_sess_length']
    prefer_att_outputs = [
        AttentionSequencePoolingLayer(dropout_rate=0)(
            [user_emb_output, prefer_emb, prefer_sess_length])
        for prefer_emb in prefer_emb_list
    ]
    prefer_att_concat = concat_func(prefer_att_outputs)
    prefer_output = Dense(units,
                          activation=dnn_activation,
                          name="prefer_output")(prefer_att_concat)

    # Short-term branch: projection -> multi-layer RNN -> self attention ->
    # user attention.
    short_sess_length = features['short_sess_length']
    short_emb_concat = concat_func(short_emb_list)
    short_emb_input = Dense(units,
                            activation=dnn_activation,
                            name="short_emb_input")(short_emb_concat)

    short_rnn_output = DynamicMultiRNN(
        num_units=units,
        return_sequence=True,
        num_layers=rnn_layers,
        num_residual_layers=rnn_num_res,
        dropout_rate=dropout_rate)([short_emb_input, short_sess_length])

    short_att_output = SelfMultiHeadAttention(
        num_units=units,
        head_num=num_head,
        dropout_rate=dropout_rate,
        future_binding=True,
        use_layer_norm=True)([short_rnn_output, short_sess_length
                              ])  # [batch_size, time, num_units]

    short_output = UserAttention(
        num_units=units,
        activation=dnn_activation,
        use_res=True,
        dropout_rate=dropout_rate)(
            [user_emb_output, short_att_output, short_sess_length])

    # Sigmoid gate mixing both branches: gate * short + (1 - gate) * prefer.
    gate_input = concat_func([prefer_output, short_output, user_emb_output])
    gate = Dense(units, activation='sigmoid')(gate_input)

    gate_output = Lambda(
        lambda x: tf.multiply(x[0], x[1]) + tf.multiply(1 - x[0], x[2]))(
            [gate, short_output, prefer_output])
    # Drop axis 1 of the gated output to obtain the final user vector.
    gate_output_reshape = Lambda(lambda x: tf.squeeze(x, 1))(gate_output)

    # Look up the embedding of every item id in the vocabulary; the result is
    # used as the candidate table of the sampled softmax.
    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(
        item_features[item_feature_name])
    item_embedding_matrix = embedding_matrix_dict[item_feature_name]
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))

    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    output = SampledSoftmaxLayer(num_sampled=num_sampled)([
        pooling_item_embedding_weight, gate_output_reshape,
        item_features[item_feature_name]
    ])
    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    # Expose both towers on the model object for callers that need them.
    model.user_input = user_inputs_list
    model.user_embedding = gate_output_reshape

    model.item_input = item_inputs_list
    model.item_embedding = get_item_embedding(
        pooling_item_embedding_weight, item_features[item_feature_name])

    return model
Example #2
0
def MIND(dnn_feature_columns,
         history_feature_list,
         target_song_size,
         k_max=2,
         dnn_use_bn=False,
         user_hidden_unit=64,
         dnn_activation='relu',
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         init_std=0.0001,
         seed=1024):
    """Builds a MIND model: capsule-pooled behaviour history combined with the
    remaining user features, attended against the target item and trained
    through a sampled softmax layer.

    Variable-length columns named ``"hist_" + <history feature>`` are treated
    as the behaviour sequence; all other variable-length columns are pooled.
    ``features`` must contain the inputs ``'hist_len'`` and ``'item'``.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field
    :param target_song_size: int, the total size of the recall songs
    :param k_max: int, the max size of user interest embedding
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param user_hidden_unit: int. user dnn hidden layer size
    :param dnn_activation: Activation function to use in deep net
    :param l2_reg_dnn: L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :return: the ``Model`` mapping all built inputs to the sampled softmax output.
    """
    features = build_input_features(dnn_feature_columns)

    # Split the columns by kind; a falsy argument yields empty lists.
    all_columns = dnn_feature_columns if dnn_feature_columns else []
    sparse_fcs = [col for col in all_columns if isinstance(col, SparseFeat)]
    # Computed as in the original implementation; not consumed below.
    dense_fcs = [col for col in all_columns if isinstance(col, DenseFeat)]
    varlen_fcs = [
        col for col in all_columns if isinstance(col, VarLenSparseFeat)
    ]

    # Behaviour-history columns versus the other variable-length columns.
    hist_names = ["hist_" + name for name in history_feature_list]
    behavior_fcs = [col for col in varlen_fcs if col.name in hist_names]
    pooled_varlen_fcs = [
        col for col in varlen_fcs if col.name not in hist_names
    ]

    hist_len = features['hist_len']
    model_inputs = list(features.values())

    emb_tables = create_embedding_matrix(dnn_feature_columns,
                                         l2_reg_embedding,
                                         init_std,
                                         seed,
                                         prefix="")

    # Embedded behaviour sequence.
    behavior_emb_parts = embedding_lookup(emb_tables,
                                          features,
                                          behavior_fcs,
                                          hist_names,
                                          hist_names,
                                          to_list=True)
    history_emb = concat_func(behavior_emb_parts, mask=False)

    # Embedded target item; its last dimension fixes the embedding size used
    # by the capsule, DNN and attention layers below.
    target_emb_parts = embedding_lookup(emb_tables,
                                        features,
                                        sparse_fcs, ['item'],
                                        history_feature_list,
                                        to_list=True)
    target_emb_raw = concat_func(target_emb_parts, mask=False)
    emb_dim = target_emb_raw.get_shape()[-1].value

    target_emb = tf.keras.layers.Lambda(
        shape_target,
        arguments={'target_emb_size': emb_dim})(target_emb_raw)

    # Remaining user features: sparse embeddings plus pooled sequences.
    profile_embs = embedding_lookup(emb_tables,
                                    features,
                                    sparse_fcs,
                                    mask_feat_list=history_feature_list,
                                    to_list=True)
    pooled_emb_dict = varlen_embedding_lookup(emb_tables, features,
                                              pooled_varlen_fcs)
    profile_embs += get_varlen_pooling_list(pooled_emb_dict,
                                            features,
                                            pooled_varlen_fcs,
                                            to_list=True)

    profile_concat = concat_func(profile_embs)
    user_other_feature = Flatten()(profile_concat)

    max_len = history_emb.get_shape()[1].value

    capsule_layer = CapsuleLayer(input_units=emb_dim,
                                 out_units=emb_dim,
                                 max_len=max_len,
                                 k_max=k_max)
    high_capsule = capsule_layer((history_emb, hist_len))

    other_feature_tile = tf.keras.layers.Lambda(
        tile_user_otherfeat, arguments={'k_max': k_max})(user_other_feature)

    user_deep_input = Concatenate()(
        [NoMask()(other_feature_tile), high_capsule])

    user_tower = DNN((user_hidden_unit, emb_dim),
                     dnn_activation,
                     l2_reg_dnn,
                     dnn_dropout,
                     dnn_use_bn,
                     seed,
                     name="user_embedding")
    user_embeddings = user_tower(user_deep_input)

    # Per-user interest count: clamp(log2-scaled log1p(hist_len), 1, k_max).
    # The tf ops are created in the same order as the original single
    # expression so that auto-generated op names stay the same.
    k_max_f = tf.cast(k_max, dtype="float32")
    hist_len_f = tf.cast(hist_len, dtype="float32")
    scaled_len = tf.log1p(hist_len_f) / tf.log(2.)
    k_user = tf.cast(tf.maximum(1., tf.minimum(k_max_f, scaled_len)),
                     dtype="int64")  # [B,1] forword/Cast_2

    attention = DotProductAttentionLayer(shape=[emb_dim, emb_dim])
    user_embedding_final = attention((user_embeddings, target_emb),
                                     seq_length=k_user,
                                     max_len=k_max)

    softmax_layer = SampledSoftmaxLayer(target_song_size=target_song_size,
                                        target_emb_size=emb_dim)
    output = softmax_layer(inputs=(user_embedding_final, features['item']))

    return Model(inputs=model_inputs, outputs=output)
Example #3
0
def DSIN(
    feature_dim_dict,
    sess_feature_list,
    embedding_size=8,
    sess_max_count=5,
    sess_len_max=10,
    att_embedding_size=1,
    att_head_num=8,
    dnn_hidden_units=(200, 80),
    dnn_activation='sigmoid',
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    task='binary',
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
    encoding='bias',
):
    """Builds the DSIN model (presumably Deep Session Interest Network).

    The user's behaviours are split into ``sess_max_count`` sessions. Each
    session is encoded by a shared ``Transformer``, the session sequence is
    additionally run through a ``BiLSTM``, and both views are attention-pooled
    against the query embedding before being fed to the final DNN.

    :param feature_dim_dict: dict with a ``"sparse"`` entry listing the sparse
        features (objects with ``name`` and ``dimension``); validated by
        ``check_feature_config_dict``.
    :param sess_feature_list: list of sparse feature names that occur in the sessions.
    :param embedding_size: int, output dimension of every sparse embedding.
    :param sess_max_count: int, number of sessions (inputs ``sess_0`` .. ``sess_{n-1}``).
    :param sess_len_max: int, forwarded to ``get_input``.
    :param att_embedding_size: int, first argument of the ``Transformer``.
    :param att_head_num: int, second argument of the ``Transformer``.
    :param dnn_hidden_units: layer sizes of the final DNN.
    :param dnn_activation: Activation function to use in the final DNN.
    :param l2_reg_dnn: L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param task: str, forwarded to ``PredictionLayer``.
    :param dnn_dropout: dropout setting forwarded to the DNN.
    :param init_std: float, std of the embedding initializer.
    :param seed: integer, to use as random seed.
    :param encoding: str; ``'bias'`` enables bias encoding in the session
        division, any other value enables the Transformer's positional
        encoding instead.
    :return: the assembled ``Model``.
    """
    check_feature_config_dict(feature_dim_dict)

    print('sess_count', sess_max_count, 'encoding', encoding)

    (sparse_input, dense_input, user_behavior_input_dict, _,
     user_sess_length) = get_input(feature_dim_dict, sess_feature_list,
                                   sess_max_count, sess_len_max)

    # One embedding layer per sparse feature; only features that occur inside
    # sessions mask the zero id.
    emb_layers = {}
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        emb_layers[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0,
                                                stddev=init_std,
                                                seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in sess_feature_list))

    # Query: embeddings of the session features taken from the plain inputs.
    query_parts = get_embedding_vec_list(emb_layers,
                                         sparse_input,
                                         feature_dim_dict["sparse"],
                                         sess_feature_list,
                                         sess_feature_list)
    query_emb = concat_fun(query_parts)

    # Flat embedding of all sparse inputs for the DNN.
    profile_parts = get_embedding_vec_list(emb_layers,
                                           sparse_input,
                                           feature_dim_dict["sparse"],
                                           mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(profile_parts)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    use_bias_encoding = encoding == 'bias'
    session_inputs = sess_interest_division(emb_layers,
                                            user_behavior_input_dict,
                                            feature_dim_dict['sparse'],
                                            sess_feature_list,
                                            sess_max_count,
                                            bias_encoding=use_bias_encoding)

    # Positional encoding is only switched on when bias encoding is off.
    session_encoder = Transformer(
        att_embedding_size,
        att_head_num,
        dropout_rate=0,
        use_layer_norm=False,
        use_positional_encoding=(not use_bias_encoding),
        seed=seed,
        supports_masking=True,
        blinding=True)
    sess_fea = sess_interest_extractor(session_inputs, sess_max_count,
                                       session_encoder)

    # Attention pooling over the raw session interests ...
    interest_pooled = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    # ... and over the BiLSTM outputs computed from the same sessions.
    lstm_outputs = BiLSTM(
        len(sess_feature_list) * embedding_size,
        layers=2,
        res_layers=0,
        dropout_rate=0.2,
    )(sess_fea)
    lstm_pooled = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_pooled),
        Flatten()(lstm_pooled)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    hidden = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 False, seed)(deep_input_emb)
    logit = Dense(1, use_bias=False, activation=None)(hidden)
    output = PredictionLayer(task)(logit)

    # Model inputs: sparse + dense inputs, then each session's inputs in
    # order, then the session-length input.
    sess_input_list = []
    for sess_idx in range(sess_max_count):
        sess_key = "sess_" + str(sess_idx)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_key]]))

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    return Model(inputs=model_input_list, outputs=output)
Example #4
0
def CapsuleNet(feature_dim_dict,
               seq_feature_list,
               embedding_size=8,
               hist_len_max=50,
               use_bn=False,
               dnn_hidden_units=(200, 80),
               dnn_activation='sigmoid',
               num_capsule=8,
               dim_capsule=2,
               routing_iterations=3,
               att_hidden_size=(64, 16),
               att_activation="dice",
               att_weight_normalization=True,
               att_embedding_size=1,
               att_head_num=8,
               l2_reg_dnn=0,
               l2_reg_embedding=1e-6,
               dnn_dropout=0,
               init_std=0.0001,
               alpha=1e-6,
               seed=1024,
               task='binary'):
    """Builds a capsule-based model over the user's behaviour sequence.

    The behaviour sequence is embedded, passed through a ``Transformer``
    self-attention layer and routed into capsules (with attention scores
    against the query supplied to the capsule layer). The flattened capsules
    are concatenated with the sparse/dense feature embeddings and fed to a DNN.
    An extra loss ``alpha * get_disp_loss(capsules)`` is attached to the model.

    :param feature_dim_dict: dict with a ``"sparse"`` entry listing the sparse
        features (objects with ``name`` and ``dimension``); validated by
        ``check_feature_config_dict``.
    :param seq_feature_list: list of sparse feature names present in the behaviour sequence.
    :param embedding_size: int, second argument of every ``Embedding`` layer.
    :param hist_len_max: int, forwarded to ``get_input``.
    :param use_bn: bool, forwarded to the final ``DNN``.
    :param dnn_hidden_units: layer sizes of the final DNN.
    :param dnn_activation: Activation function to use in the final DNN.
    :param num_capsule: int, number of capsules.
    :param dim_capsule: int, dimension of each capsule.
    :param routing_iterations: int, passed as ``routings`` to ``Capsule``.
    :param att_hidden_size: hidden units of the attention pooling layer.
    :param att_activation: activation of the attention pooling layer.
    :param att_weight_normalization: bool, forwarded as ``weight_normalization``.
    :param att_embedding_size: int, first argument of the ``Transformer``.
    :param att_head_num: int, second argument of the ``Transformer``.
    :param l2_reg_dnn: L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param dnn_dropout: dropout setting forwarded to the DNN.
    :param init_std: float, std of the embedding initializer.
    :param alpha: float, weight of the loss returned by ``get_disp_loss``.
    :param seed: integer, to use as random seed.
    :param task: str, forwarded to ``PredictionLayer``.
    :return: a ``tf.keras`` model; global variables of the backend session
        have been initialised as a side effect.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding layer per sparse feature; only sequence features mask the
    # zero id.
    sparse_embedding_dict = {
        feat.name:
        Embedding(feat.dimension,
                  embedding_size,
                  embeddings_initializer=RandomNormal(mean=0.0,
                                                      stddev=init_std,
                                                      seed=seed),
                  embeddings_regularizer=l2(l2_reg_embedding),
                  name='sparse_emb_' + str(i) + '-' + feat.name,
                  mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    # Query = sequence features looked up in the plain sparse inputs;
    # keys = the same features looked up in the behaviour-sequence inputs.
    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    # return_score=True: the layer is asked for attention scores rather than
    # the pooled output (exact shape depends on the layer -- TODO confirm).
    scores = AttentionSequencePoolingLayer(
        att_hidden_units=att_hidden_size,
        att_activation=att_activation,
        weight_normalization=att_weight_normalization,
        return_score=True)([query_emb, keys_emb, user_behavior_length])

    Self_Attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=True,
                                 use_positional_encoding=True,
                                 seed=seed,
                                 supports_masking=False,
                                 blinding=True)

    # Self attention: the behaviour sequence is both query and key, and the
    # same length tensor is supplied for both.
    keys_emb = Self_Attention(
        [keys_emb, keys_emb, user_behavior_length, user_behavior_length])

    cap = Capsule(num_capsule=num_capsule,
                  dim_capsule=dim_capsule,
                  routings=routing_iterations,
                  share_weights=True,
                  supports_masking=True)
    # The attention scores computed above are handed to the capsule layer.
    hist_cap = cap(keys_emb, scores=scores)
    # Auxiliary loss computed on the capsules before they are reshaped.
    disp_loss = get_disp_loss(hist_cap)
    # Flatten all capsules into a single step so they can be concatenated
    # with the feature embeddings.
    hist_cap = Reshape([1, num_capsule * dim_capsule])(NoMask()(hist_cap))
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Concatenate()([deep_input_emb, hist_cap])

    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])

    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    # Attach the weighted auxiliary loss to the model.
    model.add_loss(alpha * disp_loss)
    # Side effect: initialises all global variables in the current backend
    # session (TF1-style API).
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
Example #5
0
def MIND(user_feature_columns,
         item_feature_columns,
         num_sampled=5,
         k_max=2,
         p=1.0,
         dynamic_k=False,
         user_dnn_hidden_units=(64, 32),
         dnn_activation='relu',
         dnn_use_bn=False,
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         output_activation='linear',
         seed=1024):
    """Instantiates the MIND Model architecture.

    The single item feature also names the behaviour sequence: the user
    column ``"hist_" + <item feature name>`` is the history fed to the capsule
    layer, and ``features`` must contain the input ``'hist_len'``.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param k_max: int, the max size of user interest embedding
    :param p: float, the parameter for adjusting the attention distribution in LabelAwareAttention.
    :param dynamic_k: bool, whether or not use dynamic interest number
    :param user_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of user tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param output_activation: Activation function to use in output layer
    :param seed: integer, to use as random seed.
    :return: A Keras model instance. The attributes ``user_input``,
        ``user_embedding``, ``item_input`` and ``item_embedding`` are attached
        to the returned model.
    :raises ValueError: if more than one item feature column is supplied.

    """

    if len(item_feature_columns) > 1:
        raise ValueError("Now MIND only support 1 item feature like item_id")
    item_fc = item_feature_columns[0]
    item_feature_name = item_fc.name
    item_vocabulary_size = item_fc.vocabulary_size
    item_embedding_dim = item_fc.embedding_dim

    history_feature_list = [item_feature_name]

    features = build_input_features(user_feature_columns)

    # Split the user columns by kind; a falsy argument yields empty lists.
    user_columns = user_feature_columns if user_feature_columns else []
    sparse_feature_columns = [
        col for col in user_columns if isinstance(col, SparseFeat)
    ]
    dense_feature_columns = [
        col for col in user_columns if isinstance(col, DenseFeat)
    ]
    varlen_sparse_feature_columns = [
        col for col in user_columns if isinstance(col, VarLenSparseFeat)
    ]

    # Behaviour-history columns versus the other variable-length columns.
    history_fc_names = ["hist_" + name for name in history_feature_list]
    history_feature_columns = [
        col for col in varlen_sparse_feature_columns
        if col.name in history_fc_names
    ]
    sparse_varlen_feature_columns = [
        col for col in varlen_sparse_feature_columns
        if col.name not in history_fc_names
    ]
    seq_max_len = history_feature_columns[0].maxlen
    inputs_list = list(features.values())

    # Embedding tables shared by the user and the item side.
    embedding_matrix_dict = create_embedding_matrix(
        user_feature_columns + item_feature_columns,
        l2_reg_embedding,
        seed=seed,
        prefix="")

    item_features = build_input_features(item_feature_columns)

    # Target item (query), behaviour sequence (keys) and remaining sparse
    # user features.
    query_emb_list = embedding_lookup(embedding_matrix_dict,
                                      item_features,
                                      item_feature_columns,
                                      history_feature_list,
                                      history_feature_list,
                                      to_list=True)
    keys_emb_list = embedding_lookup(embedding_matrix_dict,
                                     features,
                                     history_feature_columns,
                                     history_fc_names,
                                     history_fc_names,
                                     to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_matrix_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    pooled_embed_dict = varlen_embedding_lookup(
        embedding_matrix_dict, features, sparse_varlen_feature_columns)
    dnn_input_emb_list += get_varlen_pooling_list(
        pooled_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)

    history_emb = PoolingLayer()(NoMask()(keys_emb_list))
    target_emb = PoolingLayer()(NoMask()(query_emb_list))

    hist_len = features['hist_len']

    capsule_layer = CapsuleLayer(input_units=item_embedding_dim,
                                 out_units=item_embedding_dim,
                                 max_len=seq_max_len,
                                 k_max=k_max)
    high_capsule = capsule_layer((history_emb, hist_len))

    # Without any other user feature the capsules alone feed the user tower;
    # otherwise those features are tiled (with k_max) and concatenated.
    if len(dnn_input_emb_list) == 0 and len(dense_value_list) == 0:
        user_deep_input = high_capsule
    else:
        user_other_feature = combined_dnn_input(dnn_input_emb_list,
                                                dense_value_list)
        other_feature_tile = tf.keras.layers.Lambda(
            tile_user_otherfeat,
            arguments={'k_max': k_max})(user_other_feature)
        user_deep_input = Concatenate()(
            [NoMask()(other_feature_tile), high_capsule])

    user_tower = DNN(user_dnn_hidden_units,
                     dnn_activation,
                     l2_reg_dnn,
                     dnn_dropout,
                     dnn_use_bn,
                     output_activation=output_activation,
                     seed=seed,
                     name="user_embedding")
    user_embeddings = user_tower(user_deep_input)
    item_inputs_list = list(item_features.values())

    # Look up the embedding of every item id in the vocabulary; the result is
    # used as the candidate table of the sampled softmax.
    item_embedding_matrix = embedding_matrix_dict[item_feature_name]
    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(
        item_features[item_feature_name])
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))
    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    # With dynamic_k the history length is passed to the attention as well.
    if dynamic_k:
        attention_inputs = (user_embeddings, target_emb, hist_len)
    else:
        attention_inputs = (user_embeddings, target_emb)
    user_embedding_final = LabelAwareAttention(
        k_max=k_max,
        pow_p=p,
    )(attention_inputs)

    output = SampledSoftmaxLayer(num_sampled=num_sampled)([
        pooling_item_embedding_weight, user_embedding_final,
        item_features[item_feature_name]
    ])
    model = Model(inputs=inputs_list + item_inputs_list, outputs=output)

    # Expose both towers on the model object for callers that need them.
    model.user_input = inputs_list
    model.user_embedding = user_embeddings

    model.item_input = item_inputs_list
    model.item_embedding = get_item_embedding(
        pooling_item_embedding_weight, item_features[item_feature_name])

    return model
def KDD_DIN(dnn_feature_columns,
            history_feature_list,
            dnn_use_bn=False,
            dnn_hidden_units=(200, 80),
            dnn_activation='relu',
            att_hidden_size=(80, 40),
            att_activation="dice",
            att_weight_normalization=False,
            l2_reg_dnn=0,
            l2_reg_embedding=1e-6,
            dnn_dropout=0,
            init_std=0.0001,
            seed=1024,
            task='binary'):
    """Instantiates the Deep Interest Network architecture.

    Variable-length columns whose name is ``"hist_" + <name>`` for some
    ``<name>`` in ``history_feature_list`` are treated as behavior-history keys
    and pooled with attention against the matching sparse (query) features.
    All other variable-length columns go through the regular pooling path.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param history_feature_list: list,to indicate  sequence sparse field
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.

    """

    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    # Split the declared columns by kind; an empty/None spec yields empty lists.
    columns = dnn_feature_columns if dnn_feature_columns else []
    sparse_feature_columns = [
        fc for fc in columns if isinstance(fc, SparseFeat)
    ]
    dense_feature_columns = [
        fc for fc in columns if isinstance(fc, DenseFeat)
    ]
    varlen_sparse_feature_columns = [
        fc for fc in columns if isinstance(fc, VarLenSparseFeat)
    ]

    # Separate behavior-history sequences ("hist_<name>") from the remaining
    # variable-length columns, which are pooled independently of attention.
    history_fc_names = ["hist_" + name for name in history_feature_list]
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    for fc in varlen_sparse_feature_columns:
        if fc.name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    embedding_dict = kdd_create_embedding_matrix(dnn_feature_columns,
                                                 l2_reg_embedding,
                                                 init_std,
                                                 seed,
                                                 prefix="")

    # Query: embeddings of the candidate-side features named in
    # history_feature_list; keys: embeddings of the matching history sequences.
    query_emb_list = embedding_lookup(embedding_dict,
                                      features,
                                      sparse_feature_columns,
                                      history_feature_list,
                                      history_feature_list,
                                      to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict,
                                     features,
                                     history_feature_columns,
                                     history_fc_names,
                                     history_fc_names,
                                     to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)

    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list, mask=True)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list, mask=True)
    hist = AttentionSequencePoolingLayer(
        att_hidden_size,
        att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)

    # Pass the seed by keyword: DNN signatures that include
    # ``output_activation`` place it before ``seed``, so a positional seed
    # could be bound to the wrong parameter.
    output = DNN(dnn_hidden_units,
                 dnn_activation,
                 l2_reg_dnn,
                 dnn_dropout,
                 dnn_use_bn,
                 seed=seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(output)

    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
Example #7
0
    def _model_fn(features, labels, mode, config):
        """Build the attention-based deep network and delegate to ``deepctr_model_fn``.

        The feature columns captured from the enclosing scope are converted to
        ``SparseFeat`` / ``DenseFeat`` / ``VarLenSparseFeat`` descriptors, the
        history sequences are attention-pooled against the query embeddings,
        and the resulting logit is handed to ``deepctr_model_fn``.

        :param features: feature tensors keyed by feature name, passed to the embedding lookups.
        :param labels: labels forwarded unchanged to ``deepctr_model_fn``.
        :param mode: run mode forwarded unchanged to ``deepctr_model_fn``.
        :param config: unused here; part of the required call signature.
        :return: whatever ``deepctr_model_fn`` returns.
        """
        import logging

        logger = logging.getLogger(__name__)

        # NOTE(review): the history column names and their vocabulary size,
        # embedding dim and max length are hard-coded here -- confirm they
        # match the dataset before reusing this model_fn elsewhere.
        hard_coded_history_names = ('hist_price_id', 'hist_des_id')

        with variable_scope(DNN_SCOPE_NAME):
            sparse_feature_columns = []
            dense_feature_columns = []
            varlen_sparse_feature_columns = []

            for feat in dnn_feature_columns:
                # The first key of the parse spec is used as the feature name.
                new_feat_name = list(feat.parse_example_spec.keys())[0]
                if new_feat_name in hard_coded_history_names:
                    varlen_sparse_feature_columns.append(
                        VarLenSparseFeat(SparseFeat(new_feat_name,
                                                    vocabulary_size=100,
                                                    embedding_dim=32,
                                                    use_hash=False),
                                         maxlen=3))
                elif is_embedding(feat):
                    sparse_feature_columns.append(
                        SparseFeat(new_feat_name,
                                   vocabulary_size=feat[0]._num_buckets + 1,
                                   embedding_dim=feat.dimension))
                else:
                    dense_feature_columns.append(DenseFeat(new_feat_name))

            # Split variable-length columns into attention keys ("hist_<name>")
            # and ordinary pooled sequences.
            history_fc_names = ["hist_" + name for name in history_feature_list]
            history_feature_columns = []
            sparse_varlen_feature_columns = []
            for fc in varlen_sparse_feature_columns:
                if fc.name in history_fc_names:
                    history_feature_columns.append(fc)
                else:
                    sparse_varlen_feature_columns.append(fc)

            my_feature_columns = (sparse_feature_columns +
                                  dense_feature_columns +
                                  varlen_sparse_feature_columns)
            embedding_dict = create_embedding_matrix(my_feature_columns,
                                                     l2_reg_embedding,
                                                     seed,
                                                     prefix="")

            query_emb_list = embedding_lookup(embedding_dict,
                                              features,
                                              sparse_feature_columns,
                                              history_feature_list,
                                              history_feature_list,
                                              to_list=True)
            logger.debug('query_emb_list %s', query_emb_list)
            logger.debug('embedding_dict %s', embedding_dict)
            logger.debug('history_feature_columns %s', history_feature_columns)

            keys_emb_list = embedding_lookup(embedding_dict,
                                             features,
                                             history_feature_columns,
                                             history_fc_names,
                                             history_fc_names,
                                             to_list=True)
            logger.debug('keys_emb_list %s', keys_emb_list)

            dnn_input_emb_list = embedding_lookup(
                embedding_dict,
                features,
                sparse_feature_columns,
                mask_feat_list=history_feature_list,
                to_list=True)
            logger.debug('dnn_input_emb_list %s', dnn_input_emb_list)

            dense_value_list = get_dense_input(features, dense_feature_columns)
            sequence_embed_dict = varlen_embedding_lookup(
                embedding_dict, features, sparse_varlen_feature_columns)
            sequence_embed_list = get_varlen_pooling_list(
                sequence_embed_dict,
                features,
                sparse_varlen_feature_columns,
                to_list=True)

            dnn_input_emb_list += sequence_embed_list

            keys_emb = concat_func(keys_emb_list, mask=True)
            deep_input_emb = concat_func(dnn_input_emb_list)
            query_emb = concat_func(query_emb_list, mask=True)
            hist = AttentionSequencePoolingLayer(
                att_hidden_size,
                att_activation,
                weight_normalization=att_weight_normalization,
                supports_masking=True)([query_emb, keys_emb])

            deep_input_emb = tf.keras.layers.Concatenate()(
                [NoMask()(deep_input_emb), hist])
            deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
            dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
            output = DNN(dnn_hidden_units,
                         dnn_activation,
                         l2_reg_dnn,
                         dnn_dropout,
                         dnn_use_bn,
                         seed=seed)(dnn_input)
            final_logit = tf.keras.layers.Dense(
                1,
                use_bias=False,
                kernel_initializer=tf.keras.initializers.glorot_normal(seed))(
                    output)

        return deepctr_model_fn(features,
                                mode,
                                final_logit,
                                labels,
                                task,
                                linear_optimizer,
                                dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)
Example #8
0
def YoutubeDNN(user_feature_columns,
               item_feature_columns,
               num_sampled=5,
               user_dnn_hidden_units=(64, 32),
               dnn_activation='relu',
               dnn_use_bn=False,
               l2_reg_dnn=0,
               l2_reg_embedding=1e-6,
               dnn_dropout=0,
               output_activation='linear',
               seed=1024, ):
    """Instantiates the YoutubeDNN Model architecture.

    A user tower (DNN over the user features) is trained against the item
    embedding table with a sampled-softmax output. The returned model also
    carries ``user_input``, ``user_embedding``, ``item_input`` and
    ``item_embedding`` attributes.

    :param user_feature_columns: An iterable containing user's features used by  the model.
    :param item_feature_columns: An iterable containing item's features used by  the model.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param user_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of user tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param output_activation: Activation function to use in output layer
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance.
    :raises ValueError: if more than one item feature column is given.

    """

    if len(item_feature_columns) > 1:
        raise ValueError("Now YoutubeNN only support 1 item feature like item_id")
    item_column = item_feature_columns[0]
    item_feature_name = item_column.name
    item_vocabulary_size = item_column.vocabulary_size

    # One embedding table set shared by the user and item sides.
    all_columns = user_feature_columns + item_feature_columns
    embedding_matrix_dict = create_embedding_matrix(all_columns,
                                                    l2_reg_embedding,
                                                    seed=seed)

    # User side: inputs -> embeddings / dense values -> DNN input.
    user_features = build_input_features(user_feature_columns)
    user_inputs_list = list(user_features.values())
    user_sparse_embedding_list, user_dense_value_list = input_from_feature_columns(
        user_features,
        user_feature_columns,
        l2_reg_embedding,
        seed=seed,
        embedding_matrix_dict=embedding_matrix_dict)
    user_dnn_input = combined_dnn_input(user_sparse_embedding_list,
                                        user_dense_value_list)

    # Item side inputs.
    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())
    item_id_input = item_features[item_feature_name]

    user_tower = DNN(user_dnn_hidden_units,
                     dnn_activation,
                     l2_reg_dnn,
                     dnn_dropout,
                     dnn_use_bn,
                     output_activation=output_activation,
                     seed=seed)
    user_dnn_out = user_tower(user_dnn_input)

    # Look up the embedding of every item id in the vocabulary.
    all_item_ids = list(range(item_vocabulary_size))
    item_index = EmbeddingIndex(all_item_ids)(item_id_input)

    item_embedding_matrix = embedding_matrix_dict[item_feature_name]
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))

    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    softmax_inputs = [pooling_item_embedding_weight, user_dnn_out, item_id_input]
    output = SampledSoftmaxLayer(num_sampled=num_sampled)(softmax_inputs)
    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    # Expose the tower inputs/outputs on the model object.
    model.user_input = user_inputs_list
    model.user_embedding = user_dnn_out

    model.item_input = item_inputs_list
    model.item_embedding = get_item_embedding(pooling_item_embedding_weight,
                                              item_id_input)

    return model
Example #9
0
File: din.py Project: zwcdp/DSIN
def DIN(feature_dim_dict,
        seq_feature_list,
        embedding_size=8,
        hist_len_max=16,
        dnn_use_bn=False,
        dnn_hidden_units=(200, 80),
        dnn_activation='relu',
        att_hidden_size=(80, 40),
        att_activation="dice",
        att_weight_normalization=False,
        l2_reg_dnn=0,
        l2_reg_embedding=1e-6,
        dnn_dropout=0,
        init_std=0.0001,
        seed=1024,
        task='binary'):
    """Instantiates the Deep Interest Network architecture.

    The behavior sequence is attention-pooled against the candidate (query)
    embeddings, concatenated with the remaining sparse embeddings and any
    dense inputs, and fed through a DNN to a single logit.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list,to indicate  sequence sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.

    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    sparse_columns = feature_dim_dict["sparse"]

    # One embedding table per sparse field; sequence fields mask index 0.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(sparse_columns):
        initializer = RandomNormal(mean=0.0, stddev=init_std, seed=seed)
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=initializer,
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in seq_feature_list))

    # Query = candidate-side sequence fields, keys = behavior history,
    # deep input = every sparse field with the sequence fields unmasked.
    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input, sparse_columns,
                                            seq_feature_list,
                                            seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           sparse_columns, seq_feature_list,
                                           seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict,
        sparse_input,
        sparse_columns,
        mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    attention_pooling = AttentionSequencePoolingLayer(
        att_hidden_size,
        att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    deep_net = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)
    hidden = deep_net(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(hidden)

    prediction = PredictionLayer(task)(final_logit)
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])

    return Model(inputs=model_input_list, outputs=prediction)
Example #10
0
File: dsin.py Project: zwcdp/DSIN
def DSIN(
    feature_dim_dict,
    sess_feature_list,
    embedding_size=8,
    sess_max_count=5,
    sess_len_max=10,
    bias_encoding=False,
    att_embedding_size=1,
    att_head_num=8,
    dnn_hidden_units=(200, 80),
    dnn_activation='sigmoid',
    dnn_dropout=0,
    dnn_use_bn=False,
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    init_std=0.0001,
    seed=1024,
    task='binary',
):
    """Instantiates the Deep Session Interest Network architecture.

    Sessions are encoded with a shared Transformer, then summarised twice
    with attention against the query: once directly and once after a BiLSTM.
    Both summaries are concatenated with the sparse embeddings (and dense
    inputs, if any) and passed through a DNN to a single logit.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param sess_feature_list: list,to indicate session feature sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param sess_len_max: positive int, to indicate the max length of each session
    :param bias_encoding: bool. Whether use bias encoding or postional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``att_embedding_size * att_head_num`` differs from ``len(sess_feature_list) * embedding_size``.

    """
    check_feature_config_dict(feature_dim_dict)

    # The attention width must match the concatenated session embedding width.
    attention_width = att_embedding_size * att_head_num
    session_width = len(sess_feature_list) * embedding_size
    if attention_width != session_width:
        raise ValueError(
            "len(session_feature_lsit) * embedding_size must equal to att_embedding_size * att_head_num ,got %d * %d != %d *%d"
            % (len(sess_feature_list), embedding_size, att_embedding_size,
               att_head_num))

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    # One embedding table per sparse field; session fields mask index 0.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        initializer = RandomNormal(mean=0.0, stddev=init_std, seed=seed)
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=initializer,
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name,
            mask_zero=(feat.name in sess_feature_list))

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list,
                                            sess_feature_list)
    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict,
        sparse_input,
        feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    # Split the behavior inputs into per-session embedded sequences.
    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list,
                                      sess_max_count,
                                      bias_encoding=bias_encoding)

    # A single Transformer instance is handed to the session extractor.
    Self_Attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding),
                                 seed=seed,
                                 supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       Self_Attention)

    # Attention over the raw session interests.
    interest_pooling = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)
    interest_attention_layer = interest_pooling(
        [query_emb, sess_fea, user_sess_length])

    # Attention over the BiLSTM-processed session interests.
    lstm_outputs = BiLSTM(
        len(sess_feature_list) * embedding_size,
        layers=2,
        res_layers=0,
        dropout_rate=0.2,
    )(sess_fea)
    lstm_pooling = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)
    lstm_attention_layer = lstm_pooling(
        [query_emb, lstm_outputs, user_sess_length])

    flat_interest = Flatten()(interest_attention_layer)
    flat_lstm = Flatten()(lstm_attention_layer)
    deep_input_emb = Concatenate()([deep_input_emb, flat_interest, flat_lstm])
    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    deep_net = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)
    output = deep_net(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    # Collect the per-session inputs in session order ("sess_0", "sess_1", ...).
    sess_input_list = []
    for sess_idx in range(sess_max_count):
        sess_inputs = user_behavior_input_dict["sess_" + str(sess_idx)]
        sess_input_list.extend(get_inputs_list([sess_inputs]))

    base_inputs = get_inputs_list([sparse_input, dense_input])
    model_input_list = base_inputs + sess_input_list + [user_sess_length]

    return Model(inputs=model_input_list, outputs=output)
Example #11
0
def BST(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16, use_bn=False, dnn_hidden_units=(200, 80),
        dnn_activation='relu', att_embedding_size=1, att_head_num=8,
        l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates a Transformer-over-behavior-sequence (BST) model.

    Each behavior-sequence embedding is passed through a shared Transformer
    self-attention layer; the results are concatenated with the sparse
    feature embeddings (and dense inputs, if any) and fed through a DNN to a
    single logit.

    As a side effect, the global variables of the current Keras backend
    session are initialized before the model is returned.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list,to indicate  sequence sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention head
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.

    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # NOTE(review): the vocabulary size is passed through tf.cast rather than
    # as a plain int -- presumably to cope with a non-int dimension type;
    # confirm before simplifying.
    sparse_embedding_dict = {feat.name: Embedding(tf.cast(feat.dimension, tf.int32), embedding_size,
                                                  embeddings_initializer=RandomNormal(
                                                      mean=0.0, stddev=init_std, seed=seed),
                                                  embeddings_regularizer=l2(
                                                      l2_reg_embedding),
                                                  name='sparse_emb_' +
                                                       str(i) + '-' + feat.name,
                                                  mask_zero=(feat.name in seq_feature_list)) for i, feat in
                             enumerate(feature_dim_dict["sparse"])}

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'])

    # NOTE(review): query_emb and keys_emb are not consumed further down in
    # this function; the calls are retained so graph construction (and any
    # layers concat_fun creates) stays exactly as before.
    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    # One Transformer instance is shared by every behavior-sequence field.
    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=True, seed=seed, supports_masking=False,
                                 blinding=True)
    hists = [
        Self_Attention([key_emb, key_emb, user_behavior_length, user_behavior_length])
        for key_emb in keys_emb_list
    ]
    hist = concat_fun(hists)

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])

    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)

    # Side effect: runs the global-variables initializer in the current
    # Keras backend session before handing the model back.
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model