# NOTE: the imports below are a reconstruction for readability; the helper
# layers and utilities used throughout this file (Transformer,
# AttentionSequencePoolingLayer, BiasEncoding, BiLSTM, Capsule, DNN,
# PredictionLayer, NoMask, concat_fun, get_input, get_embedding_vec_list,
# get_inputs_list, check_feature_config_dict, sess_interest_extractor,
# interest_evolution, get_disp_loss) are assumed to come from the surrounding
# DeepCTR-style repo and are not redefined here.
from collections import OrderedDict

import tensorflow as tf
from tensorflow.python.keras.initializers import RandomNormal
from tensorflow.python.keras.layers import (Concatenate, Dense, Embedding,
                                            Flatten, Input, Reshape)
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.regularizers import l2


def sess_interest_division(sparse_embedding_dict, user_behavior_input_dict,
                           sparse_fg_list, sess_feature_list, sess_max_count,
                           bias_encoding=True):
    tr_input = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        # Embed every session feature of this session and concatenate the
        # embeddings along the last axis.
        keys_emb_list = get_embedding_vec_list(
            sparse_embedding_dict, user_behavior_input_dict[sess_name],
            sparse_fg_list, sess_feature_list, sess_feature_list)
        keys_emb = concat_fun(keys_emb_list)
        tr_input.append(keys_emb)
    if bias_encoding:
        tr_input = BiasEncoding(sess_max_count)(tr_input)
    return tr_input
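
# Shape sketch for sess_interest_division (hedged, inferred from the calls
# above rather than stated anywhere in the source): with batch size B and two
# session features at embedding_size 8, each entry of tr_input has shape
# (B, sess_len_max, 16). BiasEncoding leaves shapes unchanged; it only adds
# learnable per-session, per-position, and per-unit bias terms, following the
# bias encoding described in the DSIN paper.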
def CapsuleNet(feature_dim_dict, seq_feature_list, embedding_size=8,
               hist_len_max=50, use_bn=False, dnn_hidden_units=(200, 80),
               dnn_activation='sigmoid', num_capsule=8, dim_capsule=2,
               routing_iterations=3, att_hidden_size=(64, 16),
               att_activation="dice", att_weight_normalization=True,
               att_embedding_size=1, att_head_num=8, l2_reg_dnn=0,
               l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001,
               alpha=1e-6, seed=1024, task='binary'):
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)

    # Attention scores of the target item over the behavior sequence; they are
    # passed to the capsule layer to weight the routing below.
    scores = AttentionSequencePoolingLayer(
        att_hidden_units=att_hidden_size, att_activation=att_activation,
        weight_normalization=att_weight_normalization,
        return_score=True)([query_emb, keys_emb, user_behavior_length])

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0,
                                 use_layer_norm=True, use_positional_encoding=True,
                                 seed=seed, supports_masking=False, blinding=True)
    keys_emb = Self_Attention(
        [keys_emb, keys_emb, user_behavior_length, user_behavior_length])

    cap = Capsule(num_capsule=num_capsule, dim_capsule=dim_capsule,
                  routings=routing_iterations, share_weights=True,
                  supports_masking=True)
    hist_cap = cap(keys_emb, scores=scores)
    disp_loss = get_disp_loss(hist_cap)
    hist_cap = Reshape([1, num_capsule * dim_capsule])(NoMask()(hist_cap))

    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Concatenate()([deep_input_emb, hist_cap])
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    model.add_loss(alpha * disp_loss)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
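
# Hedged note on CapsuleNet's auxiliary objective: hist_cap stacks num_capsule
# interest vectors of dim_capsule units each, and model.add_loss(alpha *
# disp_loss) folds the dispersion term from get_disp_loss into training
# alongside the task loss, so alpha trades off interest diversity against fit.
# The exact form of get_disp_loss is defined elsewhere in the repo and is not
# shown in this file.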
def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5,
         sess_len_max=10, att_embedding_size=1, att_head_num=8,
         dnn_hidden_units=(200, 80), dnn_activation='sigmoid', l2_reg_dnn=0,
         l2_reg_embedding=1e-6, task='binary', dnn_dropout=0, init_std=0.0001,
         seed=1024, encoding='bias'):
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list, sess_feature_list)
    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    # Bias encoding and positional encoding are mutually exclusive here.
    be_flag = (encoding == 'bias')
    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list, sess_max_count,
                                      bias_encoding=be_flag)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0,
                                 use_layer_norm=False,
                                 use_positional_encoding=(not be_flag),
                                 seed=seed, supports_masking=True, blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(len(sess_feature_list) * embedding_size,
                          layers=2, res_layers=0, dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True)(
        [query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, False, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]
    model = Model(inputs=model_input_list, outputs=output)
    return model
def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
        dnn_use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu',
        att_hidden_size=(80, 40), att_activation="dice",
        att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6,
        dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse fields (**now only supports sparse features**), like {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2}, 'dense': []}
    :param seq_feature_list: list, to indicate sequence sparse fields (**now only supports sparse features**); must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding size.
    :param hist_len_max: positive int, the max length of the sequence input.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net.
    :param dnn_activation: activation function to use in the deep net.
    :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net.
    :param att_activation: activation function to use in the attention net.
    :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param dnn_dropout: float in [0, 1), the probability of dropping a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize the embedding vectors.
    :param seed: integer, to use as the random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
""" check_feature_config_dict(feature_dim_dict) sparse_input, dense_input, user_behavior_input = get_input( feature_dim_dict, seq_feature_list, hist_len_max) sparse_embedding_dict = { feat.name: Embedding(feat.dimension, embedding_size, embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg_embedding), name='sparse_emb_' + str(i) + '-' + feat.name, mask_zero=(feat.name in seq_feature_list)) for i, feat in enumerate(feature_dim_dict["sparse"]) } query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'], seq_feature_list, seq_feature_list) keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'], seq_feature_list, seq_feature_list) deep_input_emb_list = get_embedding_vec_list( sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'], mask_feat_list=seq_feature_list) keys_emb = concat_fun(keys_emb_list) deep_input_emb = concat_fun(deep_input_emb_list) query_emb = concat_fun(query_emb_list) hist = AttentionSequencePoolingLayer( att_hidden_size, att_activation, weight_normalization=att_weight_normalization, supports_masking=True)([query_emb, keys_emb]) deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist]) deep_input_emb = Flatten()(deep_input_emb) if len(dense_input) > 0: deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values())) output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(deep_input_emb) final_logit = Dense(1, use_bias=False)(output) output = PredictionLayer(task)(final_logit) model_input_list = get_inputs_list( [sparse_input, dense_input, user_behavior_input]) model = Model(inputs=model_input_list, outputs=output) return model
def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5,
         sess_len_max=10, bias_encoding=False, att_embedding_size=1,
         att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid',
         dnn_dropout=0, dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6,
         init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Session Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse fields (**now only supports sparse features**), like {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2}, 'dense': []}
    :param sess_feature_list: list, to indicate session feature sparse fields (**now only supports sparse features**); must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding size.
    :param sess_max_count: positive int, the max number of sessions.
    :param sess_len_max: positive int, the max length of each session.
    :param bias_encoding: bool. Whether to use bias encoding or positional encoding.
    :param att_embedding_size: positive int, the embedding size of each attention head.
    :param att_head_num: positive int, the number of attention heads.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net.
    :param dnn_activation: activation function to use in the deep net.
    :param dnn_dropout: float in [0, 1), the probability of dropping a given DNN coordinate.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param init_std: float, the standard deviation used to initialize the embedding vectors.
    :param seed: integer, to use as the random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
""" check_feature_config_dict(feature_dim_dict) if (att_embedding_size * att_head_num != len(sess_feature_list) * embedding_size): raise ValueError( "len(session_feature_lsit) * embedding_size must equal to att_embedding_size * att_head_num ,got %d * %d != %d *%d" % (len(sess_feature_list), embedding_size, att_embedding_size, att_head_num)) sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input( feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max) sparse_embedding_dict = { feat.name: Embedding(feat.dimension, embedding_size, embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg_embedding), name='sparse_emb_' + str(i) + '-' + feat.name, mask_zero=(feat.name in sess_feature_list)) for i, feat in enumerate(feature_dim_dict["sparse"]) } query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"], sess_feature_list, sess_feature_list) query_emb = concat_fun(query_emb_list) deep_input_emb_list = get_embedding_vec_list( sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"], mask_feat_list=sess_feature_list) deep_input_emb = concat_fun(deep_input_emb_list) deep_input_emb = Flatten()(NoMask()(deep_input_emb)) tr_input = sess_interest_division(sparse_embedding_dict, user_behavior_input_dict, feature_dim_dict['sparse'], sess_feature_list, sess_max_count, bias_encoding=bias_encoding) Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False, use_positional_encoding=(not bias_encoding), seed=seed, supports_masking=True, blinding=True) sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention) interest_attention_layer = AttentionSequencePoolingLayer( att_hidden_units=(64, 16), weight_normalization=True, supports_masking=False)([query_emb, sess_fea, user_sess_length]) lstm_outputs = BiLSTM( len(sess_feature_list) * embedding_size, layers=2, res_layers=0, dropout_rate=0.2, )(sess_fea) lstm_attention_layer = AttentionSequencePoolingLayer( att_hidden_units=(64, 16), weight_normalization=True)([query_emb, lstm_outputs, user_sess_length]) deep_input_emb = Concatenate()([ deep_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer) ]) if len(dense_input) > 0: deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values())) output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(deep_input_emb) output = Dense(1, use_bias=False, activation=None)(output) output = PredictionLayer(task)(output) sess_input_list = [] # sess_input_length_list = [] for i in range(sess_max_count): sess_name = "sess_" + str(i) sess_input_list.extend( get_inputs_list([user_behavior_input_dict[sess_name]])) # sess_input_length_list.append(user_behavior_length_dict[sess_name]) model_input_list = get_inputs_list( [sparse_input, dense_input]) + sess_input_list + [user_sess_length] model = Model(inputs=model_input_list, outputs=output) return model
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False,
         dnn_hidden_units=(200, 80), dnn_activation='relu',
         att_hidden_units=(64, 16), att_activation="dice",
         att_weight_normalization=True, l2_reg_dnn=0, l2_reg_embedding=1e-6,
         dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param feature_dim_dict: dict, to indicate sparse fields (**now only supports sparse features**), like {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2}, 'dense': []}
    :param seq_feature_list: list, to indicate sequence sparse fields (**now only supports sparse features**); must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding size.
    :param hist_len_max: positive int, the max length of the sequence input.
    :param gru_type: str, one of GRU, AIGRU, AUGRU or AGRU.
    :param use_negsampling: bool, whether or not to use negative sampling.
    :param alpha: float, weight of the auxiliary loss.
    :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN.
    :param dnn_activation: activation function to use in the DNN.
    :param att_hidden_units: list of positive integers, the layer number and units in each layer of the attention net.
    :param att_activation: activation function to use in the attention net.
    :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param dnn_dropout: float in [0, 1), the probability of dropping a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize the embedding vectors.
    :param seed: integer, to use as the random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
""" check_feature_config_dict(feature_dim_dict) sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input( feature_dim_dict, seq_feature_list, hist_len_max) sparse_embedding_dict = {feat.name: Embedding(feat.dimension, embedding_size, embeddings_initializer=RandomNormal( mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2( l2_reg_embedding), name='sparse_emb_' + str(i) + '-' + feat.name) for i, feat in enumerate(feature_dim_dict["sparse"])} query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"], return_feat_list=seq_feature_list) keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'], return_feat_list=seq_feature_list) deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse']) query_emb = concat_fun(query_emb_list) keys_emb = concat_fun(keys_emb_list) deep_input_emb = concat_fun(deep_input_emb_list) if use_negsampling: neg_user_behavior_input = OrderedDict() for i, feat in enumerate(seq_feature_list): neg_user_behavior_input[feat] = Input(shape=(hist_len_max,), name='neg_seq_' + str(i) + '-' + feat) neg_uiseq_embed_list = get_embedding_vec_list(sparse_embedding_dict, neg_user_behavior_input, feature_dim_dict["sparse"], seq_feature_list, ) # [sparse_embedding_dict[feat]( # neg_user_behavior_input[feat]) for feat in seq_feature_list] neg_concat_behavior = concat_fun(neg_uiseq_embed_list) else: neg_concat_behavior = None hist, aux_loss_1 = interest_evolution(keys_emb, query_emb, user_behavior_length, gru_type=gru_type, use_neg=use_negsampling, neg_concat_behavior=neg_concat_behavior, embedding_size=embedding_size, att_hidden_size=att_hidden_units, att_activation=att_activation, att_weight_normalization=att_weight_normalization, ) deep_input_emb = Concatenate()([deep_input_emb, hist]) deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb) if len(dense_input) > 0: deep_input_emb = Concatenate()( [deep_input_emb] + list(dense_input.values())) output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed)(deep_input_emb) final_logit = Dense(1, use_bias=False)(output) output = PredictionLayer(task)(final_logit) model_input_list = get_inputs_list( [sparse_input, dense_input, user_behavior_input]) if use_negsampling: model_input_list += list(neg_user_behavior_input.values()) model_input_list += [user_behavior_length] model = tf.keras.models.Model(inputs=model_input_list, outputs=output) if use_negsampling: model.add_loss(alpha * aux_loss_1) tf.keras.backend.get_session().run(tf.global_variables_initializer()) return model
def BST(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16,
        use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu',
        att_embedding_size=1, att_head_num=8, l2_reg_dnn=0,
        l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024,
        task='binary'):
    """Instantiates the Behavior Sequence Transformer architecture.

    :param feature_dim_dict: dict, to indicate sparse fields (**now only supports sparse features**), like {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2}, 'dense': []}
    :param seq_feature_list: list, to indicate sequence sparse fields (**now only supports sparse features**); must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding size.
    :param hist_len_max: positive int, the max length of the sequence input.
    :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN.
    :param dnn_activation: activation function to use in the DNN.
    :param att_embedding_size: positive int, the embedding size of each attention head.
    :param att_head_num: positive int, the number of attention heads.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param dnn_dropout: float in [0, 1), the probability of dropping a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize the embedding vectors.
    :param seed: integer, to use as the random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0,
                                 use_layer_norm=False, use_positional_encoding=True,
                                 seed=seed, supports_masking=False, blinding=True)

    # Apply the Transformer to each behavior feature's embedding sequence
    # separately, then concatenate the per-feature encodings.
    hists = []
    for key_emb in keys_emb_list:
        hist = Self_Attention(
            [key_emb, key_emb, user_behavior_length, user_behavior_length])
        hists.append(hist)
    hist = concat_fun(hists)

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
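
# Hedged usage sketch for BST (feature names illustrative, not from the
# source). Unlike DIN/DIEN, the Transformer here encodes each behavior
# feature's sequence separately before concatenation, so the per-feature
# embedding width must match att_embedding_size * att_head_num (e.g. the
# defaults 4 == 1 * 8 do not; pass matching values):
#
#   model = BST(feature_dim_dict, ['item', 'item_gender'], embedding_size=8,
#               att_embedding_size=1, att_head_num=8)
#   model.compile('adam', 'binary_crossentropy')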