def get_linear_logit(features, feature_columns, units=1, l2_reg=0,
                     init_std=0.0001, seed=1024, prefix='linear'):
    # One set of 1-dim linear embeddings per output unit.
    linear_emb_list = [
        input_from_feature_columns(features, feature_columns, 1, l2_reg,
                                   init_std, seed, prefix=prefix + str(i))[0]
        for i in range(units)]
    _, dense_input_list, _ = input_from_feature_columns(
        features, feature_columns, 1, l2_reg, init_std, seed, prefix=prefix)

    linear_logit_list = []
    for i in range(units):
        if len(linear_emb_list[0]) > 0 and len(dense_input_list) > 0:
            # Linear mode=2: both sparse and dense inputs.
            sparse_input = concat_fun(linear_emb_list[i])
            dense_input = concat_fun(dense_input_list)
            linear_logit = Linear(l2_reg, mode=2)([sparse_input, dense_input])
        elif len(linear_emb_list[0]) > 0:
            # Linear mode=0: sparse inputs only.
            sparse_input = concat_fun(linear_emb_list[i])
            linear_logit = Linear(l2_reg, mode=0)(sparse_input)
        elif len(dense_input_list) > 0:
            # Linear mode=1: dense inputs only.
            dense_input = concat_fun(dense_input_list)
            linear_logit = Linear(l2_reg, mode=1)(dense_input)
        else:
            raise NotImplementedError
        linear_logit_list.append(linear_logit)

    return concat_fun(linear_logit_list)
def combined_dnn_input(sparse_embedding_list, dense_value_list):
    if len(sparse_embedding_list) > 0 and len(dense_value_list) > 0:
        sparse_dnn_input = Flatten()(concat_fun(sparse_embedding_list))
        dense_dnn_input = Flatten()(concat_fun(dense_value_list))
        return concat_fun([sparse_dnn_input, dense_dnn_input])
    elif len(sparse_embedding_list) > 0:
        return Flatten()(concat_fun(sparse_embedding_list))
    elif len(dense_value_list) > 0:
        return Flatten()(concat_fun(dense_value_list))
    else:
        raise NotImplementedError
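# A minimal shape check for combined_dnn_input above (hedged sketch; it
# assumes the module-level concat_fun concatenates on the last axis, as it is
# used throughout this file). Two (batch, 1, 4) sparse embeddings plus one
# (batch, 2) dense value should flatten and join into a (batch, 10) input.
if __name__ == "__main__":
    from tensorflow.keras.layers import Input as _Input

    _sparse = [_Input(shape=(1, 4)), _Input(shape=(1, 4))]
    _dense = [_Input(shape=(2,))]
    print(combined_dnn_input(_sparse, _dense).shape)  # expected: (None, 10)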
def myAutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3,
              att_embedding_size=8, att_head_num=4, att_res=True,
              hidden_size=(256, 256), activation='relu', l2_reg_deep=0,
              l2_reg_embedding=1e-5, use_bn=False, keep_prob=1.0,
              init_std=0.0001, seed=1024, final_activation='sigmoid'):
    if len(hidden_size) <= 0 and att_layer_num <= 0:
        raise ValueError("Either hidden_size or att_layer_num must be > 0")
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, _, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std,
        seed, False)

    # Stacked multi-head self-attention over the field embeddings.
    att_input = concat_fun(deep_emb_list, axis=1)
    for _ in range(att_layer_num):
        att_input = InteractingLayer(att_embedding_size, att_head_num,
                                     att_res)(att_input)
    att_output = tf.keras.layers.Flatten()(att_input)

    deep_input = tf.keras.layers.Flatten()(concat_fun(deep_emb_list))
    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob,
                   use_bn, seed)(deep_input)

    finish_out = tf.keras.layers.Concatenate()([att_output, deep_out])
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)
    like_out = tf.keras.layers.Concatenate()([att_output, deep_out])
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    output_finish = PredictionLayer(final_activation, name='finish')(finish_logit)
    output_like = PredictionLayer(final_activation, name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
def xDeepFM_MTL(feature_dim_dict, embedding_size=8, hidden_size=(256, 256),
                cin_layer_size=(256, 256,), cin_split_half=True,
                task_net_size=(128,), l2_reg_linear=0.00001,
                l2_reg_embedding=0.00001, seed=1024):
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    print("[xdeepfm] feature_dim_dict: {}".format(feature_dim_dict))
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)
    print("[xdeepfm] deep_emb_list:", deep_emb_list)
    print("[xdeepfm] linear_logit:", linear_logit)
    print("[xdeepfm] inputs_list:", inputs_list)

    # video_input = tf.keras.layers.Input((128,))
    # inputs_list.append(video_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)
    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit, exFM_logit])
    like_logit = tf.keras.layers.add([linear_logit, like_logit, exFM_logit])

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
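# A hedged usage sketch for the two-task model above. The SingleFeat-based
# config mirrors what preprocess_input_embedding expects in this repo; the
# deepctr import path and the vocabulary sizes are assumptions, not part of
# the original code.
if __name__ == "__main__":
    from deepctr import SingleFeat  # assumed import path (deepctr 0.2.x-style)

    demo_config = {
        'sparse': [SingleFeat('uid', 70000), SingleFeat('item_id', 4000000)],
        'dense': [SingleFeat('video_duration', 0)],
    }
    demo_model = xDeepFM_MTL(demo_config)
    # Two binary heads; down-weight the rarer 'like' label.
    demo_model.compile('adagrad', 'binary_crossentropy',
                       loss_weights=[0.7, 0.3])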
def sess_interest_division(sparse_embedding_dict, user_behavior_input_dict,
                           sparse_fg_list, sess_feature_list, sess_max_count,
                           bias_encoding=True):
    tr_input = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        keys_emb_list = get_embedding_vec_list(
            sparse_embedding_dict, user_behavior_input_dict[sess_name],
            sparse_fg_list, sess_feature_list, sess_feature_list)
        # [sparse_embedding_dict[feat](user_behavior_input_dict[sess_name][feat])
        #  for feat in sess_feature_list]
        keys_emb = concat_fun(keys_emb_list)
        tr_input.append(keys_emb)
    if bias_encoding:
        tr_input = BiasEncoding(sess_max_count)(tr_input)
    return tr_input
def SVD(feature_columns, embedding_size=100, l2_reg_embedding=1e-5,
        l2_reg_linear=1e-5, l2_reg_dnn=0, init_std=0.0001, seed=1024,
        bi_dropout=0, dnn_dropout=0):
    """Instantiates an SVD-style architecture: a pure second-order FM over
    the sparse feature embeddings.

    :param feature_columns: An iterable containing all the sparse features used by the model.
    :param embedding_size: positive integer, dimension of the latent factors.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part (unused in this variant).
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN (unused in this variant).
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param bi_dropout: dropout probability for a BiInteractionPooling layer (unused in this variant).
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate (unused in this variant).
    :return: A Keras model instance.
    """
    features = build_input_features(feature_columns)
    input_layers = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(
        features, feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    model = tf.keras.models.Model(inputs=input_layers, outputs=fm_logit)
    return model
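# Hedged usage sketch for SVD above. SparseFeat matches the column-based API
# that build_input_features / input_from_feature_columns come from; the
# import path and vocabulary sizes are assumptions.
if __name__ == "__main__":
    from deepctr.inputs import SparseFeat  # assumed import path

    svd_columns = [SparseFeat('user_id', 10000), SparseFeat('item_id', 50000)]
    svd_model = SVD(svd_columns, embedding_size=64)
    svd_model.compile('adam', loss='binary_crossentropy')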
def MT_xDeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8,
               dnn_hidden_units=(256, 256), cin_layer_size=(128, 128,),
               cin_split_half=True, cin_activation='relu',
               l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
               l2_reg_cin=0, init_std=0.0001, seed=1024, dnn_dropout=0,
               dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the xDeepFM architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of deep net
    :param cin_layer_size: list of positive integers or empty list, the feature maps in each hidden layer of Compressed Interaction Network
    :param cin_split_half: bool. If set to True, half of the feature maps in each hidden layer will connect to the output unit.
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: L2 regularizer strength applied to deep net
    :param l2_reg_cin: L2 regularizer strength applied to CIN.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half,
                       l2_reg_cin, seed)(fm_input)
        # Four output units: (finish, like) logits for each of two user groups.
        exFM_logit = tf.keras.layers.Dense(4, activation=None)(exFM_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)
    deep_logit = tf.keras.layers.Dense(4, use_bias=False,
                                       activation=None)(deep_out)

    if len(dnn_hidden_units) == 0 and len(cin_layer_size) == 0:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and len(cin_layer_size) > 0:  # linear + CIN
        final_logit = tf.keras.layers.add([linear_logit, exFM_logit])
    elif len(dnn_hidden_units) > 0 and len(cin_layer_size) == 0:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit])
    elif len(dnn_hidden_units) > 0 and len(cin_layer_size) > 0:  # linear + CIN + Deep
        final_logit = tf.keras.layers.add(
            [linear_logit, deep_logit, exFM_logit])
    else:
        raise NotImplementedError

    output_units = PredictionLayer(task)(final_logit)

    # output = None
    # for i in range(len(flag_columns)):
    #     print(i)
    #     selected_index = [0, 1] if flag_columns[i] else [2, 3]
    #     if output != None:
    #         output = tf.concat([output, tf.reshape(tf.gather(output_units[i, :], selected_index), (1, -1))], axis=0)
    #     else:
    #         output = tf.reshape(tf.gather(output_units[i, :], selected_index), (1, -1))

    # Route each example to the pair of units matching its region:
    # region 0 reads columns (0, 2); region 1 reads columns (1, 3).
    finish = tf.cast(1 - features['u_region_id'], dtype=tf.float32) * output_units[:, 0] + \
        tf.cast(features['u_region_id'], dtype=tf.float32) * output_units[:, 1]
    like = tf.cast(1 - features['u_region_id'], dtype=tf.float32) * output_units[:, 2] + \
        tf.cast(features['u_region_id'], dtype=tf.float32) * output_units[:, 3]

    # mask = tf.cond(pred=tf.equal(features['u_region_id'], tf.constant(value=1, dtype=tf.int32)),
    #                true_fn=lambda: [True, True, False, False],
    #                false_fn=lambda: [False, False, True, True])
    # output = tf.reshape(tf.boolean_mask(output_units, mask), shape=[-1, 2])
    # finish = output[:, 0]
    # like = output[:, 1]
    # print(output)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=[finish, like])
    return model
def MTL_with_Title(feature_dim_dict, embedding_size=8, hidden_size=(256, 256),
                   cin_layer_size=(256, 256,), cin_split_half=True,
                   task_net_size=(128,), l2_reg_linear=0.00001,
                   l2_reg_embedding=0.00001, seed=1024):
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # xDeepFM Model
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    fm_input = concat_fun(deep_emb_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)
    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # Add Title Features: embed the padded title token ids, encode with an
    # LSTM, average over time, and reduce to a single logit.
    title_input = Input(shape=(35,), dtype='int32', name='title_input')
    title_embedding = Embedding(output_dim=32, input_dim=134545,
                                input_length=35)(title_input)
    lstm_out = LSTM(units=32, return_sequences=True)(title_embedding)
    avg_out = GlobalAveragePooling1D()(lstm_out)
    dense1 = Dense(32, activation='relu')(avg_out)
    dense2 = Dense(1, activation='relu')(dense1)

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit, exFM_logit, dense2])
    like_logit = tf.keras.layers.add(
        [linear_logit, like_logit, exFM_logit, dense2])

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)

    print(str(inputs_list))
    inputs_list.append(title_input)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
def xDeepFM_MTL(linear_feature_columns, dnn_feature_columns,
                gate_feature_columns, embedding_size=8,
                dnn_hidden_units=(256, 256), cin_layer_size=(256, 256,),
                cin_split_half=True, init_std=0.0001, l2_reg_dnn=0,
                dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
                task_net_size=(128,), l2_reg_linear=0.00001,
                l2_reg_embedding=0.00001, seed=1024):
    # check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns + gate_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    # Dense gate feature that mixes the two tower pairs below.
    gate = get_dense_input(features, gate_feature_columns)[0]

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, 0,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    # dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)

    finish_out1 = DNN(task_net_size)(deep_out)
    finish_logit1 = tf.keras.layers.Dense(1, use_bias=False,
                                          activation=None)(finish_out1)
    like_out1 = DNN(task_net_size)(deep_out)
    like_logit1 = tf.keras.layers.Dense(1, use_bias=False,
                                        activation=None)(like_out1)

    finish_out2 = DNN(task_net_size)(deep_out)
    finish_logit2 = tf.keras.layers.Dense(1, use_bias=False,
                                          activation=None)(finish_out2)
    like_out2 = DNN(task_net_size)(deep_out)
    like_logit2 = tf.keras.layers.Dense(1, use_bias=False,
                                        activation=None)(like_out2)

    # condition = tf.placeholder("float32", shape=[None, 1], name="condition")
    finish_logit = gate * finish_logit1 + (1.0 - gate) * finish_logit2
    like_logit = gate * like_logit1 + (1.0 - gate) * like_logit2
    print(np.shape(like_logit))

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit, exFM_logit])
    like_logit = tf.keras.layers.add([linear_logit, like_logit, exFM_logit])

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
def DeepFM(feature_dim_dict, attention_feature_name=None, embedding_size=8,
           use_fm=True, dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024,
           dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param attention_feature_name: str or None. If set, variable-length attention is applied to this feature during embedding preprocessing.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = \
        preprocess_input_embedding(feature_dim_dict, embedding_size,
                                   l2_reg_embedding, l2_reg_linear, init_std,
                                   seed, create_linear_weight=True,
                                   use_var_attention=(
                                       True if attention_feature_name else False),
                                   attention_feature_name=attention_feature_name)

    linear_logit = get_linear_logit(linear_emb_list, dense_input_dict,
                                    l2_reg_linear)

    fm_input = concat_fun(deep_emb_list, axis=1)
    deep_input = tf.keras.layers.Flatten()(fm_input)
    fm_out = FM()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(deep_out)

    if len(dnn_hidden_units) == 0 and use_fm == False:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and use_fm == True:  # linear + FM
        final_logit = tf.keras.layers.add([linear_logit, fm_out])
    elif len(dnn_hidden_units) > 0 and use_fm == False:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit])
    elif len(dnn_hidden_units) > 0 and use_fm == True:  # linear + FM + Deep
        final_logit = tf.keras.layers.add([linear_logit, fm_out, deep_logit])
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
                                                     l2_reg=0.00001,
                                                     init_std=0.0001,
                                                     seed=1024)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # print('test_model_input info')
    # print(len(test_model_input))
    # print(type(test_model_input[0]))
    # print(len(test_model_input[0]))
    # print('num_features:', len(test_model_input[0]))

    # MMoE
    mmoe_layers = MMoE(units=16, num_experts=8, num_tasks=8)(dnn_input)
    print('passed')
    mmoe_cat_layer = concat_fun(mmoe_layers)
    mmoe_high_layers = MMoEdiffGate(units=16, num_experts=8,
                                    num_tasks=2)([mmoe_cat_layer, dnn_input])

    output_layers = []
    # Build tower layer from MMoE layer
    output_info = ['finish', 'like']
    for index, task_layer in enumerate(mmoe_high_layers):
        tower_layer = tf.keras.layers.Dense(units=128,
                                            activation='relu')(task_layer)
        output_layer = tf.keras.layers.Dense(units=1,
                                             name=output_info[index],
                                             activation='sigmoid')(tower_layer)
        output_layers.append(output_layer)
def xDeepFM_MTL(feature_dim_dict, embedding_size=8, hidden_size=(256, 256),
                cin_layer_size=(256, 256,), cin_split_half=True,
                task_net_size=(128,), l2_reg_linear=0.000001,
                l2_reg_embedding=0.000001, seed=1024):
    check_feature_config_dict(feature_dim_dict)  # unknown
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    # video_input = tf.keras.layers.Input((128,))
    # inputs_list.append(video_input)

    fm_input = concat_fun(deep_emb_list, axis=1)  # model input
    '''
    Build the CIN. By default its layer sizes are [256, 256] with relu
    activation; the input is (batch_size, field_size, embedding_size) and the
    output is (batch_size, feature_num). If split_half is True, only half of
    the feature maps in each hidden layer are connected to the output unit.
    '''
    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        # fully connected to the output unit
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)
    '''
    Flatten collapses every dimension except the batch dimension, giving an
    output of (batch_size, sum_size); the embedding features are fed straight
    into the MLP.
    '''
    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)
    '''
    Pass deep_out through a task MLP and a fully connected layer to get the
    finish logit; the same is done for the like logit.
    '''
    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)
    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)
    '''
    The final finish (resp. like) logit is the sum of linear_logit, the task
    logit, and exFM_logit.
    '''
    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit, exFM_logit])
    like_logit = tf.keras.layers.add([linear_logit, like_logit, exFM_logit])
    '''
    Convert the logits to probabilities with a sigmoid and build the model
    from the inputs and outputs.
    '''
    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
def CapsuleNet(feature_dim_dict, seq_feature_list, embedding_size=8,
               hist_len_max=50, use_bn=False, dnn_hidden_units=(200, 80),
               dnn_activation='sigmoid', num_capsule=8, dim_capsule=2,
               routing_iterations=3, att_hidden_size=(64, 16),
               att_activation="dice", att_weight_normalization=True,
               att_embedding_size=1, att_head_num=8, l2_reg_dnn=0,
               l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001,
               alpha=1e-6, seed=1024, task='binary'):
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)

    # Attention scores of the behavior sequence w.r.t. the query item.
    scores = AttentionSequencePoolingLayer(
        att_hidden_units=att_hidden_size, att_activation=att_activation,
        weight_normalization=att_weight_normalization,
        return_score=True)([query_emb, keys_emb, user_behavior_length])

    Self_Attention = Transformer(att_embedding_size, att_head_num,
                                 dropout_rate=0, use_layer_norm=True,
                                 use_positional_encoding=True, seed=seed,
                                 supports_masking=False, blinding=True)
    keys_emb = Self_Attention(
        [keys_emb, keys_emb, user_behavior_length, user_behavior_length])

    # Capsules with dynamic routing over the attended behavior sequence.
    cap = Capsule(num_capsule=num_capsule, dim_capsule=dim_capsule,
                  routings=routing_iterations, share_weights=True,
                  supports_masking=True)
    hist_cap = cap(keys_emb, scores=scores)
    disp_loss = get_disp_loss(hist_cap)
    hist_cap = Reshape([1, num_capsule * dim_capsule])(NoMask()(hist_cap))

    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Concatenate()([deep_input_emb, hist_cap])
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]
    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    # Dispersion regularization on the capsules, weighted by alpha.
    model.add_loss(alpha * disp_loss)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def BST(feature_dim_dict, seq_feature_list, embedding_size=4,
        hist_len_max=16, use_bn=False, dnn_hidden_units=(200, 80),
        dnn_activation='relu', att_embedding_size=1, att_head_num=8,
        l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001,
        seed=1024, task='binary'):
    """Instantiates the Behavior Sequence Transformer (BST) architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # sparse_embedding_dict = {feat.name: Embedding(feat.dimension, embedding_size,
    #                                               embeddings_initializer=RandomNormal(
    #                                                   mean=0.0, stddev=init_std, seed=seed),
    #                                               embeddings_regularizer=l2(
    #                                                   l2_reg_embedding),
    #                                               name='sparse_emb_' + str(i) + '-' + feat.name) for i, feat in
    #                          enumerate(feature_dim_dict["sparse"])}
    # print(sparse_embedding_dict)
    sparse_embedding_dict = {
        feat.name: Embedding(tf.cast(feat.dimension, tf.int32),
                             embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    # deep_emb_list = get_embedding_vec_list(
    #     deep_sparse_emb_dict, sparse_input_dict, feature_dim_dict['sparse'])
    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    print("prev: {0}".format(keys_emb))

    # hist_cap = Capsule(
    #     num_capsule=8, dim_capsule=2,
    #     routings=3, share_weights=True)(NoMask()(keys_emb))
    # print("now: {0}".format(hist_cap))
    # exit(0)
    # keys_emb = concat_fun(keys_emb_list)
    # hist_cap = Reshape([1, 16])(hist_cap)

    deep_input_emb = concat_fun(deep_input_emb_list)
    print("deep input emb: ", deep_input_emb)
    # print("hist_cap: ", hist_cap)

    Self_Attention = Transformer(att_embedding_size, att_head_num,
                                 dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=True, seed=seed,
                                 supports_masking=False, blinding=True)
    # print("now: {0}".format(hist))

    # Self-attend each behavior feature sequence separately, then concatenate.
    hists = []
    for key_emb in keys_emb_list:
        hist = Self_Attention(
            [key_emb, key_emb, user_behavior_length, user_behavior_length])
        hists.append(hist)
    hist = concat_fun(hists)
    # Tensor("concatenate_2/concat:0", shape=(?, 50, 8), dtype=float32)
    # <tf.Tensor 'concatenate_3/concat:0' shape=(?, 4, 8) dtype=float32>

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    # print(deep_input_emb)
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8,
         sess_max_count=5, sess_len_max=10, att_embedding_size=1,
         att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid',
         l2_reg_dnn=0, l2_reg_embedding=1e-6, task='binary', dnn_dropout=0,
         init_std=0.0001, seed=1024, encoding='bias'):
    check_feature_config_dict(feature_dim_dict)
    print('sess_count', sess_max_count, 'encoding', encoding)

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list,
                                            sess_feature_list)
    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    be_flag = True if encoding == 'bias' else False
    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list, sess_max_count,
                                      bias_encoding=be_flag)

    Self_Attention = Transformer(att_embedding_size, att_head_num,
                                 dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not be_flag),
                                 seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(len(sess_feature_list) * embedding_size, layers=2,
                          res_layers=0, dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True)(
            [query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 False, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    # sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))
        # sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]
    model = Model(inputs=model_input_list, outputs=output)
    return model
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8,
         hist_len_max=16, gru_type="GRU", use_negsampling=False, alpha=1.0,
         use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu',
         att_hidden_units=(64, 16), att_activation="dice",
         att_weight_normalization=True, l2_reg_dnn=0, l2_reg_embedding=1e-6,
         dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param gru_type: str, can be GRU, AIGRU, AUGRU or AGRU
    :param use_negsampling: bool, whether or not to use negative sampling
    :param alpha: float, weight of auxiliary_loss
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_hidden_units: list of positive integers, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name)
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                                 sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    if use_negsampling:
        # Extra inputs holding negatively sampled behavior sequences.
        neg_user_behavior_input = OrderedDict()
        for i, feat in enumerate(seq_feature_list):
            neg_user_behavior_input[feat] = Input(
                shape=(hist_len_max,), name='neg_seq_' + str(i) + '-' + feat)
        neg_uiseq_embed_list = get_embedding_vec_list(
            sparse_embedding_dict, neg_user_behavior_input,
            feature_dim_dict["sparse"], seq_feature_list)
        # [sparse_embedding_dict[feat](
        #     neg_user_behavior_input[feat]) for feat in seq_feature_list]
        neg_concat_behavior = concat_fun(neg_uiseq_embed_list)
    else:
        neg_concat_behavior = None

    hist, aux_loss_1 = interest_evolution(
        keys_emb, query_emb, user_behavior_length, gru_type=gru_type,
        use_neg=use_negsampling, neg_concat_behavior=neg_concat_behavior,
        embedding_size=embedding_size, att_hidden_size=att_hidden_units,
        att_activation=att_activation,
        att_weight_normalization=att_weight_normalization)

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list += list(neg_user_behavior_input.values())
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8,
           use_fm=True, dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024,
           dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary', att=False, seq_len=None, cate_feats=[],
           cate2nunique={}):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list, embedding_dict = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Sequence branch: channel 0 is the continuous gap series, channels 1..
    # carry the categorical feature ids, each run through an embedding.
    input_lstm = Input(shape=(seq_len, 1 + len(cate_feats)),
                       name='lstm_input')
    input_lstm_gap = Lambda(lambda x: x[:, :, 0:1])(input_lstm)
    concate_list = [input_lstm_gap]
    for i, cate in enumerate(cate_feats):
        # Bind the loop variable as a default argument so the Lambda keeps
        # the right channel index even if it is re-invoked later.
        input_cate = Lambda(lambda x, idx=i: x[:, :, idx + 1])(input_lstm)
        emb = embedding_dict.get(cate)
        if emb is None:
            emb = Embedding(output_dim=8, input_dim=cate2nunique[cate])
        concate_list.append(emb(input_cate))
    input_lstm_concat = Concatenate(axis=-1)(concate_list)

    if att:
        lstm_out = LSTM(units=128, return_sequences=True)(input_lstm_concat)
        attention_mul = attention_3d_block(lstm_out, seq_len)
        lstm_out = Lambda(lambda x: K.sum(x, axis=1))(attention_mul)
    else:
        lstm_out = LSTM(units=128, return_sequences=False)(input_lstm_concat)

    dnn_input = concat_fun([dnn_input, lstm_out])
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_out)

    if len(dnn_hidden_units) == 0 and use_fm == False:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and use_fm == True:  # linear + FM
        final_logit = tf.keras.layers.add([linear_logit, fm_logit])
    elif len(dnn_hidden_units) > 0 and use_fm == False:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, dnn_logit])
    elif len(dnn_hidden_units) > 0 and use_fm == True:  # linear + FM + Deep
        final_logit = tf.keras.layers.add([linear_logit, fm_logit, dnn_logit])
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list + [input_lstm],
                                  outputs=output)
    return model
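# Hedged input-contract sketch for the LSTM branch above: besides the usual
# feature arrays, the model takes one extra input named 'lstm_input' of shape
# (seq_len, 1 + len(cate_feats)) per example, where channel 0 holds the
# continuous gap sequence and channels 1.. hold integer ids for cate_feats.
# The helper values below (seq_len=30, 'store_id', vocabulary 100) are
# illustrative only.
if __name__ == "__main__":
    import numpy as np

    _seq_len, _cate_feats = 30, ['store_id']
    _batch = 16
    _lstm_batch = np.zeros((_batch, _seq_len, 1 + len(_cate_feats)), 'float32')
    _lstm_batch[:, :, 0] = np.random.rand(_batch, _seq_len)               # gaps
    _lstm_batch[:, :, 1] = np.random.randint(0, 100, (_batch, _seq_len))  # ids
    # model.predict(feature_arrays + [_lstm_batch]) once the model is built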
def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8,
         sess_max_count=5, sess_len_max=10, bias_encoding=False,
         att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80),
         dnn_activation='sigmoid', dnn_dropout=0, dnn_use_bn=False,
         l2_reg_dnn=0, l2_reg_embedding=1e-6, init_std=0.0001, seed=1024,
         task='binary'):
    """Instantiates the Deep Session Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param sess_feature_list: list, to indicate session feature sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param sess_len_max: positive int, to indicate the max length of each session
    :param bias_encoding: bool. Whether to use bias encoding or positional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention heads
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    if (att_embedding_size * att_head_num !=
            len(sess_feature_list) * embedding_size):
        raise ValueError(
            "len(sess_feature_list) * embedding_size must equal att_embedding_size * att_head_num, got %d * %d != %d * %d"
            % (len(sess_feature_list), embedding_size, att_embedding_size,
               att_head_num))

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list,
                                            sess_feature_list)
    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list, sess_max_count,
                                      bias_encoding=bias_encoding)

    Self_Attention = Transformer(att_embedding_size, att_head_num,
                                 dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding),
                                 seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(len(sess_feature_list) * embedding_size, layers=2,
                          res_layers=0, dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16), weight_normalization=True)(
            [query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 dnn_use_bn, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    # sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))
        # sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]
    model = Model(inputs=model_input_list, outputs=output)
    return model
def xDeepFM_MTL(linear_feature_columns, dnn_feature_columns, embedding_size=8,
                dnn_hidden_units=(256, 256), cin_layer_size=(256, 256,),
                cin_split_half=True, init_std=0.0001, l2_reg_dnn=0,
                dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
                task_net_size=(128,), l2_reg_linear=0.00001,
                l2_reg_embedding=0.00001, seed=1024):
    # check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, 0,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                   dnn_use_bn, seed)(dnn_input)

    like_out = DNN(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)
    like_logit = tf.keras.layers.add([linear_logit, like_logit, exFM_logit])

    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output_like)
    return model
def sess_interest_extractor(tr_input, sess_max_count, TR):
    tr_out = []
    for i in range(sess_max_count):
        tr_out.append(TR([tr_input[i], tr_input[i]]))
    sess_fea = concat_fun(tr_out, axis=1)
    return sess_fea
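# A hedged shape sketch for sess_interest_extractor: each session tensor of
# shape (batch, sess_len, dim) is self-attended by the shared Transformer TR
# into (batch, 1, dim), and the sess_max_count results are stacked on axis 1
# into (batch, sess_max_count, dim). The toy stand-in below only mimics that
# shape contract, not the real Transformer.
if __name__ == "__main__":
    import tensorflow as tf

    def toy_TR(inputs):
        q, _ = inputs
        return tf.reduce_mean(q, axis=1, keepdims=True)  # (batch, 1, dim)

    sessions = [tf.ones((2, 10, 8)) for _ in range(5)]
    print(sess_interest_extractor(sessions, 5, toy_TR).shape)  # (2, 5, 8)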
def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
        dnn_use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu',
        att_hidden_size=(80, 40), att_activation="dice",
        att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6,
        dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list of positive integers, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict['sparse'],
                                            seq_feature_list,
                                            seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                           user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           seq_feature_list, seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'],
        mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    hist = AttentionSequencePoolingLayer(
        att_hidden_size, att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 dnn_use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model = Model(inputs=model_input_list, outputs=output)
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8,
           use_fm=True, only_dnn=False, dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
           init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu',
           dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param only_dnn: bool, use only the DNN logit (not used in this MMoE variant; kept for API symmetry)
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    ## Create an Input[1,] for every feature: features ==> {'feature1': Input[1,], ...}
    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    ## [Input1, Input2, ...]
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)
    ## [embedding layer for feature_1, wired to feature_1's Input[1,] layer, ...],
    ## [Input[1,] layer for feature_1, ...]

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')
    # linear_logit_finish = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, init_std=init_std,
    #                                        seed=seed, prefix='linear_finish')
    # linear_logit_like = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, init_std=init_std,
    #                                      seed=seed, prefix='linear_like')
    ## linear transform layer, no activation

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    ## sparse embedding layers concatenated together
    fm_logit = FM()(fm_input)
    # fm_logit_finish = FM()(fm_input)
    # fm_logit_like = FM()(fm_input)
    ## second-order FM term only; excludes the first-order term and bias

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    # dnn_out = Dense(128, dnn_activation, l2_reg_dnn, dnn_dropout,
    #                 dnn_use_bn, seed)(dnn_input)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)

    # MMoE splits the shared DNN output into one gated mixture per task.
    mmoe_out = MMoE(units=16, num_experts=8, num_tasks=2)(dnn_out)
    [finish_in, like_in] = mmoe_out

    finish_out_1 = Dense(128, dnn_activation,
                         kernel_regularizer=l2(l2_reg_dnn))(finish_in)
    finish_out = Dense(128, dnn_activation,
                       kernel_regularizer=l2(l2_reg_dnn))(finish_out_1)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out_1 = Dense(128, dnn_activation,
                       kernel_regularizer=l2(l2_reg_dnn))(like_in)
    like_out = Dense(128, dnn_activation,
                     kernel_regularizer=l2(l2_reg_dnn))(like_out_1)
    # finish_logit_stop_grad = Lambda(lambda x: stop_gradient(x))(finish_out)
    # like_out_finish = concat_fun([like_out, finish_logit_stop_grad])
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_out)

    # if len(dnn_hidden_units) > 0 and only_dnn == True:
    #     final_logit = dnn_logit
    # elif len(dnn_hidden_units) == 0 and use_fm == False:  # only linear
    #     final_logit = linear_logit
    # elif len(dnn_hidden_units) == 0 and use_fm == True:  # linear + FM
    #     final_logit = tf.keras.layers.add([linear_logit, fm_logit])
    # elif len(dnn_hidden_units) > 0 and use_fm == False:  # linear + Deep
    #     final_logit = tf.keras.layers.add([linear_logit, dnn_logit])
    # elif len(dnn_hidden_units) > 0 and use_fm == True:  # linear + FM + Deep
    #     final_logit = tf.keras.layers.add([linear_logit, fm_logit, dnn_logit])
    # else:
    #     raise NotImplementedError

    finish_logit = tf.keras.layers.add([linear_logit, fm_logit, finish_logit])
    like_logit = tf.keras.layers.add([linear_logit, fm_logit, like_logit])

    output_finish = PredictionLayer('binary', name='finish')(finish_logit)
    output_like = PredictionLayer('binary', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
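# Hedged training sketch for the MMoE variant above: the 'finish' and 'like'
# towers share the expert pool, so each task's gradient reaches every expert
# scaled by that task's gate. Column construction is illustrative and the
# deepctr import path is an assumption.
if __name__ == "__main__":
    from deepctr.inputs import SparseFeat  # assumed import path

    cols = [SparseFeat('uid', 10000), SparseFeat('item_id', 50000)]
    mmoe_model = DeepFM(cols, cols)
    mmoe_model.compile('adam',
                       loss={'finish': 'binary_crossentropy',
                             'like': 'binary_crossentropy'},
                       loss_weights={'finish': 0.7, 'like': 0.3})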
def xDeepFM_MTL(feature_dim_dict, embedding_size=8, hidden_size=(256, 256),
                cin_layer_size=(256, 256,), cin_split_half=True,
                task_net_size=(128,), l2_reg_linear=0.00001,
                l2_reg_embedding=0.00001, seed=1024):
    """
    :param feature_dim_dict: feature dict, holding the feature names and feature lists
    :param embedding_size:
    :param hidden_size:
    :param cin_layer_size:
    :param cin_split_half:
    :param task_net_size: layer sizes of the task-specific networks
    :param l2_reg_linear:
    :param l2_reg_embedding:
    :param seed:
    :return:
    """
    # Validate the structure of the sparse and dense feature config.
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # TODO: add text sequence embedding
    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    # video_input = tf.keras.layers.Input((128,))
    # inputs_list.append(video_input)

    # TODO: add other features
    if 'txt' in feature_dim_dict:
        # txt_input = OrderedDict()
        for i, feat in enumerate(feature_dim_dict["txt"]):
            txt_input = tf.keras.layers.Input(
                shape=(feat.dimension,),
                name='txt_' + str(i) + '-' + feat.name)
            inputs_list.append(txt_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(finish_out)
    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(like_out)

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit, exFM_logit])
    like_logit = tf.keras.layers.add(
        [linear_logit, like_logit, exFM_logit])

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8,
           use_fm=True, use_only_dnn=False, dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
           init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu',
           dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param use_only_dnn: bool, if True use only the DNN logit regardless of the other switches
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    l2_reg=l2_reg_linear, init_std=init_std,
                                    seed=seed, prefix='linear')

    fm_input = concat_fun(sparse_embedding_list, axis=1)
    fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                  dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_out)

    if use_only_dnn == True:
        final_logit = dnn_logit
    elif len(dnn_hidden_units) == 0 and use_fm == False:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and use_fm == True:  # linear + FM
        final_logit = tf.keras.layers.add([linear_logit, fm_logit])
    elif len(dnn_hidden_units) > 0 and use_fm == False:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, dnn_logit])
    elif len(dnn_hidden_units) > 0 and use_fm == True:  # linear + FM + Deep
        final_logit = tf.keras.layers.add([linear_logit, fm_logit, dnn_logit])
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def xDeepFM(feature_dim_dict, embedding_size=8, seed=1024, init_std=0.0001,
            l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
            cin_layer_size=(256, 256), cin_split_half=True,
            cin_activation='relu', hidden_size=(256, 256), activation='relu',
            keep_prob=1, use_bn=False, l2_reg_deep=0,
            final_activation='sigmoid', use_video=False, use_audio=False):
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        init_std, seed, True)

    if use_video:
        video_input = tf.keras.layers.Input(shape=(128,), name='video')
        video_emb = tf.keras.layers.Dense(
            embedding_size, use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(video_input)
        video_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size,))(video_emb)
        deep_emb_list.append(video_emb)
        inputs_list.append(video_input)

    if use_audio:
        audio_input = tf.keras.layers.Input(shape=(128,), name='audio')
        audio_emb = tf.keras.layers.Dense(
            embedding_size, use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(audio_input)
        audio_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size,))(audio_emb)
        deep_emb_list.append(audio_emb)
        inputs_list.append(audio_input)

    fm_input = concat_fun(deep_emb_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half,
                       seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                   seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(deep_out)

    final_logit = tf.keras.layers.add([linear_logit, deep_logit, exFM_logit])
    output = PredictionLayer(final_activation, name='output')(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
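# Hedged usage sketch: with use_video=True the model above grows one extra
# 128-dim dense input named 'video', appended after the feature inputs, so
# training must supply a (batch, 128) array in that slot. Config values are
# illustrative and the deepctr import path is an assumption.
if __name__ == "__main__":
    from deepctr import SingleFeat  # assumed import path

    cfg = {'sparse': [SingleFeat('uid', 10000)], 'dense': []}
    video_model = xDeepFM(cfg, use_video=True)
    # video_model.fit(feature_arrays + [video_vectors], labels, ...)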