def DeepFM2(linear_feature_columns, dnn_feature_columns, fm_group=None,
            dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
            l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
            dnn_activation='relu', dnn_use_bn=False, task='binary',
            use_attention=True, attention_factor=8, l2_reg_att=1e-5,
            afm_dropout=0):
    """Instantiates the DeepFM Network architecture, optionally replacing the
    plain FM interaction layer with an attentional FM (AFM) layer.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
        Defaults to ``[DEFAULT_GROUP_NAME]`` when ``None``.
    :param dnn_hidden_units: list of positive integers (or empty), the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param use_attention: bool. If True, use AFMLayer for the pairwise interactions; else plain FM.
    :param attention_factor: int. Hidden size of the AFM attention network (AFM only).
    :param l2_reg_att: float. L2 regularizer strength applied to the AFM attention network.
    :param afm_dropout: float in [0,1). Dropout applied inside the AFM layer.
    :return: A Keras model instance.
    """
    # Default resolved inside the body to avoid a shared mutable default argument.
    if fm_group is None:
        fm_group = [DEFAULT_GROUP_NAME]

    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    if use_attention:
        # AFM weights each pairwise feature interaction with a learned attention score.
        fm_logit = add_func([
            AFMLayer(attention_factor, l2_reg_att, afm_dropout, seed)(list(v))
            for k, v in group_embedding_dict.items() if k in fm_group
        ])
    else:
        fm_logit = add_func([
            FM()(concat_func(v, axis=1))
            for k, v in group_embedding_dict.items() if k in fm_group
        ])

    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                     dnn_use_bn, seed=seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(
        1, use_bias=False,
        kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(dnn_output)

    # Final prediction = linear + interaction + deep logits.
    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def _trans_encoder_branch(seq_input, label_input, emb, emb_label, vocab_size,
                          branch_id, model_dim=128, ff_dim=2048, seq_len=40):
    """Build one frozen-embedding transformer-encoder branch.

    Embeds an id sequence and its label sequence with frozen pre-trained
    matrices, adds positional encodings, applies one multi-head-attention +
    position-wise feed-forward encoder block (residual connections and layer
    normalization after each), max-pools over time and projects to a 5-way
    softmax.

    :param seq_input: Keras Input of shape (seq_len,) holding the id sequence.
    :param label_input: Keras Input of shape (seq_len,) holding the label sequence.
    :param emb: pre-trained embedding matrix for the id sequence.
    :param emb_label: pre-trained embedding matrix for the label sequence (3 rows).
    :param vocab_size: number of distinct ids; embedding input_dim is vocab_size + 1.
    :param branch_id: int used to name the output layer ``output_trans_{branch_id}_layer``.
    :param model_dim: embedding / encoder model dimension.
    :param ff_dim: hidden size of the position-wise feed-forward sublayer.
    :param seq_len: fixed input sequence length.
    :return: tensor of shape (batch, 5), the branch's softmax output.
    """
    x = Embedding(input_dim=vocab_size + 1, output_dim=model_dim,
                  weights=[emb], trainable=False,
                  input_length=seq_len, mask_zero=True)(seq_input)
    x_label = Embedding(input_dim=2 + 1, output_dim=model_dim,
                        weights=[emb_label], trainable=False,
                        input_length=seq_len, mask_zero=True)(label_input)

    encodings = PositionEncoding(model_dim)(x)
    encodings = Add()([x, encodings])
    encodings = Add()([x_label, encodings])

    # Padding positions (id == 0) are masked out of the attention.
    masks = tf.equal(seq_input, 0)
    attention_out = MultiHeadAttention(4, 32)(
        [encodings, encodings, encodings, masks])

    # Add & Norm
    attention_out += encodings
    attention_out = LayerNormalization()(attention_out)

    # Feed-Forward, then Add & Norm
    ff_out = PositionWiseFeedForward(model_dim, ff_dim)(attention_out)
    ff_out += attention_out
    encodings = LayerNormalization()(ff_out)

    encodings = GlobalMaxPooling1D()(encodings)
    encodings = Dropout(0.2)(encodings)
    return Dense(5, activation='softmax',
                 name='output_trans_{}_layer'.format(branch_id))(encodings)


def M(emb1, emb1_label, emb2, emb2_label, emb3, emb3_label, emb4, emb4_label,
      emb5, emb5_label, linear_feature_columns, dnn_feature_columns,
      fm_group=None, dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
      l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
      dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """DeepFM augmented with five frozen-embedding transformer branches.

    Each (embN, embN_label) pair drives one transformer-encoder branch over a
    length-40 id/label sequence pair (see ``_trans_encoder_branch``); the five
    5-way softmax branch outputs are concatenated and appended to the DeepFM
    DNN input. The final logit is linear + FM + DNN, and the model is compiled
    with Adam(2.5e-4) and binary cross-entropy on the prediction layer.

    :param emb1..emb5: pre-trained id-embedding matrices (vocab sizes 5307,
        101, 8, 38, 4317 respectively, +1 padding row each).
    :param emb1_label..emb5_label: pre-trained label-embedding matrices.
    :param linear_feature_columns: features used by the linear part.
    :param dnn_feature_columns: features used by the deep part.
    :param fm_group: list of group names for FM interactions; defaults to
        ``[DEFAULT_GROUP_NAME]`` when ``None``.
    :param dnn_hidden_units: units per DNN layer.
    :param l2_reg_linear / l2_reg_embedding / l2_reg_dnn: L2 strengths.
    :param seed: random seed.
    :param dnn_dropout: DNN dropout probability in [0,1).
    :param dnn_activation: DNN activation function.
    :param dnn_use_bn: whether to use BatchNormalization in the DNN.
    :param task: ``"binary"`` or ``"regression"``.
    :return: A compiled Keras model instance.
    """
    # Avoid a shared mutable default argument.
    if fm_group is None:
        fm_group = [DEFAULT_GROUP_NAME]

    max_seq_len = 40
    # (id embedding, label embedding, vocab size) per branch — the only
    # things that differ between the five otherwise-identical branches.
    branch_specs = [
        (emb1, emb1_label, 5307),
        (emb2, emb2_label, 101),
        (emb3, emb3_label, 8),
        (emb4, emb4_label, 38),
        (emb5, emb5_label, 4317),
    ]

    trans_inputs = []
    trans_outputs = []
    for i, (emb, emb_label, vocab_size) in enumerate(branch_specs, start=1):
        seq_input = Input(shape=(max_seq_len, ),
                          name='input_trans_{}_layer'.format(i))
        label_input = Input(shape=(max_seq_len, ),
                            name='input_trans_{}_label_layer'.format(i))
        trans_inputs += [seq_input, label_input]
        trans_outputs.append(
            _trans_encoder_branch(seq_input, label_input, emb, emb_label,
                                  vocab_size, i))

    # Equivalent to the chained pairwise concatenations: one (batch, 25) tensor.
    trans_output = concatenate(trans_outputs, axis=-1)

    # ---- DeepFM over the feature columns ---------------------------------
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])
    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)

    # Transformer branch outputs are concatenated onto the DNN input.
    mix = concatenate([trans_output, dnn_input], axis=-1)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)(mix)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    # NOTE(review): `features` is an OrderedDict of Input layers appended as a
    # single nested element of the inputs list; TF2 Keras accepts nested input
    # structures, but confirm this matches how training data is fed.
    model = Model(inputs=trans_inputs + [features], outputs=[output])
    model.compile(optimizer=optimizers.Adam(2.5e-4),
                  loss={'prediction_layer': losses.binary_crossentropy},
                  metrics=['AUC'])
    return model
def create_model(linear_feature_columns, dnn_feature_columns, fm_group=None,
                 dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
                 l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024,
                 dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
                 task='binary'):
    """Build a plain DeepFM model (linear + FM + DNN logits).

    The cross-embedding and transformer sequence branches that previously fed
    the DNN are disabled: only the feature-column DeepFM path is wired into
    the returned model, whose inputs are exactly the feature-column Inputs.

    NOTE(review): a second ``create_model`` defined later in this module
    shadows this one — rename one of the two.

    :param linear_feature_columns: features used by the linear part.
    :param dnn_feature_columns: features used by the deep part.
    :param fm_group: list of group names for FM interactions; defaults to
        ``[DEFAULT_GROUP_NAME]`` when ``None``.
    :param dnn_hidden_units: units per DNN layer.
    :param l2_reg_linear / l2_reg_embedding / l2_reg_dnn: L2 strengths.
    :param seed: random seed.
    :param dnn_dropout: DNN dropout probability in [0,1).
    :param dnn_activation: DNN activation function.
    :param dnn_use_bn: whether to use BatchNormalization in the DNN.
    :param task: ``"binary"`` or ``"regression"``.
    :return: An uncompiled Keras model instance.
    """
    # Avoid a shared mutable default argument.
    if fm_group is None:
        fm_group = [DEFAULT_GROUP_NAME]
    K.clear_session()

    # slotid_nettype dense input, kept for the disabled cross branch.
    # NOTE(review): it is NOT among the returned model's inputs.
    inputs_all = [
        get_input_feature_layer(name='slotid_nettype',
                                feature_shape=dense_feature_size)
    ]
    layer_slotid_nettype = K.expand_dims(inputs_all[0], 1)

    # ---- DeepFM over the feature columns ---------------------------------
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])
    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)

    # Cross/transformer outputs would be concatenated here when re-enabled.
    mix = dnn_input
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)(mix)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=[output])
    print(model.summary())
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=None,
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
           l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024,
           dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary', use_image=False, use_text=False, embedding_size=128):
    """Instantiates the DeepFM Network architecture, with optional dense
    image/text side-feature inputs projected into the FM embedding group.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
        Defaults to ``[DEFAULT_GROUP_NAME]`` when ``None``.
    :param dnn_hidden_units: list of positive integers (or empty), the layer number and units in each layer of DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param init_std: float, to use as the initialize std of embedding vector.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param use_image: bool. Add a 128-d ``image`` input projected to ``embedding_size``.
    :param use_text: bool. Add a 128-d ``text`` input projected to ``embedding_size``.
    :param embedding_size: int. Projection width for the image/text side features.
    :return: A Keras model instance.
    """
    # Avoid a shared mutable default argument.
    if fm_group is None:
        fm_group = [DEFAULT_GROUP_NAME]

    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, init_std, seed,
        support_group=True)

    if use_image:
        # Project the raw 128-d image vector into the shared embedding space
        # and add it to the default FM group as one extra field.
        video_input = tf.keras.layers.Input(shape=(128, ), name='image')
        video_emb = tf.keras.layers.Dense(
            embedding_size, use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(video_input)
        video_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size, ))(video_emb)
        group_embedding_dict[DEFAULT_GROUP_NAME].append(video_emb)
        inputs_list.append(video_input)

    if use_text:
        # Same projection for the 128-d text vector.
        audio_input = tf.keras.layers.Input(shape=(128, ), name='text')
        audio_emb = tf.keras.layers.Dense(
            embedding_size, use_bias=False,
            kernel_regularizer=l2(l2_reg_embedding))(audio_input)
        audio_emb = tf.keras.layers.Reshape(
            (1, embedding_size), input_shape=(embedding_size, ))(audio_emb)
        group_embedding_dict[DEFAULT_GROUP_NAME].append(audio_emb)
        inputs_list.append(audio_input)

    linear_logit = get_linear_logit(features, linear_feature_columns,
                                    init_std=init_std, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])

    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                     dnn_use_bn, seed)(dnn_input)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def create_model(linear_feature_columns, dnn_feature_columns, fm_group=None,
                 dnn_hidden_units=(128, 128), l2_reg_linear=0.00001,
                 l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024,
                 dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
                 task='binary'):
    """DeepFM augmented with a user/item cross-embedding branch and a
    transformer branch over the id sequences; all three are concatenated
    into the DNN input.

    NOTE(review): this shadows the earlier ``create_model`` definition in
    this module — rename one of the two.

    Relies on module-level globals: ``dense_feature_size``, ``arr_name_list``,
    ``TRAINABLE_DICT``, ``id_list_dict_emb_all``, ``conv1d_info_dict`` and the
    helpers ``get_input_feature_layer`` / ``get_seq_input_layers`` /
    ``get_emb_layer`` / ``cross_net`` / ``trans_net``.

    :param linear_feature_columns: features used by the linear part.
    :param dnn_feature_columns: features used by the deep part.
    :param fm_group: list of group names for FM interactions; defaults to
        ``[DEFAULT_GROUP_NAME]`` when ``None``.
    :param dnn_hidden_units: units per DNN layer.
    :param l2_reg_linear / l2_reg_embedding / l2_reg_dnn: L2 strengths.
    :param seed: random seed.
    :param dnn_dropout: DNN dropout probability in [0,1).
    :param dnn_activation: DNN activation function.
    :param dnn_use_bn: whether to use BatchNormalization in the DNN.
    :param task: ``"binary"`` or ``"regression"``.
    :return: An uncompiled Keras model instance.
    """
    # Avoid a shared mutable default argument.
    if fm_group is None:
        fm_group = [DEFAULT_GROUP_NAME]
    K.clear_session()

    # ---- cross branch over the dense user/item features ------------------
    inputs_all = [
        get_input_feature_layer(name='user_1', feature_shape=dense_feature_size),
        get_input_feature_layer(name='item_1', feature_shape=dense_feature_size),
    ]
    layer_user_1 = K.expand_dims(inputs_all[0], 1)
    layer_item_1 = K.expand_dims(inputs_all[1], 1)
    cross_emb_out1 = cross_net(layer_user_1, layer_item_1)
    cross_emb_out = tf.squeeze(cross_emb_out1, [1])

    # ---- transformer branch over the id sequences ------------------------
    seq_inputs_dict = get_seq_input_layers(cols=arr_name_list)
    inputs_all = inputs_all + list(seq_inputs_dict.values())
    masks = tf.equal(seq_inputs_dict['task_id'], 0)  # padding mask

    layers2concat = []
    for col in arr_name_list:
        print(col, 'get embedding!')
        emb_layer = get_emb_layer(col, trainable=TRAINABLE_DICT[col],
                                  emb_matrix=id_list_dict_emb_all[col][1])
        x = emb_layer(seq_inputs_dict[col])
        if conv1d_info_dict[col] > -1:
            # 1x1 conv projects this column's embedding to its configured width.
            x = tf.keras.layers.Conv1D(filters=conv1d_info_dict[col],
                                       kernel_size=1,
                                       activation='relu')(x)
        layers2concat.append(x)
    # Use tf.keras consistently (a bare `keras` name may not be imported).
    x = tf.keras.layers.concatenate(layers2concat)

    x = trans_net(x, masks, hidden_unit=256)
    xmaxpool = tf.keras.layers.GlobalMaxPooling1D()(x)
    xmeanpool = tf.keras.layers.GlobalAveragePooling1D()(x)
    trans_output = tf.keras.layers.concatenate([xmaxpool, xmeanpool])

    # ---- DeepFM over the feature columns ---------------------------------
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])
    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())), dense_value_list)

    # Cross, transformer and feature-column representations feed one DNN.
    mix = concatenate([cross_emb_out, trans_output, dnn_input], axis=-1)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)(mix)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False,
                                      activation=None)(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    # NOTE(review): `features` (an OrderedDict of Inputs) is appended as one
    # nested element; TF2 accepts nested input structures — verify feeding.
    model = Model(inputs=inputs_all + [features], outputs=[output])
    print(model.summary())
    return model
def DeepAutoInt(
    linear_feature_columns,
    dnn_feature_columns,
    att_layer_num=3,
    att_embedding_size=8,
    att_head_num=2,
    att_res=True,
    dnn_hidden_units=(256, 256),
    dnn_activation='relu',
    l2_reg_linear=1e-5,
    l2_reg_embedding=1e-5,
    l2_reg_dnn=0,
    dnn_use_bn=False,
    dnn_dropout=0,
    seed=1024,
    fm_group=None,
    task='binary',
):
    """Instantiates the AutoInt Network architecture, with linear and FM
    logits added to the final prediction.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_layer_num: int. The InteractingLayer number to be used.
    :param att_embedding_size: int. The embedding size in multi-head self-attention network.
    :param att_head_num: int. The head number in multi-head self-attention network.
    :param att_res: bool. Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list of positive integers (or empty), the layer number and units in each layer of DNN.
    :param dnn_activation: Activation function to use in DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param seed: integer, to use as random seed.
    :param fm_group: list, group_name of features used for FM interactions.
        Defaults to ``[DEFAULT_GROUP_NAME]`` when ``None``.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    :raises ValueError: if both ``dnn_hidden_units`` and ``att_layer_num`` are empty/zero.
    """
    # Avoid a shared mutable default argument.
    if fm_group is None:
        fm_group = [DEFAULT_GROUP_NAME]
    if len(dnn_hidden_units) <= 0 and att_layer_num <= 0:
        raise ValueError("Either hidden_layer or att_layer_num must > 0")

    # NOTE(review): only dnn_feature_columns build the input features here
    # (mirrors AutoInt); linear_feature_columns must therefore be a subset of
    # dnn_feature_columns or get_linear_logit will miss inputs — confirm.
    features = build_input_features(dnn_feature_columns)
    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)
    sparse_embedding_list = list(
        chain.from_iterable(group_embedding_dict.values()))
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])

    # Stacked multi-head self-attention over the field embeddings.
    att_input = concat_func(sparse_embedding_list, axis=1)
    for _ in range(att_layer_num):
        att_input = InteractingLayer(att_embedding_size, att_head_num,
                                     att_res)(att_input)
    att_output = tf.keras.layers.Flatten()(att_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    if len(dnn_hidden_units) > 0 and att_layer_num > 0:
        # Deep & Interacting: concatenate both towers before the final Dense.
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        stack_out = tf.keras.layers.Concatenate()([att_output, deep_out])
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False,
            kernel_initializer=tf.keras.initializers.glorot_normal(seed))(stack_out)
    elif len(dnn_hidden_units) > 0:
        # Only Deep.
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False,
            kernel_initializer=tf.keras.initializers.glorot_normal(seed))(deep_out)
    elif att_layer_num > 0:
        # Only Interacting Layer.
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False,
            kernel_initializer=tf.keras.initializers.glorot_normal(seed))(att_output)
    else:  # unreachable: guarded by the ValueError above
        raise NotImplementedError

    final_logit = add_func([linear_logit, fm_logit, final_logit])
    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model