def CCPM(feature_dim_dict, embedding_size=8, conv_kernel_width=(6, 5), conv_filters=(4, 4),
         dnn_hidden_units=(256,), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_dnn=0,
         dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Convolutional Click Prediction Model architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param conv_kernel_width: list of positive integers or empty list, the width of the filter in each conv layer.
    :param conv_filters: list of positive integers or empty list, the number of filters in each conv layer.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN.
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    if len(conv_kernel_width) != len(conv_filters):
        raise ValueError(
            "conv_kernel_width must have the same length as conv_filters")

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
        create_linear_weight=True)

    linear_logit = get_linear_logit(
        linear_emb_list, dense_input_dict, l2_reg_linear)

    n = len(deep_emb_list)
    l = len(conv_filters)

    conv_input = concat_fun(deep_emb_list, axis=1)
    pooling_result = tf.keras.layers.Lambda(
        lambda x: tf.expand_dims(x, axis=3))(conv_input)

    for i in range(1, l + 1):
        filters = conv_filters[i - 1]
        width = conv_kernel_width[i - 1]
        k = max(1, int((1 - pow(i / l, l - i)) * n)) if i < l else 3

        conv_result = tf.keras.layers.Conv2D(filters=filters, kernel_size=(width, 1),
                                             strides=(1, 1), padding='same',
                                             activation='tanh', use_bias=True)(pooling_result)
        pooling_result = KMaxPooling(
            k=min(k, conv_result.shape[1].value), axis=1)(conv_result)

    flatten_result = tf.keras.layers.Flatten()(pooling_result)
    final_logit = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn,
                      dropout_rate=dnn_dropout)(flatten_result)
    final_logit = tf.keras.layers.Dense(1, use_bias=False)(final_logit)

    final_logit = tf.keras.layers.add([final_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor=8,
        l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0,
        init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Attentional Factorization Machine architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_attention: bool, whether to use attention or not. If set to ``False``, it is the same as a **standard Factorization Machine**.
    :param attention_factor: positive integer, units in the attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_att: float. L2 regularizer strength applied to the attention net
    :param afm_dropout: float in [0,1), fraction of the attention net output units to drop out.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
        create_linear_weight=True)

    linear_logit = get_linear_logit(
        linear_emb_list, dense_input_dict, l2_reg_linear)

    fm_input = concat_fun(deep_emb_list, axis=1)
    if use_attention:
        fm_logit = AFMLayer(attention_factor, l2_reg_att,
                            afm_dropout, seed)(deep_emb_list)
    else:
        fm_logit = FM()(fm_input)

    final_logit = tf.keras.layers.add([linear_logit, fm_logit])
    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
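# Minimal usage sketch for AFM (not part of the library API). It assumes the plain
# dict layout for feature_dim_dict described in the docstring above; the optimizer
# and loss passed to compile() are illustrative assumptions, not prescribed values.
def _afm_usage_example():
    feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                        'dense': ['field_4', 'field_5']}
    # Setting use_attention=False would fall back to a standard FM second-order term.
    model = AFM(feature_dim_dict, embedding_size=8,
                use_attention=True, attention_factor=8, task='binary')
    model.compile('adam', 'binary_crossentropy')
    return model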
def FNN(feature_dim_dict, embedding_size=8, dnn_hidden_units=(128, 128), l2_reg_embedding=1e-5,
        l2_reg_linear=1e-5, l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0,
        dnn_activation='relu', task='binary'):
    """Instantiates the Factorization-supported Neural Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_linear: float. L2 regularizer strength applied to linear weights
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
        create_linear_weight=True)

    linear_logit = get_linear_logit(
        linear_emb_list, dense_input_dict, l2_reg_linear)

    deep_input = tf.keras.layers.Flatten()(concat_fun(deep_emb_list))
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                   dnn_dropout, False, seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(deep_out)
    final_logit = tf.keras.layers.add([deep_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def DeepFM(feature_dim_dict, embedding_size=8, use_fm=True, dnn_hidden_units=(128, 128),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001,
           seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, whether to use the FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
        create_linear_weight=True)

    # Sum of the linear transformations of all inputs.
    linear_logit = get_linear_logit(
        linear_emb_list, dense_input_dict, l2_reg_linear)

    # Embed every feature, then concatenate: (?, X, embedding_size)
    fm_input = concat_fun(deep_emb_list, axis=1)
    # (?, X * embedding_size)
    deep_input = tf.keras.layers.Flatten()(fm_input)
    fm_out = FM()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                   dnn_dropout, dnn_use_bn, seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(deep_out)

    if len(dnn_hidden_units) == 0 and use_fm == False:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and use_fm == True:  # linear + FM
        final_logit = tf.keras.layers.add([linear_logit, fm_out])
    elif len(dnn_hidden_units) > 0 and use_fm == False:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit])
    elif len(dnn_hidden_units) > 0 and use_fm == True:  # linear + FM + Deep
        final_logit = tf.keras.layers.add([linear_logit, fm_out, deep_logit])
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
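# Minimal usage sketch for DeepFM (illustrative only). The feature_dim_dict layout
# follows the docstring above; training data is omitted, and the optimizer, loss
# and metrics passed to compile() are placeholder assumptions.
def _deepfm_usage_example():
    feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                        'dense': ['field_4', 'field_5']}
    model = DeepFM(feature_dim_dict, embedding_size=8, use_fm=True,
                   dnn_hidden_units=(128, 128), task='binary')
    # use_fm=False together with dnn_hidden_units=() reduces the model to the linear part.
    model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
    return model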
def DCN(feature_dim_dict, embedding_size='auto', cross_num=2, dnn_hidden_units=(128, 128),
        l2_reg_embedding=1e-5, l2_reg_cross=1e-5, l2_reg_dnn=0, init_std=0.0001, seed=1024,
        dnn_dropout=0, dnn_use_bn=False, dnn_activation='relu', task='binary'):
    """Instantiates the Deep&Cross Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive int or str, sparse feature embedding_size. If set to "auto", it will be 6*pow(cardinality, 0.25)
    :param cross_num: positive integer, number of cross layers
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_cross: float. L2 regularizer strength applied to the cross net
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param dnn_activation: Activation function to use in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    if len(dnn_hidden_units) == 0 and cross_num == 0:
        raise ValueError("Either dnn_hidden_units or cross_num must be > 0")
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, _, _, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed,
        create_linear_weight=False)

    deep_input = tf.keras.layers.Flatten()(concat_fun(deep_emb_list))

    if len(dnn_hidden_units) > 0 and cross_num > 0:  # Deep & Cross
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed)(deep_input)
        cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input)
        stack_out = tf.keras.layers.Concatenate()([cross_out, deep_out])
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(stack_out)
    elif len(dnn_hidden_units) > 0:  # Only Deep
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed)(deep_input)
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(deep_out)
    elif cross_num > 0:  # Only Cross
        cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input)
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(cross_out)
    else:  # Error
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
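# Minimal usage sketch for DCN (illustrative only). It shows the 'auto' embedding
# size, which the docstring above says resolves to 6*pow(cardinality, 0.25) per
# sparse field; the compile() arguments are placeholder assumptions.
def _dcn_usage_example():
    feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                        'dense': ['field_4', 'field_5']}
    model = DCN(feature_dim_dict, embedding_size='auto',
                cross_num=2, dnn_hidden_units=(128, 128), task='binary')
    model.compile('adam', 'binary_crossentropy')
    return model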
def xDeepFM(feature_dim_dict, embedding_size=8, dnn_hidden_units=(256, 256), cin_layer_size=(128, 128),
            cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
            l2_reg_dnn=0, l2_reg_cin=0, init_std=0.0001, seed=1024, dnn_dropout=0,
            dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the xDeepFM architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param cin_layer_size: list of positive integers or empty list, the feature maps in each hidden layer of the Compressed Interaction Network
    :param cin_split_half: bool. If set to True, half of the feature maps in each hidden layer will connect to the output unit
    :param cin_activation: activation function used on the feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the deep net
    :param l2_reg_cin: float. L2 regularizer strength applied to the CIN
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, linear_emb_list, dense_input_dict, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
        create_linear_weight=True)

    linear_logit = get_linear_logit(
        linear_emb_list, dense_input_dict, l2_reg_linear)

    fm_input = concat_fun(deep_emb_list, axis=1)

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation,
                       cin_split_half, l2_reg_cin, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                   dnn_dropout, dnn_use_bn, seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(deep_out)

    if len(dnn_hidden_units) == 0 and len(cin_layer_size) == 0:  # only linear
        final_logit = linear_logit
    elif len(dnn_hidden_units) == 0 and len(cin_layer_size) > 0:  # linear + CIN
        final_logit = tf.keras.layers.add([linear_logit, exFM_logit])
    elif len(dnn_hidden_units) > 0 and len(cin_layer_size) == 0:  # linear + Deep
        final_logit = tf.keras.layers.add([linear_logit, deep_logit])
    elif len(dnn_hidden_units) > 0 and len(cin_layer_size) > 0:  # linear + CIN + Deep
        final_logit = tf.keras.layers.add(
            [linear_logit, deep_logit, exFM_logit])
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
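# Minimal usage sketch for xDeepFM (illustrative only). cin_layer_size controls the
# CIN feature maps and cin_split_half routes half of them to the output unit, as
# documented above; the compile() arguments are placeholder assumptions.
def _xdeepfm_usage_example():
    feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                        'dense': ['field_4', 'field_5']}
    model = xDeepFM(feature_dim_dict, embedding_size=8, dnn_hidden_units=(256, 256),
                    cin_layer_size=(128, 128), cin_split_half=True, task='binary')
    model.compile('adam', 'binary_crossentropy')
    return model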
def NFFM(feature_dim_dict, embedding_size=4, dnn_hidden_units=(128, 128), l2_reg_embedding=1e-5,
         l2_reg_linear=1e-5, l2_reg_dnn=0, dnn_dropout=0, init_std=0.0001, seed=1024,
         include_linear=True, use_bn=True, reduce_sum=False, task='binary'):
    """Instantiates the Field-aware Neural Factorization Machine architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param include_linear: bool, whether to include the linear term or not
    :param use_bn: bool, whether to apply BatchNormalization to the ffm output or not
    :param reduce_sum: bool, whether to apply reduce_sum on the cross vector
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)
    if 'sequence' in feature_dim_dict and len(feature_dim_dict['sequence']) > 0:
        raise ValueError(
            "sequence input is not supported in NFFM yet")  # TODO: support sequence input

    sparse_input_dict, dense_input_dict = create_singlefeat_inputdict(
        feature_dim_dict)
    sparse_embedding, dense_embedding, linear_embedding = create_embedding_dict(
        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear)

    embed_list = []
    # Field-aware pairwise interactions between sparse features.
    for i, j in itertools.combinations(feature_dim_dict['sparse'], 2):
        i_input = sparse_input_dict[i.name]
        if i.hash_flag:
            i_input = Hash(i.dimension)(i_input)
        j_input = sparse_input_dict[j.name]
        if j.hash_flag:
            j_input = Hash(j.dimension)(j_input)

        element_wise_prod = multiply([sparse_embedding[i.name][j.name](i_input),
                                      sparse_embedding[j.name][i.name](j_input)])
        if reduce_sum:
            element_wise_prod = Lambda(lambda element_wise_prod: K.sum(
                element_wise_prod, axis=-1))(element_wise_prod)
        embed_list.append(element_wise_prod)

    # Pairwise interactions between dense features.
    for i, j in itertools.combinations(feature_dim_dict['dense'], 2):
        element_wise_prod = multiply([dense_embedding[i.name][j.name](dense_input_dict[i.name]),
                                      dense_embedding[j.name][i.name](dense_input_dict[j.name])])
        if reduce_sum:
            element_wise_prod = Lambda(lambda element_wise_prod: K.sum(
                element_wise_prod, axis=-1))(element_wise_prod)
        embed_list.append(
            Lambda(lambda x: K.expand_dims(x, axis=1))(element_wise_prod))

    # Interactions between sparse and dense features.
    for i in feature_dim_dict['sparse']:
        i_input = sparse_input_dict[i.name]
        if i.hash_flag:
            i_input = Hash(i.dimension)(i_input)
        for j in feature_dim_dict['dense']:
            element_wise_prod = multiply([sparse_embedding[i.name][j.name](i_input),
                                          dense_embedding[j.name][i.name](dense_input_dict[j.name])])
            if reduce_sum:
                element_wise_prod = Lambda(lambda element_wise_prod: K.sum(
                    element_wise_prod, axis=-1))(element_wise_prod)
            embed_list.append(element_wise_prod)

    ffm_out = tf.keras.layers.Flatten()(concat_fun(embed_list, axis=1))
    if use_bn:
        ffm_out = tf.keras.layers.BatchNormalization()(ffm_out)
    ffm_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn,
                  dropout_rate=dnn_dropout)(ffm_out)
    final_logit = Dense(1, use_bias=False)(ffm_out)

    linear_emb_list = get_embedding_vec_list(
        linear_embedding, sparse_input_dict, feature_dim_dict['sparse'])
    linear_logit = get_linear_logit(
        linear_emb_list, dense_input_dict, l2_reg_linear)

    if include_linear:
        final_logit = add([final_logit, linear_logit])

    output = PredictionLayer(task)(final_logit)
    inputs_list = get_inputs_list([sparse_input_dict, dense_input_dict])
    model = Model(inputs=inputs_list, outputs=output)
    return model
def PNN(feature_dim_dict, embedding_size=8, dnn_hidden_units=(128, 128), l2_reg_embedding=1e-5,
        l2_reg_dnn=0, init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu',
        use_inner=True, use_outter=False, kernel_type='mat', task='binary'):
    """Instantiates the Product-based Neural Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param use_inner: bool, whether to use the inner product or not.
    :param use_outter: bool, whether to use the outer product or not.
    :param kernel_type: str, kernel type used in the outer product, can be ``'mat'``, ``'vec'`` or ``'num'``
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    if kernel_type not in ['mat', 'vec', 'num']:
        raise ValueError("kernel_type must be mat, vec or num")

    deep_emb_list, _, _, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed,
        create_linear_weight=False)

    inner_product = tf.keras.layers.Flatten()(
        InnerProductLayer()(deep_emb_list))
    outter_product = OutterProductLayer(kernel_type)(deep_emb_list)

    # ipnn deep input
    linear_signal = tf.keras.layers.Reshape(
        [len(deep_emb_list) * embedding_size])(concat_fun(deep_emb_list))

    if use_inner and use_outter:
        deep_input = tf.keras.layers.Concatenate()(
            [linear_signal, inner_product, outter_product])
    elif use_inner:
        deep_input = tf.keras.layers.Concatenate()(
            [linear_signal, inner_product])
    elif use_outter:
        deep_input = tf.keras.layers.Concatenate()(
            [linear_signal, outter_product])
    else:
        deep_input = linear_signal

    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                   dnn_dropout, False, seed)(deep_input)
    deep_logit = tf.keras.layers.Dense(
        1, use_bias=False, activation=None)(deep_out)
    output = PredictionLayer(task)(deep_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
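# Minimal usage sketch for PNN (illustrative only). It toggles the inner/outer
# product terms documented above; kernel_type only matters when use_outter=True,
# and the compile() arguments are placeholder assumptions.
def _pnn_usage_example():
    feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                        'dense': ['field_4', 'field_5']}
    model = PNN(feature_dim_dict, embedding_size=8, dnn_hidden_units=(128, 128),
                use_inner=True, use_outter=True, kernel_type='mat', task='binary')
    model.compile('adam', 'binary_crossentropy')
    return model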
def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16, dnn_use_bn=False,
        dnn_hidden_units=(200, 80), dnn_activation='relu', att_hidden_size=(80, 40),
        att_activation="dice", att_weight_normalization=False, l2_reg_dnn=0,
        l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, the max length of the sequence input
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param dnn_activation: Activation function to use in the deep net
    :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net
    :param att_activation: Activation function to use in the attention net
    :param att_weight_normalization: bool. Whether to normalize the attention scores of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])}

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                            feature_dim_dict['sparse'],
                                            seq_feature_list, seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           seq_feature_list, seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                                 feature_dim_dict['sparse'],
                                                 mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation,
                                         weight_normalization=att_weight_normalization,
                                         supports_masking=True)([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, dnn_use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)

    output = PredictionLayer(task)(final_logit)
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    model = Model(inputs=model_input_list, outputs=output)
    return model
def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_size=8, att_head_num=2,
            att_res=True, dnn_hidden_units=(256, 256), dnn_activation='relu', l2_reg_dnn=0,
            l2_reg_embedding=1e-5, dnn_use_bn=False, dnn_dropout=0, init_std=0.0001, seed=1024,
            task='binary'):
    """Instantiates the AutoInt Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param att_layer_num: int. The number of InteractingLayer to be used.
    :param att_embedding_size: int. The embedding size in the multi-head self-attention network.
    :param att_head_num: int. The head number in the multi-head self-attention network.
    :param att_res: bool. Whether or not to use standard residual connections before the output.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN
    :param dnn_activation: Activation function to use in the DNN
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    if len(dnn_hidden_units) <= 0 and att_layer_num <= 0:
        raise ValueError("Either dnn_hidden_units or att_layer_num must be > 0")
    check_feature_config_dict(feature_dim_dict)

    deep_emb_list, _, _, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed,
        create_linear_weight=False)

    att_input = concat_fun(deep_emb_list, axis=1)

    for _ in range(att_layer_num):
        att_input = InteractingLayer(
            att_embedding_size, att_head_num, att_res)(att_input)
    att_output = tf.keras.layers.Flatten()(att_input)

    deep_input = tf.keras.layers.Flatten()(concat_fun(deep_emb_list))

    if len(dnn_hidden_units) > 0 and att_layer_num > 0:  # Deep & Interacting Layer
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed)(deep_input)
        stack_out = tf.keras.layers.Concatenate()([att_output, deep_out])
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(stack_out)
    elif len(dnn_hidden_units) > 0:  # Only Deep
        deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                       dnn_dropout, dnn_use_bn, seed)(deep_input)
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(deep_out)
    elif att_layer_num > 0:  # Only Interacting Layer
        final_logit = tf.keras.layers.Dense(
            1, use_bias=False, activation=None)(att_output)
    else:  # Error
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)
    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16, gru_type="GRU",
         use_negsampling=False, alpha=1.0, use_bn=False, dnn_hidden_units=(200, 80),
         dnn_activation='relu', att_hidden_units=(64, 16), att_activation="dice",
         att_weight_normalization=True, l2_reg_dnn=0, l2_reg_embedding=1e-5, dnn_dropout=0,
         init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, the max length of the sequence input
    :param gru_type: str, can be GRU, AIGRU, AUGRU or AGRU
    :param use_negsampling: bool, whether or not to use negative sampling
    :param alpha: float, weight of the auxiliary loss
    :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the DNN
    :param dnn_activation: Activation function to use in the DNN
    :param att_hidden_units: list of positive integers, the layer number and units in each layer of the attention net
    :param att_activation: Activation function to use in the attention net
    :param att_weight_normalization: bool. Whether to normalize the attention scores of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(
                                 mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name)
        for i, feat in enumerate(feature_dim_dict["sparse"])}

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                            feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input,
                                           feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input,
                                                 feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    if use_negsampling:
        neg_user_behavior_input = OrderedDict()
        for i, feat in enumerate(seq_feature_list):
            neg_user_behavior_input[feat] = Input(
                shape=(hist_len_max,), name='neg_seq_' + str(i) + '-' + feat)

        neg_uiseq_embed_list = get_embedding_vec_list(sparse_embedding_dict, neg_user_behavior_input,
                                                      feature_dim_dict["sparse"], seq_feature_list)
        neg_concat_behavior = concat_fun(neg_uiseq_embed_list)
    else:
        neg_concat_behavior = None

    hist, aux_loss_1 = interest_evolution(keys_emb, query_emb, user_behavior_length,
                                          gru_type=gru_type, use_neg=use_negsampling,
                                          neg_concat_behavior=neg_concat_behavior,
                                          embedding_size=embedding_size,
                                          att_hidden_size=att_hidden_units,
                                          att_activation=att_activation,
                                          att_weight_normalization=att_weight_normalization)

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                 dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list += list(neg_user_behavior_input.values())
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)

    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def FGCNN(feature_dim_dict, embedding_size=8, conv_kernel_width=(7, 7, 7, 7), conv_filters=(14, 16, 18, 20),
          new_maps=(3, 3, 3, 3), pooling_width=(2, 2, 2, 2), dnn_hidden_units=(128,),
          l2_reg_embedding=1e-5, l2_reg_dnn=0, dnn_dropout=0, init_std=0.0001, seed=1024,
          task='binary'):
    """Instantiates the Feature Generation by Convolutional Neural Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer, sparse feature embedding_size
    :param conv_kernel_width: list of positive integers or empty list, the width of the filter in each conv layer.
    :param conv_filters: list of positive integers or empty list, the number of filters in each conv layer.
    :param new_maps: list of positive integers or empty list, the feature maps of generated features.
    :param pooling_width: list of positive integers or empty list, the width of the pooling layer.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the standard deviation used to initialize embedding vectors
    :param seed: integer, random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)
    if not (len(conv_kernel_width) == len(conv_filters) == len(new_maps) == len(pooling_width)):
        raise ValueError(
            "conv_kernel_width, conv_filters, new_maps and pooling_width must have the same length")

    deep_emb_list, fg_deep_emb_list, _, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, 0, init_std, seed, False)

    fg_input = concat_fun(fg_deep_emb_list, axis=1)
    origin_input = concat_fun(deep_emb_list, axis=1)

    if len(conv_filters) > 0:
        new_features = FGCNNLayer(
            conv_filters, conv_kernel_width, new_maps, pooling_width)(fg_input)
        combined_input = concat_fun([origin_input, new_features], axis=1)
    else:
        combined_input = origin_input

    inner_product = tf.keras.layers.Flatten()(InnerProductLayer()(
        tf.keras.layers.Lambda(unstack, mask=[None] * combined_input.shape[1].value)(combined_input)))
    linear_signal = tf.keras.layers.Flatten()(combined_input)
    dnn_input = tf.keras.layers.Concatenate()([linear_signal, inner_product])
    dnn_input = tf.keras.layers.Flatten()(dnn_input)

    final_logit = DNN(dnn_hidden_units, dropout_rate=dnn_dropout,
                      l2_reg=l2_reg_dnn)(dnn_input)
    final_logit = tf.keras.layers.Dense(1, use_bias=False)(final_logit)
    output = PredictionLayer(task)(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model