def get_linear_logit(features, linear_feature_columns, units=1, use_bias=False, seed=1024, prefix='linear', l2_reg=0):
    """Build the linear (order-1) logit tensor(s) from dict-style feature columns.

    This variant expects each feature column to be a ``dict`` carrying at least a
    ``'feat_cat'`` key (``'sparse'`` or otherwise).

    :param features: OrderedDict of feature name -> Keras input tensor.
    :param linear_feature_columns: iterable of dict-style feature columns; not mutated.
    :param units: number of independent linear logits to build (each gets its own
        embedding table via a distinct prefix).
    :param use_bias: whether the ``Linear`` layer adds a bias term.
    :param seed: random seed forwarded to embedding/Linear initializers.
    :param prefix: name prefix for the created embedding layers.
    :param l2_reg: L2 regularization strength for the linear weights.
    :return: a tensor of shape (batch, units) — the concatenation of per-unit logits.
    """
    # Fix: the previous version re-assigned every parameter to a hard-coded
    # constant (units=1, seed=1024, ...), silently discarding caller arguments.
    # Those re-assignments are removed so the signature actually takes effect.
    #
    # Copy each column dict so the caller's feature columns are not mutated.
    linear_feature_columns = [dict(fc) for fc in linear_feature_columns]
    for fc in linear_feature_columns:
        if fc['feat_cat'] == 'sparse':
            # The linear part needs one scalar weight per category, so the
            # embedding dimension must be 1 (was 3), matching the
            # namedtuple-based implementation of this function.
            fc['embedding_dim'] = 1
            fc['embeddings_initializer'] = Zeros()

    # One embedding list per unit; distinct prefixes keep the tables separate.
    linear_emb_list = [
        input_from_feature_columns(features, linear_feature_columns, l2_reg, seed,
                                   prefix=prefix + str(i))[0]
        for i in range(units)
    ]
    _, dense_input_list = input_from_feature_columns(features, linear_feature_columns,
                                                     l2_reg, seed, prefix=prefix)

    linear_logit_list = []
    for i in range(units):
        if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0:
            # Both sparse and dense inputs present (Linear mode=2).
            sparse_input = concat_func(linear_emb_list[i])
            dense_input = concat_func(dense_input_list)
            linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)(
                [sparse_input, dense_input])
        elif len(linear_emb_list[i]) > 0:
            # Sparse-only (mode=0).
            sparse_input = concat_func(linear_emb_list[i])
            linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(sparse_input)
        elif len(dense_input_list) > 0:
            # Dense-only (mode=1).
            dense_input = concat_func(dense_input_list)
            linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(dense_input)
        else:
            # No inputs at all; mirrors the original best-effort fallback.
            # raise NotImplementedError
            return add_func([])
        linear_logit_list.append(linear_logit)

    return concat_func(linear_logit_list)
def get_linear_logit(features, feature_columns, units=1, use_bias=False, seed=1024, prefix='linear', l2_reg=0):
    """Build the linear (order-1) logit tensor(s) for the given feature columns.

    Sparse and variable-length sparse columns are rewritten to use 1-dim,
    zero-initialized embeddings (one scalar weight per category), then a
    ``Linear`` layer combines the sparse embeddings and raw dense inputs.

    :param features: OrderedDict of feature name -> Keras input tensor.
    :param feature_columns: iterable of SparseFeat / VarLenSparseFeat / dense
        feature columns; a shallow copy is taken so the caller's list survives.
    :param units: number of independent linear logits (distinct embedding
        tables via distinct prefixes).
    :param use_bias: whether the ``Linear`` layer adds a bias term.
    :param seed: random seed forwarded to initializers.
    :param prefix: name prefix for the created layers.
    :param l2_reg: L2 regularization strength for the linear weights.
    :return: a tensor of shape (batch, units).
    """
    linear_feature_columns = copy(feature_columns)
    for idx, fc in enumerate(linear_feature_columns):
        if isinstance(fc, SparseFeat):
            # Linear weights are scalars: force a 1-dim, zero-initialized embedding.
            linear_feature_columns[idx] = fc._replace(
                embedding_dim=1, embeddings_initializer=Zeros())
        if isinstance(fc, VarLenSparseFeat):
            # Same rewrite, applied to the wrapped SparseFeat.
            linear_feature_columns[idx] = fc._replace(
                sparsefeat=fc.sparsefeat._replace(
                    embedding_dim=1, embeddings_initializer=Zeros()))

    # One sparse-embedding list per unit; prefixes keep embedding tables distinct.
    linear_emb_list = []
    for unit_idx in range(units):
        unit_embs, _ = input_from_feature_columns(
            features, linear_feature_columns, l2_reg, seed,
            prefix=prefix + str(unit_idx))
        linear_emb_list.append(unit_embs)
    _, dense_input_list = input_from_feature_columns(
        features, linear_feature_columns, l2_reg, seed, prefix=prefix)

    linear_logit_list = []
    for unit_idx in range(units):
        sparse_embs = linear_emb_list[unit_idx]
        has_sparse = len(sparse_embs) > 0
        has_dense = len(dense_input_list) > 0
        if has_sparse and has_dense:
            # Both input kinds present: Linear mode=2 takes [sparse, dense].
            linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias, seed=seed)(
                [concat_func(sparse_embs), concat_func(dense_input_list)])
        elif has_sparse:
            # Sparse-only: mode=0.
            linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias, seed=seed)(
                concat_func(sparse_embs))
        elif has_dense:
            # Dense-only: mode=1.
            linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias, seed=seed)(
                concat_func(dense_input_list))
        else:
            # raise NotImplementedError
            return add_func([])
        linear_logit_list.append(linear_logit)

    return concat_func(linear_logit_list)
######################################################################################################### print('group_embedding_dict',group_embedding_dict) print('dense_value_list',dense_value_list) cc=[] for k, v in group_embedding_dict.items(): cc.append(v) cc1=concat_func(cc[0], axis=1) cc2=FM()(cc1) # cc=[FM()(concat_func(v, axis=1)) # for k, v in group_embedding_dict.items() if k in fm_group] fm_logit = add_func([cc2]) dnn_input = combined_dnn_input(list(chain.from_iterable( group_embedding_dict.values())), dense_value_list) dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(dnn_input) dnn_logit = tf.keras.layers.Dense( 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(dnn_output) final_logit = add_func([linear_logit, fm_logit, dnn_logit]) output = PredictionLayer(task)(final_logit) model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,),
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001,
           l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
           dnn_use_bn=False, task='binary'):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: iterable, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    # Fixes vs. the previous version: leftover debug print() calls removed, and
    # the mutable default fm_group=[DEFAULT_GROUP_NAME] replaced by a tuple
    # (read-only here, so this is behaviorally identical for all callers).

    # Build the model's input tensors from the union of both column sets.
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    # Linear (order-1) logit.
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed,
                                    prefix='linear', l2_reg=l2_reg_linear)

    # Grouped sparse embeddings and raw dense inputs for the FM and DNN parts.
    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # FM (order-2) logit: one FM per selected feature group, summed.
    fm_logit = add_func([
        FM()(concat_func(v, axis=1))
        for k, v in group_embedding_dict.items() if k in fm_group
    ])

    # Deep part: all group embeddings plus dense values feed the DNN.
    dnn_input = combined_dnn_input(
        list(chain.from_iterable(group_embedding_dict.values())),
        dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                     dnn_use_bn, seed)(dnn_input)
    # Project to a single logit; no bias so the bias lives only in the linear part.
    dnn_logit = tf.keras.layers.Dense(
        1, use_bias=False,
        kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))(dnn_output)

    # Sum the three branches and apply the task-specific output activation.
    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model