def SDM(user_feature_columns, item_feature_columns, history_feature_list,
        num_sampled=5, units=64, rnn_layers=2, dropout_rate=0.2,
        rnn_num_res=1, num_head=4, l2_reg_embedding=1e-6,
        dnn_activation='tanh', init_std=0.0001, seed=1024):
    """Instantiates the Sequential Deep Matching Model architecture.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param history_feature_list: list, to indicate the sparse fields of the short-term and prefer (long-term) sequences
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param units: int, dimension of each output layer
    :param rnn_layers: int, number of RNN layers
    :param dropout_rate: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param rnn_num_res: int, the number of residual layers in the RNN
    :param num_head: int, the number of attention heads
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_activation: Activation function to use in deep net
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.
    """
    if len(item_feature_columns) > 1:
        raise ValueError("Now SDM only support 1 item feature like item_id")
    item_feature_column = item_feature_columns[0]
    item_feature_name = item_feature_column.name
    item_vocabulary_size = item_feature_columns[0].vocabulary_size

    features = build_input_features(user_feature_columns)
    user_inputs_list = list(features.values())

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), user_feature_columns)) if user_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), user_feature_columns)) if user_feature_columns else []
    if len(dense_feature_columns) != 0:
        raise ValueError("Now SDM don't support dense feature")
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), user_feature_columns)) if user_feature_columns else []

    sparse_varlen_feature_columns = []
    prefer_history_columns = []
    short_history_columns = []

    prefer_fc_names = list(map(lambda x: "prefer_" + x, history_feature_list))
    short_fc_names = list(map(lambda x: "short_" + x, history_feature_list))

    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in prefer_fc_names:
            prefer_history_columns.append(fc)
        elif feature_name in short_fc_names:
            short_history_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    embedding_matrix_dict = create_embedding_matrix(user_feature_columns + item_feature_columns,
                                                    l2_reg_embedding, init_std, seed, prefix="")

    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())

    prefer_emb_list = embedding_lookup(embedding_matrix_dict, features, prefer_history_columns,
                                       prefer_fc_names, prefer_fc_names, to_list=True)  # L^u
    short_emb_list = embedding_lookup(embedding_matrix_dict, features, short_history_columns,
                                      short_fc_names, short_fc_names, to_list=True)  # S^u
    # dense_value_list = get_dense_input(features, dense_feature_columns)
    user_emb_list = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns, to_list=True)

    sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features,
                                                  sparse_varlen_feature_columns, to_list=True)
    user_emb_list += sequence_embed_list  # e^u

    # if len(user_emb_list) > 0 or len(dense_value_list) > 0:
    #     user_emb_feature = combined_dnn_input(user_emb_list, dense_value_list)
    user_emb = concat_func(user_emb_list)
    user_emb_output = Dense(units, activation=dnn_activation, name="user_emb_output")(user_emb)

    prefer_sess_length = features['prefer_sess_length']
    prefer_att_outputs = []
    for i, prefer_emb in enumerate(prefer_emb_list):
        prefer_attention_output = AttentionSequencePoolingLayer(dropout_rate=0)(
            [user_emb_output, prefer_emb, prefer_sess_length])
        prefer_att_outputs.append(prefer_attention_output)
    prefer_att_concat = concat_func(prefer_att_outputs)
    prefer_output = Dense(units, activation=dnn_activation, name="prefer_output")(prefer_att_concat)

    short_sess_length = features['short_sess_length']
    short_emb_concat = concat_func(short_emb_list)
    short_emb_input = Dense(units, activation=dnn_activation, name="short_emb_input")(short_emb_concat)
    short_rnn_output = DynamicMultiRNN(num_units=units, return_sequence=True, num_layers=rnn_layers,
                                       num_residual_layers=rnn_num_res,
                                       dropout_rate=dropout_rate)([short_emb_input, short_sess_length])
    short_att_output = SelfMultiHeadAttention(num_units=units, head_num=num_head, dropout_rate=dropout_rate,
                                              future_binding=True, use_layer_norm=True)(
        [short_rnn_output, short_sess_length])  # [batch_size, time, num_units]
    short_output = UserAttention(num_units=units, activation=dnn_activation, use_res=True,
                                 dropout_rate=dropout_rate)([user_emb_output, short_att_output, short_sess_length])

    gate_input = concat_func([prefer_output, short_output, user_emb_output])
    gate = Dense(units, activation='sigmoid')(gate_input)
    gate_output = Lambda(lambda x: tf.multiply(x[0], x[1]) + tf.multiply(1 - x[0], x[2]))(
        [gate, short_output, prefer_output])
    gate_output_reshape = Lambda(lambda x: tf.squeeze(x, 1))(gate_output)

    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(item_features[item_feature_name])
    item_embedding_matrix = embedding_matrix_dict[item_feature_name]
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))
    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    output = SampledSoftmaxLayer(num_sampled=num_sampled)(
        [pooling_item_embedding_weight, gate_output_reshape, item_features[item_feature_name]])
    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    model.__setattr__("user_input", user_inputs_list)
    model.__setattr__("user_embedding", gate_output_reshape)
    model.__setattr__("item_input", item_inputs_list)
    model.__setattr__("item_embedding",
                      get_item_embedding(pooling_item_embedding_weight, item_features[item_feature_name]))
    return model

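# --- Usage sketch (illustrative, not part of the library) ---------------------
# A minimal example of wiring SDM together. It assumes the SparseFeat /
# VarLenSparseFeat classes imported in this module accept `embedding_name` and
# `length_name` (SDM reads 'prefer_sess_length' and 'short_sess_length' from the
# input features), and that a `sampledsoftmaxloss` helper matching
# SampledSoftmaxLayer is available; feature names and vocabulary sizes are made up.
#
# user_feature_columns = [
#     SparseFeat('user_id', vocabulary_size=1000, embedding_dim=32),
#     VarLenSparseFeat(SparseFeat('prefer_item_id', vocabulary_size=5000, embedding_dim=32,
#                                 embedding_name='item_id'),
#                      maxlen=50, length_name='prefer_sess_length'),
#     VarLenSparseFeat(SparseFeat('short_item_id', vocabulary_size=5000, embedding_dim=32,
#                                 embedding_name='item_id'),
#                      maxlen=5, length_name='short_sess_length'),
# ]
# item_feature_columns = [SparseFeat('item_id', vocabulary_size=5000, embedding_dim=32)]
#
# # `units` should match the item embedding dim so the sampled-softmax dot product lines up.
# model = SDM(user_feature_columns, item_feature_columns, history_feature_list=['item_id'], units=32)
# model.compile(optimizer='adam', loss=sampledsoftmaxloss)
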
def MIND(dnn_feature_columns, history_feature_list, target_song_size, k_max=2, dnn_use_bn=False,
         user_hidden_unit=64, dnn_activation='relu', l2_reg_dnn=0, l2_reg_embedding=1e-6,
         dnn_dropout=0, init_std=0.0001, seed=1024):
    """
    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field
    :param target_song_size: int, the total size of the recall songs
    :param k_max: int, the max size of user interest embedding
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param user_hidden_unit: int. user dnn hidden layer size
    :param dnn_activation: Activation function to use in deep net
    :param l2_reg_dnn: L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    hist_len = features['hist_len']
    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, init_std, seed, prefix="")

    history_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                        history_fc_names, history_fc_names, to_list=True)
    history_emb = concat_func(history_emb_list, mask=False)

    target_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, ['item'],
                                       history_feature_list, to_list=True)
    target_emb_tmp = concat_func(target_emb_list, mask=False)
    target_emb_size = target_emb_tmp.get_shape()[-1].value
    target_emb = tf.keras.layers.Lambda(shape_target,
                                        arguments={'target_emb_size': target_emb_size})(target_emb_tmp)

    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features,
                                                  sparse_varlen_feature_columns, to_list=True)
    dnn_input_emb_list += sequence_embed_list

    deep_input_emb = concat_func(dnn_input_emb_list)
    user_other_feature = Flatten()(deep_input_emb)

    max_len = history_emb.get_shape()[1].value
    high_capsule = CapsuleLayer(input_units=target_emb_size, out_units=target_emb_size,
                                max_len=max_len, k_max=k_max)((history_emb, hist_len))

    other_feature_tile = tf.keras.layers.Lambda(tile_user_otherfeat,
                                                arguments={'k_max': k_max})(user_other_feature)
    user_deep_input = Concatenate()([NoMask()(other_feature_tile), high_capsule])

    user_embeddings = DNN((user_hidden_unit, target_emb_size), dnn_activation,
                          l2_reg_dnn, dnn_dropout, dnn_use_bn, seed,
                          name="user_embedding")(user_deep_input)

    k_user = tf.cast(tf.maximum(
        1.,
        tf.minimum(tf.cast(k_max, dtype="float32"),
                   tf.log1p(tf.cast(hist_len, dtype="float32")) / tf.log(2.))),
        dtype="int64")  # [B,1] forword/Cast_2

    user_embedding_final = DotProductAttentionLayer(shape=[target_emb_size, target_emb_size])(
        (user_embeddings, target_emb), seq_length=k_user, max_len=k_max)

    output = SampledSoftmaxLayer(target_song_size=target_song_size, target_emb_size=target_emb_size)(
        inputs=(user_embedding_final, features['item']))
    model = Model(inputs=inputs_list, outputs=output)
    return model

def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10,
         att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid',
         l2_reg_dnn=0, l2_reg_embedding=1e-6, task='binary', dnn_dropout=0, init_std=0.0001, seed=1024,
         encoding='bias'):
    check_feature_config_dict(feature_dim_dict)

    print('sess_count', sess_max_count, 'encoding', encoding)

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                            sess_feature_list, sess_feature_list)
    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                                 mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    be_flag = True if encoding == 'bias' else False
    tr_input = sess_interest_division(sparse_embedding_dict, user_behavior_input_dict,
                                      feature_dim_dict['sparse'], sess_feature_list, sess_max_count,
                                      bias_encoding=be_flag)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not be_flag), seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                             supports_masking=False)(
        [query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(len(sess_feature_list) * embedding_size, layers=2, res_layers=0,
                          dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True)(
        [query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([deep_input_emb, Flatten()(interest_attention_layer),
                                    Flatten()(lstm_attention_layer)])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    # sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(get_inputs_list([user_behavior_input_dict[sess_name]]))
        # sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model_input_list = get_inputs_list([sparse_input, dense_input]) + sess_input_list + [user_sess_length]
    model = Model(inputs=model_input_list, outputs=output)

    return model

def CapsuleNet(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=50, use_bn=False,
               dnn_hidden_units=(200, 80), dnn_activation='sigmoid', num_capsule=8, dim_capsule=2,
               routing_iterations=3, att_hidden_size=(64, 16), att_activation="dice",
               att_weight_normalization=True, att_embedding_size=1, att_head_num=8, l2_reg_dnn=0,
               l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, alpha=1e-6, seed=1024, task='binary'):
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)

    scores = AttentionSequencePoolingLayer(att_hidden_units=att_hidden_size, att_activation=att_activation,
                                           weight_normalization=att_weight_normalization, return_score=True)(
        [query_emb, keys_emb, user_behavior_length])

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=True,
                                 use_positional_encoding=True, seed=seed, supports_masking=False, blinding=True)
    keys_emb = Self_Attention([keys_emb, keys_emb, user_behavior_length, user_behavior_length])

    cap = Capsule(num_capsule=num_capsule, dim_capsule=dim_capsule, routings=routing_iterations,
                  share_weights=True, supports_masking=True)
    hist_cap = cap(keys_emb, scores=scores)
    disp_loss = get_disp_loss(hist_cap)
    hist_cap = Reshape([1, num_capsule * dim_capsule])(NoMask()(hist_cap))

    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Concatenate()([deep_input_emb, hist_cap])
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list([sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    model.add_loss(alpha * disp_loss)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model

def MIND(user_feature_columns, item_feature_columns, num_sampled=5, k_max=2, p=1.0, dynamic_k=False,
         user_dnn_hidden_units=(64, 32), dnn_activation='relu', dnn_use_bn=False, l2_reg_dnn=0,
         l2_reg_embedding=1e-6, dnn_dropout=0, output_activation='linear', seed=1024):
    """Instantiates the MIND Model architecture.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param k_max: int, the max size of user interest embedding
    :param p: float, the parameter for adjusting the attention distribution in LabelAwareAttention.
    :param dynamic_k: bool, whether or not use dynamic interest number
    :param user_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of user tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param output_activation: Activation function to use in output layer
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.
    """
    if len(item_feature_columns) > 1:
        raise ValueError("Now MIND only support 1 item feature like item_id")
    item_feature_column = item_feature_columns[0]
    item_feature_name = item_feature_column.name
    item_vocabulary_size = item_feature_columns[0].vocabulary_size
    item_embedding_dim = item_feature_columns[0].embedding_dim
    # item_index = Input(tensor=tf.constant([list(range(item_vocabulary_size))]))

    history_feature_list = [item_feature_name]

    features = build_input_features(user_feature_columns)
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), user_feature_columns)) if user_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), user_feature_columns)) if user_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), user_feature_columns)) if user_feature_columns else []

    history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    seq_max_len = history_feature_columns[0].maxlen
    inputs_list = list(features.values())

    embedding_matrix_dict = create_embedding_matrix(user_feature_columns + item_feature_columns,
                                                    l2_reg_embedding, seed=seed, prefix="")

    item_features = build_input_features(item_feature_columns)

    query_emb_list = embedding_lookup(embedding_matrix_dict, item_features, item_feature_columns,
                                      history_feature_list, history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(embedding_matrix_dict, features, history_feature_columns,
                                     history_fc_names, history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_matrix_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_matrix_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features,
                                                  sparse_varlen_feature_columns, to_list=True)
    dnn_input_emb_list += sequence_embed_list

    # keys_emb = concat_func(keys_emb_list, mask=True)
    # query_emb = concat_func(query_emb_list, mask=True)
    history_emb = PoolingLayer()(NoMask()(keys_emb_list))
    target_emb = PoolingLayer()(NoMask()(query_emb_list))

    # target_emb_size = target_emb.get_shape()[-1].value
    # max_len = history_emb.get_shape()[1].value
    hist_len = features['hist_len']

    high_capsule = CapsuleLayer(input_units=item_embedding_dim, out_units=item_embedding_dim,
                                max_len=seq_max_len, k_max=k_max)((history_emb, hist_len))

    if len(dnn_input_emb_list) > 0 or len(dense_value_list) > 0:
        user_other_feature = combined_dnn_input(dnn_input_emb_list, dense_value_list)
        other_feature_tile = tf.keras.layers.Lambda(tile_user_otherfeat,
                                                    arguments={'k_max': k_max})(user_other_feature)
        user_deep_input = Concatenate()([NoMask()(other_feature_tile), high_capsule])
    else:
        user_deep_input = high_capsule

    user_embeddings = DNN(user_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                          output_activation=output_activation, seed=seed,
                          name="user_embedding")(user_deep_input)

    item_inputs_list = list(item_features.values())
    item_embedding_matrix = embedding_matrix_dict[item_feature_name]
    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(item_features[item_feature_name])
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))
    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    if dynamic_k:
        user_embedding_final = LabelAwareAttention(k_max=k_max, pow_p=p)((user_embeddings, target_emb, hist_len))
    else:
        user_embedding_final = LabelAwareAttention(k_max=k_max, pow_p=p)((user_embeddings, target_emb))

    output = SampledSoftmaxLayer(num_sampled=num_sampled)(
        [pooling_item_embedding_weight, user_embedding_final, item_features[item_feature_name]])
    model = Model(inputs=inputs_list + item_inputs_list, outputs=output)

    model.__setattr__("user_input", inputs_list)
    model.__setattr__("user_embedding", user_embeddings)
    model.__setattr__("item_input", item_inputs_list)
    model.__setattr__("item_embedding",
                      get_item_embedding(pooling_item_embedding_weight, item_features[item_feature_name]))
    return model

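# --- Usage sketch (illustrative, not part of the library) ---------------------
# A minimal example of building this MIND variant. It assumes the feature-column
# classes accept `embedding_name` and `length_name` (the model reads 'hist_len'
# from the inputs and expects the behaviour column to be named 'hist_item_id'
# when the item feature is 'item_id'); feature names, sizes and the
# `sampledsoftmaxloss` objective are assumptions, not taken from this repo.
#
# user_feature_columns = [
#     SparseFeat('user_id', vocabulary_size=1000, embedding_dim=16),
#     VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=5000, embedding_dim=32,
#                                 embedding_name='item_id'),
#                      maxlen=20, length_name='hist_len'),
# ]
# item_feature_columns = [SparseFeat('item_id', vocabulary_size=5000, embedding_dim=32)]
#
# # The last user-tower unit should equal the item embedding dim (32 here).
# model = MIND(user_feature_columns, item_feature_columns, k_max=2, dynamic_k=False,
#              user_dnn_hidden_units=(64, 32))
# model.compile(optimizer='adam', loss=sampledsoftmaxloss)
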
def KDD_DIN(dnn_feature_columns, history_feature_list, dnn_use_bn=False, dnn_hidden_units=(200, 80),
            dnn_activation='relu', att_hidden_size=(80, 40), att_activation="dice",
            att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0,
            init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param history_feature_list: list, to indicate sequence sparse field
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list, list of positive integer, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    embedding_dict = kdd_create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, init_std, seed, prefix="")

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                      history_feature_list, history_feature_list, to_list=True)
    keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                     history_fc_names, history_fc_names, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=history_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features,
                                                  sparse_varlen_feature_columns, to_list=True)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_func(keys_emb_list, mask=True)
    deep_input_emb = concat_func(dnn_input_emb_list)
    query_emb = concat_func(query_emb_list, mask=True)

    hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation,
                                         weight_normalization=att_weight_normalization,
                                         supports_masking=True)([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model

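# --- Usage sketch (illustrative, not part of the library) ---------------------
# A minimal example of calling KDD_DIN with deepctr-style feature columns; the
# feature names, vocabulary sizes and embedding dims are made up, and
# kdd_create_embedding_matrix is assumed to accept them like the regular helper.
#
# dnn_feature_columns = [
#     SparseFeat('user_id', vocabulary_size=1000, embedding_dim=8),
#     SparseFeat('item_id', vocabulary_size=5000, embedding_dim=8),
#     DenseFeat('price', 1),
#     VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=5000, embedding_dim=8,
#                                 embedding_name='item_id'),
#                      maxlen=20),
# ]
#
# # 'item_id' is the query field; its 'hist_item_id' sequence is attended over.
# model = KDD_DIN(dnn_feature_columns, history_feature_list=['item_id'], task='binary')
# model.compile(optimizer='adam', loss='binary_crossentropy')
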
def _model_fn(features, labels, mode, config):
    train_flag = (mode == tf.estimator.ModeKeys.TRAIN)

    with variable_scope(DNN_SCOPE_NAME):
        sparse_feature_columns = []
        dense_feature_columns = []
        varlen_sparse_feature_columns = []

        for feat in dnn_feature_columns:
            new_feat_name = list(feat.parse_example_spec.keys())[0]
            if new_feat_name in ['hist_price_id', 'hist_des_id']:
                varlen_sparse_feature_columns.append(
                    VarLenSparseFeat(SparseFeat(new_feat_name, vocabulary_size=100,
                                                embedding_dim=32, use_hash=False), maxlen=3))
            elif is_embedding(feat):
                sparse_feature_columns.append(
                    SparseFeat(new_feat_name, vocabulary_size=feat[0]._num_buckets + 1,
                               embedding_dim=feat.dimension))
            else:
                dense_feature_columns.append(DenseFeat(new_feat_name))

        history_feature_columns = []
        sparse_varlen_feature_columns = []
        history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
        for fc in varlen_sparse_feature_columns:
            feature_name = fc.name
            if feature_name in history_fc_names:
                history_feature_columns.append(fc)
            else:
                sparse_varlen_feature_columns.append(fc)

        my_feature_columns = sparse_feature_columns + dense_feature_columns + varlen_sparse_feature_columns

        embedding_dict = create_embedding_matrix(my_feature_columns, l2_reg_embedding, seed, prefix="")

        query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          history_feature_list, history_feature_list, to_list=True)
        print('query_emb_list', query_emb_list)
        print('embedding_dict', embedding_dict)
        print('history_feature_columns', history_feature_columns)

        keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
                                         history_fc_names, history_fc_names, to_list=True)
        print('keys_emb_list', keys_emb_list)

        dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                              mask_feat_list=history_feature_list, to_list=True)
        print('dnn_input_emb_list', dnn_input_emb_list)

        dense_value_list = get_dense_input(features, dense_feature_columns)

        sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
        sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features,
                                                      sparse_varlen_feature_columns, to_list=True)
        dnn_input_emb_list += sequence_embed_list

        keys_emb = concat_func(keys_emb_list, mask=True)
        deep_input_emb = concat_func(dnn_input_emb_list)
        query_emb = concat_func(query_emb_list, mask=True)

        hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation,
                                             weight_normalization=att_weight_normalization,
                                             supports_masking=True)([query_emb, keys_emb])

        deep_input_emb = tf.keras.layers.Concatenate()([NoMask()(deep_input_emb), hist])
        deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
        dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)

        output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        final_logit = tf.keras.layers.Dense(1, use_bias=False,
                                            kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output)

        # logits_list.append(final_logit)
        # logits = add_func(logits_list)
        # print(labels)
        # tf.summary.histogram(final_logit + '/final_logit', final_logit)

    return deepctr_model_fn(features, mode, final_logit, labels, task, linear_optimizer, dnn_optimizer,
                            training_chief_hooks=training_chief_hooks)

def YoutubeDNN(user_feature_columns, item_feature_columns, num_sampled=5, user_dnn_hidden_units=(64, 32),
               dnn_activation='relu', dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0,
               output_activation='linear', seed=1024):
    """Instantiates the YoutubeDNN Model architecture.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param user_dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of user tower
    :param dnn_activation: Activation function to use in deep net
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param output_activation: Activation function to use in output layer
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.
    """
    if len(item_feature_columns) > 1:
        raise ValueError("Now YoutubeDNN only support 1 item feature like item_id")
    item_feature_name = item_feature_columns[0].name
    item_vocabulary_size = item_feature_columns[0].vocabulary_size

    embedding_matrix_dict = create_embedding_matrix(user_feature_columns + item_feature_columns,
                                                    l2_reg_embedding, seed=seed)

    user_features = build_input_features(user_feature_columns)
    user_inputs_list = list(user_features.values())
    user_sparse_embedding_list, user_dense_value_list = input_from_feature_columns(
        user_features, user_feature_columns, l2_reg_embedding, seed=seed,
        embedding_matrix_dict=embedding_matrix_dict)
    user_dnn_input = combined_dnn_input(user_sparse_embedding_list, user_dense_value_list)

    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())

    user_dnn_out = DNN(user_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                       output_activation=output_activation, seed=seed)(user_dnn_input)

    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(item_features[item_feature_name])
    item_embedding_matrix = embedding_matrix_dict[item_feature_name]
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))
    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    output = SampledSoftmaxLayer(num_sampled=num_sampled)(
        [pooling_item_embedding_weight, user_dnn_out, item_features[item_feature_name]])
    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    model.__setattr__("user_input", user_inputs_list)
    model.__setattr__("user_embedding", user_dnn_out)
    model.__setattr__("item_input", item_inputs_list)
    model.__setattr__("item_embedding",
                      get_item_embedding(pooling_item_embedding_weight, item_features[item_feature_name]))
    return model

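# --- Usage sketch (illustrative, not part of the library) ---------------------
# A minimal two-tower example for YoutubeDNN; feature names, vocabulary sizes and
# the `sampledsoftmaxloss` objective are assumptions, not taken from this repo.
#
# user_feature_columns = [
#     SparseFeat('user_id', vocabulary_size=1000, embedding_dim=16),
#     SparseFeat('gender', vocabulary_size=2, embedding_dim=16),
# ]
# item_feature_columns = [SparseFeat('item_id', vocabulary_size=5000, embedding_dim=32)]
#
# # The last user-tower unit must equal the item embedding dim so the dot product
# # inside SampledSoftmaxLayer is well defined.
# model = YoutubeDNN(user_feature_columns, item_feature_columns, num_sampled=5,
#                    user_dnn_hidden_units=(64, 32))
# model.compile(optimizer='adam', loss=sampledsoftmaxloss)
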
def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16, dnn_use_bn=False,
        dnn_hidden_units=(200, 80), dnn_activation='relu', att_hidden_size=(80, 40), att_activation="dice",
        att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001,
        seed=1024, task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param att_hidden_size: list, list of positive integer, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'],
                                            seq_feature_list, seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'],
                                           seq_feature_list, seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'],
                                                 mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation,
                                         weight_normalization=att_weight_normalization,
                                         supports_masking=True)([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list([sparse_input, dense_input, user_behavior_input])
    model = Model(inputs=model_input_list, outputs=output)
    return model

def DSIN(feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10,
         bias_encoding=False, att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80),
         dnn_activation='sigmoid', dnn_dropout=0, dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6,
         init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Session Interest Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param sess_feature_list: list, to indicate session feature sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param sess_len_max: positive int, to indicate the max length of each session
    :param bias_encoding: bool. Whether use bias encoding or positional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention heads
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param dnn_activation: Activation function to use in deep net
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    if att_embedding_size * att_head_num != len(sess_feature_list) * embedding_size:
        raise ValueError(
            "len(sess_feature_list) * embedding_size must equal to att_embedding_size * att_head_num, got %d * %d != %d * %d" % (
                len(sess_feature_list), embedding_size, att_embedding_size, att_head_num))

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    sparse_embedding_dict = {
        feat.name: Embedding(feat.dimension, embedding_size,
                             embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                            sess_feature_list, sess_feature_list)
    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                                 mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    tr_input = sess_interest_division(sparse_embedding_dict, user_behavior_input_dict,
                                      feature_dim_dict['sparse'], sess_feature_list, sess_max_count,
                                      bias_encoding=bias_encoding)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding), seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                             supports_masking=False)(
        [query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(len(sess_feature_list) * embedding_size, layers=2, res_layers=0,
                          dropout_rate=0.2)(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True)(
        [query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([deep_input_emb, Flatten()(interest_attention_layer),
                                    Flatten()(lstm_attention_layer)])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    # sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(get_inputs_list([user_behavior_input_dict[sess_name]]))
        # sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model_input_list = get_inputs_list([sparse_input, dense_input]) + sess_input_list + [user_sess_length]
    model = Model(inputs=model_input_list, outputs=output)
    return model

def BST(feature_dim_dict, seq_feature_list, embedding_size=4, hist_len_max=16, use_bn=False,
        dnn_hidden_units=(200, 80), dnn_activation='relu', att_embedding_size=1, att_head_num=8,
        l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Behavior Sequence Transformer (BST) architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention heads
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # sparse_embedding_dict = {feat.name: Embedding(feat.dimension, embedding_size,
    #                                               embeddings_initializer=RandomNormal(
    #                                                   mean=0.0, stddev=init_std, seed=seed),
    #                                               embeddings_regularizer=l2(l2_reg_embedding),
    #                                               name='sparse_emb_' + str(i) + '-' + feat.name)
    #                          for i, feat in enumerate(feature_dim_dict["sparse"])}
    # print(sparse_embedding_dict)
    sparse_embedding_dict = {
        feat.name: Embedding(tf.cast(feat.dimension, tf.int32), embedding_size,
                             embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                             embeddings_regularizer=l2(l2_reg_embedding),
                             name='sparse_emb_' + str(i) + '-' + feat.name,
                             mask_zero=(feat.name in seq_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    # deep_emb_list = get_embedding_vec_list(
    #     deep_sparse_emb_dict, sparse_input_dict, feature_dim_dict['sparse'])
    query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
                                            return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(sparse_embedding_dict, user_behavior_input, feature_dim_dict['sparse'],
                                           return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    print("prev: {0}".format(keys_emb))

    # hist_cap = Capsule(
    #     num_capsule=8, dim_capsule=2,
    #     routings=3, share_weights=True)(NoMask()(keys_emb))
    # print("now: {0}".format(hist_cap))
    # # exit(0)
    # # keys_emb = concat_fun(keys_emb_list)
    # hist_cap = Reshape([1, 16])(hist_cap)

    deep_input_emb = concat_fun(deep_input_emb_list)
    print("deep input emb: ", deep_input_emb)
    # print("hist_cap: ", hist_cap)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=True, seed=seed, supports_masking=False,
                                 blinding=True)
    # print("now: {0}".format(hist))
    hists = []
    for key_emb in keys_emb_list:
        hist = Self_Attention([key_emb, key_emb, user_behavior_length, user_behavior_length])
        hists.append(hist)
    hist = concat_fun(hists)
    # Tensor("concatenate_2/concat:0", shape=(?, 50, 8), dtype=float32)
    # <tf.Tensor 'concatenate_3/concat:0' shape=(?, 4, 8) dtype=float32>

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    # print(deep_input_emb)
    deep_input_emb = tf.keras.layers.Flatten()(NoMask()(deep_input_emb))
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list([sparse_input, dense_input, user_behavior_input])
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model