def get_state(self, time_last):
    """Run the continuous-time LSTM and return its states at the previous step.

    The type embeddings are concatenated with ``time_last`` (expanded on a
    new last axis) and fed to ``self.ctsm_model.ctsm_net``.  The trailing
    ``num_units`` channels of the network output are gathered as
    ``h_i_minus``; the leading channels (gathered with width
    ``4 * num_units``) are split into four equal parts.

    :param time_last: tensor concatenated to the type embeddings after
        ``tf.expand_dims(time_last, 2)``.
    :return: tuple ``(o_i, c_i, c_i_bar, delta_i, h_i_minus)``, all taken at
        position ``mask_index - 1``.
    """
    units = self.num_units
    prev_positions = self.mask_index - 1

    def pick_previous(tensor, width):
        # Fetch the entries of the previous time step.
        return gather_indexes(batch_size=self.now_batch_size,
                              seq_length=self.max_seq_len,
                              width=width,
                              sequence_tensor=tensor,
                              positions=prev_positions)

    with tf.variable_scope('cstm_get_emb', reuse=tf.AUTO_REUSE):
        # TODO: the input construction still needs to be revised.
        ctsm_input = tf.concat(
            [self.type_lst_embedding,
             tf.expand_dims(time_last, 2)],
            axis=2)
        output = self.ctsm_model.ctsm_net(
            hidden_units=units,
            input_data=ctsm_input,
            input_length=tf.add(self.seq_len, -1))

        h_i_minus = pick_previous(output[:, :, -units:], units)
        state = pick_previous(output[:, :, :-units], units * 4)

        # Each part is annotated (batch_size, num_units) by the original author.
        o_i, c_i, c_i_bar, delta_i = array_ops.split(
            value=state, num_or_size_splits=4, axis=1)

    return o_i, c_i, c_i_bar, delta_i, h_i_minus
def get_state(self):
    """Encode the behaviour sequence and return the states of the previous step.

    The dense behaviour embeddings are concatenated with
    ``self.timelast_list`` (expanded on a new last axis) and passed through
    ``self.ctsm_model.ctsm_net``.  The last ``num_units`` channels of the
    output give ``h_i_minus``; the remaining channels (gathered with width
    ``4 * num_units``) are split into four equal parts.

    :return: tuple ``(o_i, c_i, c_i_bar, delta_i, h_i_minus)``, all gathered
        at position ``mask_index - 1``.
    """
    hidden = self.num_units
    previous = self.mask_index - 1
    gather_common = dict(batch_size=self.now_bacth_data_size,
                         seq_length=self.max_len,
                         positions=previous)

    with tf.variable_scope('cstm_get_emb', reuse=tf.AUTO_REUSE):
        features = [
            self.behavior_list_embedding_dense,
            tf.expand_dims(self.timelast_list, 2),
        ]
        time_aware_gru_net_input = tf.concat(features, axis=2)

        # TODO marker kept from the original (no description was given).
        output = self.ctsm_model.ctsm_net(
            hidden_units=hidden,
            input_data=time_aware_gru_net_input,
            input_length=self.seq_length - 1)

        # TODO (original): fetch the various states of the previous time step.
        h_i_minus = gather_indexes(width=hidden,
                                   sequence_tensor=output[:, :, -hidden:],
                                   **gather_common)
        state = gather_indexes(width=hidden * 4,
                               sequence_tensor=output[:, :, :-hidden],
                               **gather_common)

        # Each part is annotated (batch_size, num_units) by the original author.
        o_i, c_i, c_i_bar, delta_i = array_ops.split(
            value=state, num_or_size_splits=4, axis=1)

    return o_i, c_i, c_i_bar, delta_i, h_i_minus
def build_model(self):
    """Build the time-only prediction graph on top of a GRU encoder.

    The GRU input is the concatenation of the behaviour embeddings, a time
    embedding of ``timelast_list``, ``timelast_list`` itself and the average
    interval since the first event.  The last channel of the GRU output at
    position ``mask_index - 1`` is used as the predicted interval, which is
    added to the previous timestamp to form ``self.predict_time``.
    Finishes by calling ``self.output()``.
    """
    print('--------------------num blocks-------------------------'+str(self.num_blocks))
    self.gru_net_ins = GRU()

    def gather_previous(tensor, width):
        return gather_indexes(batch_size=self.now_bacth_data_size,
                              seq_length=self.max_len,
                              width=width,
                              sequence_tensor=tensor,
                              positions=self.mask_index - 1)

    with tf.variable_scope('ShortTermIntentEncoder', reuse=tf.AUTO_REUSE):
        first_time = tf.reshape(self.time_list[:, 0], [-1, 1])

        # Divisors 1, 1, 2, ..., max_len - 1; the leading 1 avoids dividing
        # the first position by zero.
        later_steps = tf.range(start=1., limit=self.max_len, delta=1)
        divisors = tf.concat([tf.constant([1.]), later_steps], axis=0)
        avg_interval_lst = (self.time_list - first_time) / divisors

        time_emb = self.embedding.get_thp_time_embedding(
            M=self.num_units, time_lst=self.timelast_list)

        gru_features = [
            self.behavior_list_embedding_dense,
            time_emb,
            tf.expand_dims(self.timelast_list, 2),
            tf.expand_dims(avg_interval_lst, 2),
        ]
        time_aware_gru_net_input = tf.concat(gru_features, axis=2)

        # Annotated (batch_size, max_len, num_units) by the original author.
        self.short_term_intent_temp = self.gru_net_ins.only_time_prediction_gru_net(
            hidden_units=self.num_units,
            input_data=time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            scope='gru')

        short_term_intent = gather_previous(self.short_term_intent_temp,
                                            self.num_units + 1)

        # The last channel is the predicted interval.
        self.interval_bar = short_term_intent[:, -1]

        # Timestamp of the previous event.
        self.last_time = gather_previous(self.time_list, 1)

        self.interval = self.interval_bar
        flat_last_time = tf.reshape(self.last_time, [-1, ])
        flat_interval = tf.reshape(self.interval, [-1, ])
        self.predict_time = tf.reshape(flat_last_time + flat_interval, [-1, ])

    self.output()
def build_model(self):
    """Build the Hawkes-process intensity, integral and prediction tensors.

    Sets ``self.target_lambda``, ``self.integral_lambda``,
    ``self.predict_time`` and ``self.predict_type_prob`` (an alias of
    ``self.target_lambda``), then calls ``self.output()``.
    """
    prev_positions = self.mask_index - 1
    # Keyword arguments shared by the intensity model calls.
    sequence_kwargs = dict(type_lst=self.type_lst,
                           type_num=self.type_num,
                           seq_len=self.seq_len,
                           max_seq_len=self.max_seq_len)

    with tf.variable_scope('intensity_calculation', reuse=tf.AUTO_REUSE):
        intensity_model = hp_intensity_calculation()

        # Annotated (batch_size, type_num) by the original author.
        self.target_lambda = intensity_model.cal_target_intensity(
            timenow_lst=self.target_time_now_lst, **sequence_kwargs)

        # Timestamp of the previous event.
        previous_time = gather_indexes(batch_size=self.now_batch_size,
                                       seq_length=self.max_seq_len,
                                       width=1,
                                       sequence_tensor=self.time_lst,
                                       positions=prev_positions)
        last_time = tf.squeeze(previous_time)

        self.integral_lambda = intensity_model.cal_integral_intensity(
            t_last=last_time,
            t_target=self.target_time,
            time_lst=self.time_lst,
            **sequence_kwargs)

    with tf.variable_scope('type_time_calculation', reuse=tf.AUTO_REUSE):
        # Time step right before target_time; here the time list gets an
        # explicit trailing axis before the gather.
        previous_time = gather_indexes(
            batch_size=self.now_batch_size,
            seq_length=self.max_seq_len,
            width=1,
            sequence_tensor=tf.expand_dims(self.time_lst, axis=-1),
            positions=prev_positions)
        last_time = tf.squeeze(previous_time)

        time_predictor = hp_time_predictor(f=intensity_model,
                                           type_num=self.type_num,
                                           max_seq_len=self.max_seq_len,
                                           type_lst=self.type_lst,
                                           last_time=last_time,
                                           time_lst=self.time_lst,
                                           seq_len=self.seq_len)
        self.predict_time = time_predictor.predict_time(
            outer_sims_len=self.FLAGS.outer_sims_len)

        # Annotated (batch_size, type_num) by the original author.
        self.predict_type_prob = self.target_lambda

    self.output()
def build_model(self):
    """Build the time-aware GRU encoder (``type='T_Gru_Extend'``).

    The GRU input concatenates the behaviour embeddings with
    ``timelast_list`` and a copy of it shifted left by one step and
    zero-padded at the end.  The output at ``mask_index - 1`` is
    layer-normalised into ``self.predict_behavior_emb`` before
    ``self.output()`` is called.
    """
    self.gru_net_ins = GRU()

    with tf.variable_scope('ShortTermIntentEncoder'):
        # Shift the time list left by one and pad the tail with a zero column.
        tail_padding = tf.zeros(shape=(self.now_bacth_data_size, 1))
        timenext_list = tf.concat([self.timelast_list[:, 1:], tail_padding],
                                  axis=1)

        gru_features = [
            self.behavior_list_embedding_dense,
            tf.expand_dims(self.timelast_list, 2),
            tf.expand_dims(timenext_list, 2),
        ]
        self.time_aware_gru_net_input = tf.concat(gru_features, 2)

        self.short_term_intent_temp = self.gru_net_ins.time_aware_gru_net(
            hidden_units=self.num_units,
            input_data=self.time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            type='T_Gru_Extend')

        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

        self.predict_behavior_emb = layer_norm(self.short_term_intent)

    self.output()
def build_model(self):
    """Build a GRU encoder seeded with the user embedding, its loss and train op.

    Steps, in order:
      1. fetch all embeddings from ``self.embedding.get_embedding``;
      2. run ``gru_net_initial`` with the user embedding as initial state and
         gather the output at ``mask_index - 1``;
      3. layer-normalise it, score it against the item lookup table and
         compute a softmax cross-entropy plus an L2 penalty;
      4. register summaries, clip gradients by global norm and create
         ``self.train_op``;
      5. call ``self.summery()``.
    """
    flags = self.FLAGS
    num_units = flags.num_units
    gru_net_ins = GRU()

    embedding_outputs = self.embedding.get_embedding(num_units)
    (self.sequence_embedding, self.positive_embedding,
     self.behavior_embedding_result_dense, self.positive_embedding_result_dense,
     self.mask_index, self.label_ids,
     self.seq_length, user_embedding, self.time) = embedding_outputs

    with tf.variable_scope("EnhanceUserPreferenceIntentEncoder"):
        user_preference_temp = gru_net_ins.gru_net_initial(
            hidden_units=num_units,
            input_length=self.mask_index,
            input_data=self.behavior_embedding_result_dense,
            initial_state=user_embedding)
        self.user_preference = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=flags.max_len,
            width=flags.num_units,
            sequence_tensor=user_preference_temp,
            positions=tf.add(self.mask_index, -1))

    with tf.variable_scope("OutputLayer"):
        self.predict_behavior_emb = layer_norm(self.user_preference)

        embedding_penalties = [
            tf.nn.l2_loss(self.sequence_embedding),
            tf.nn.l2_loss(self.positive_embedding),
        ]
        l2_norm = tf.add_n(embedding_penalties)
        regulation_rate = flags.regulation_rate

        # Score the predicted embedding against every row of the item table.
        item_lookup_table_T = tf.transpose(self.embedding.item_emb_lookup_table)
        logits = tf.matmul(self.predict_behavior_emb, item_lookup_table_T)
        log_probs = tf.nn.log_softmax(logits)

        flat_labels = tf.reshape(self.label_ids, [-1])
        one_hot_labels = tf.one_hot(flat_labels,
                                    depth=self.embedding.item_count + 3,
                                    dtype=tf.float32)
        self.loss_origin = -tf.reduce_sum(log_probs * one_hot_labels, axis=[-1])
        lstur_loss = regulation_rate * l2_norm + tf.reduce_mean(self.loss_origin)

    with tf.name_scope("LearningtoRankLoss"):
        self.loss = lstur_loss
        tf.summary.scalar("l2_norm", l2_norm)
        tf.summary.scalar("Training Loss", self.loss)
        tf.summary.scalar("Learning_rate", self.learning_rate)

    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    clip_gradients, _ = tf.clip_by_global_norm(gradients,
                                               flags.max_gradient_norm)
    self.train_op = self.opt.apply_gradients(
        zip(clip_gradients, trainable_params))
    self.summery()
def build_model(self):
    """Build the modified gated-GNN + GraphRNN encoder.

    Graph structure embeddings are generated from the behaviour embeddings
    and the in/out adjacency tensors, concatenated with the behaviour
    embeddings and fed to ``modified_grnn_net``.  The output at
    ``mask_index - 1`` becomes ``self.short_term_intent`` and its
    layer-normalised form ``self.predict_behavior_emb``.  Finishes by
    calling ``self.output()``.
    """
    self.gru_net_ins = GraphRNN()
    self.gated_gnn_model = modified_gated_GNN()

    with tf.variable_scope('user_behavior_emb'):
        behavior_emb = self.behavior_list_embedding_dense

    with tf.variable_scope('neighbor_emb', reuse=tf.AUTO_REUSE):
        # Annotated (batch_size, max_len, num_units * 2) by the original author.
        structure_emb = self.gated_gnn_model.generate_graph_emb(
            init_emb=behavior_emb,
            now_batch_size=self.now_bacth_data_size,
            num_units=self.num_units,
            adj_in=self.adj_in,
            adj_out=self.adj_out,
            step=self.FLAGS.graph_step)

    with tf.variable_scope('ShortTermIntentEncoder', reuse=tf.AUTO_REUSE):
        grnn_inputs = tf.concat([behavior_emb, structure_emb], axis=2)
        encoded_sequence = self.gru_net_ins.modified_grnn_net(
            hidden_units=self.num_units,
            input_data=grnn_inputs,
            input_length=tf.add(self.seq_length, -1))

        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=encoded_sequence,
            positions=self.mask_index - 1)

        self.predict_behavior_emb = layer_norm(self.short_term_intent)

    self.output()
def get_emb(self, time_emb, type_emb):
    """Encode the summed embeddings and return the previous step's hidden state.

    :param time_emb: batch_size, seq_len, num_units
    :param type_emb: batch_size, seq_len, num_units
    :return: the self-attention output gathered at ``mask_index - 1`` with
        width ``FLAGS.THP_M``.
    """
    flags = self.FLAGS
    model_dim = flags.THP_M

    hidden_emb = self.sahp_att_model.multistack_multihead_self_attention(
        X=time_emb + type_emb,
        M=model_dim,
        Mv=flags.THP_Mv,
        L=self.max_seq_len,
        N=self.now_batch_size,
        head_num=flags.THP_head_num,
        stack_num=flags.THP_stack_num,
        dropout_rate=self.dropout_rate,
    )

    # Take the embedding of the previous time step.
    return gather_indexes(batch_size=self.now_batch_size,
                          seq_length=self.max_seq_len,
                          width=model_dim,
                          sequence_tensor=hidden_emb,
                          positions=self.mask_index - 1)
def build_model(self):
    """Build the time-aware self-attention encoder.

    The behaviour embeddings attend to themselves, with ``self.time_list``
    supplying both query and key times.  The output at ``mask_index`` is
    layer-normalised into ``self.predict_behavior_emb`` and
    ``self.output()`` is called.
    """
    attention = Time_Aware_Attention()
    lengths = self.seq_length
    timestamps = self.time_list

    with tf.variable_scope("UserHistoryEncoder"):
        user_history = attention.self_attention(
            enc=self.behavior_list_embedding_dense,
            num_units=self.num_units,
            num_heads=self.num_heads,
            num_blocks=self.num_blocks,
            dropout_rate=self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=lengths,
            query_length=lengths,
            t_querys=timestamps,
            t_keys=timestamps,
            t_keys_length=self.max_len,
            t_querys_length=self.max_len)

        long_term_preference = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=user_history,
            positions=self.mask_index)

        self.predict_behavior_emb = layer_norm(long_term_preference)

    self.output()
def build_model(self):
    """Build the ``my_lstm_net`` encoder over the behaviour embeddings.

    The network output at ``mask_index - 1`` is layer-normalised into
    ``self.predict_behavior_emb`` before ``self.output()`` is called.
    """
    self.ctsm_model = ContinuousLSTM()

    with tf.variable_scope('ShortTermIntentEncoder', reuse=tf.AUTO_REUSE):
        # Single-element concat kept from the original.
        lstm_input = tf.concat([self.behavior_list_embedding_dense], axis=2)

        # Annotated (batch_size, max_len, num_units) by the original author.
        self.short_term_intent_temp = self.ctsm_model.my_lstm_net(
            hidden_units=self.num_units,
            input_data=lstm_input,
            input_length=tf.add(self.seq_length, -1))

        # Annotated (batch_size, num_units) by the original author.
        previous_state = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

        self.predict_behavior_emb = layer_norm(previous_state)

    self.output()
def build_model(self):
    """Build the time-aware GRU encoder with a time-aware attention decoder.

    The GRU (``type='new'``) consumes the behaviour embeddings together with
    ``timelast_list`` and ``timenow_list``.  Its layer-normalised output at
    ``mask_index - 1`` is used both as the short-term intent and as the
    single query of a vanilla attention over the full GRU output.  The two
    vectors are concatenated, layer-normalised and stored in
    ``self.predict_behavior_emb``; ``self.output_concat()`` is called last.
    """
    time_aware_attention = Time_Aware_Attention()
    self.gru_net_ins = GRU()

    with tf.variable_scope('ShortTermIntentEncoder'):
        gru_features = [
            self.behavior_list_embedding_dense,
            tf.expand_dims(self.timelast_list, 2),
            tf.expand_dims(self.timenow_list, 2),
        ]
        self.time_aware_gru_net_input = tf.concat(gru_features, 2)

        self.short_term_intent_temp = self.gru_net_ins.time_aware_gru_net(
            hidden_units=self.num_units,
            input_data=self.time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            type='new')
        user_history = self.short_term_intent_temp

        previous_state = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)
        self.short_term_intent = layer_norm(previous_state)

        # Add a length-1 query axis for the attention call.
        query = tf.expand_dims(self.short_term_intent, 1)

    with tf.variable_scope('NextItemDecoder'):
        single_query_length = tf.ones_like(query[:, 0, 0], dtype=tf.int32)
        hybird_preference = time_aware_attention.vanilla_attention(
            user_history,
            query,
            self.num_units,
            1,
            1,
            self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=single_query_length,
            t_querys=tf.expand_dims(self.target[2], 1),
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=1)

        combined = tf.concat([self.short_term_intent, hybird_preference], 1)
        self.predict_behavior_emb = layer_norm(combined)

    self.output_concat()
def build_model(self):
    """Build a plain GRU encoder followed by a time-aware attention decoder.

    The GRU output at ``mask_index - 1`` is the single query of a vanilla
    attention whose keys are the raw behaviour embeddings.  The attention
    result is layer-normalised into ``self.predict_behavior_emb`` before
    ``self.output()`` is called.
    """
    time_aware_attention = Time_Aware_Attention()
    self.gru_net_ins = GRU()

    with tf.variable_scope("UserHistoryEncoder"):
        user_history = self.behavior_list_embedding_dense

    with tf.variable_scope('ShortTermIntentEncoder'):
        self.short_term_intent_temp = self.gru_net_ins.gru_net(
            hidden_units=self.num_units,
            input_data=self.behavior_list_embedding_dense,
            input_length=tf.add(self.seq_length, -1))

        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

        # Add a length-1 query axis for the attention call.
        query = tf.expand_dims(self.short_term_intent, 1)

    with tf.variable_scope('NextItemDecoder'):
        single_query_length = tf.ones_like(query[:, 0, 0], dtype=tf.int32)
        hybird_preference = time_aware_attention.vanilla_attention(
            user_history,
            query,
            self.num_units,
            self.num_heads,
            self.num_blocks,
            self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=single_query_length,
            t_querys=tf.expand_dims(self.target[2], 1),
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=1)

        self.predict_behavior_emb = layer_norm(hybird_preference)

    self.output()
def build_model(self):
    """Build the reconsume-prediction GRU model.

    The GRU output gathered at ``mask_index - 1`` has width
    ``2 * num_units``: the first half is the next-behaviour embedding
    (layer-normalised into ``self.predict_behavior_emb``), the second half
    is scored against the reconsume lookup table to obtain
    ``self.predict_is_reconsume``.  ``self.reconsume_scores`` holds the
    cosine similarity between the predicted embedding and every item
    embedding of the same sequence.  Finishes by calling ``self.output()``.
    """
    print('--------------------num blocks-------------------------'+str(self.num_blocks))
    self.gru_net_ins = GRU()

    def cosine(q, a):
        # Row-wise cosine similarity; the epsilon guards all-zero rows.
        pooled_len_1 = tf.sqrt(tf.reduce_sum(q * q, 1))
        pooled_len_2 = tf.sqrt(tf.reduce_sum(a * a, 1))
        pooled_mul_12 = tf.reduce_sum(q * a, 1)
        score = tf.div(pooled_mul_12,
                       pooled_len_1 * pooled_len_2 + 1e-8,
                       name="scores")
        return score

    with tf.variable_scope('ShortTermIntentEncoder', reuse=tf.AUTO_REUSE):
        time_aware_gru_net_input = tf.concat(
            [self.behavior_list_embedding_dense,
             self.reconsume_lst_embedding,
             tf.expand_dims(self.timelast_list, 2),
             tf.expand_dims(self.reconsume_list, 2)],
            axis=2)

        # Annotated (batch_size, max_len, num_units) by the original author.
        self.short_term_intent_temp = self.gru_net_ins.reconsume_prediction_gru_net(
            hidden_units=self.num_units,
            input_data=time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            scope='gru')

        emb = gather_indexes(batch_size=self.now_bacth_data_size,
                             seq_length=self.max_len,
                             width=self.num_units * 2,
                             sequence_tensor=self.short_term_intent_temp,
                             positions=self.mask_index - 1)

        # The leading num_units channels are the hidden state h.
        short_term_intent = emb[:, :self.num_units]
        self.predict_behavior_emb = layer_norm(short_term_intent)

        # The trailing channels are scored against the reconsume table;
        # column 1 of the softmax is kept as the reconsume prediction.
        predict_reconsume_emb = emb[:, self.num_units:]
        reconsume_table = self.embedding.reconsume_emb_lookup_table
        reconsume_probs = tf.nn.softmax(
            tf.matmul(predict_reconsume_emb, reconsume_table,
                      transpose_b=True))
        self.predict_is_reconsume = reconsume_probs[:, 1]

        # Flattened item embeddings; assumes item_list_emb is
        # (batch_size, max_len, num_units), so rows are ordered sample by
        # sample -- TODO confirm against the embedding builder.
        item_embs = tf.reshape(self.item_list_emb, [-1, self.num_units])

        # Repeat each sample's prediction max_len times *consecutively* so
        # row i lines up with row i of item_embs.  Tiling the 2-D tensor with
        # [max_len, 1] would instead repeat the whole batch block and pair
        # predictions with items of other samples.
        predict_target_embs = tf.reshape(
            tf.tile(tf.expand_dims(self.predict_behavior_emb, 1),
                    [1, self.max_len, 1]),
            [-1, self.num_units])

        flat_scores = cosine(predict_target_embs, item_embs)
        self.reconsume_scores = tf.reshape(flat_scores, [-1, self.max_len])

    self.output()
def build_model(self):
    """Build the self-attentive Hawkes process graph.

    The hidden state ``h`` from ``get_emb`` is projected to the three
    intensity parameters ``mu``, ``eta`` (both via ``self.gelu``) and
    ``gamma`` (via softplus).  These feed the target and simulated
    intensities; ``h`` also feeds the time and type predictors.
    Finishes by calling ``self.output()``.
    """
    self.sahp_att_model = SahpSelfAttention()

    # TODO (original): the time-list embedding needs to change.
    h = self.get_emb(self.sahp_time_lst_embedding, self.type_lst_embedding)
    M = self.FLAGS.THP_M
    dtype = h.dtype

    with tf.variable_scope('prepare_parameter', reuse=tf.AUTO_REUSE):
        weight_shape = (M, 1)
        bias_shape = (1, )
        W_mu = tf.get_variable('W_mu', shape=weight_shape, dtype=dtype)
        W_eta = tf.get_variable('W_eta', shape=weight_shape, dtype=dtype)
        W_gamma = tf.get_variable('W_gamma', shape=weight_shape, dtype=dtype)
        b_mu = tf.get_variable('b_mu', shape=bias_shape, dtype=dtype)
        b_eta = tf.get_variable('b_eta', shape=bias_shape, dtype=dtype)
        b_gamma = tf.get_variable('b_gamma', shape=bias_shape, dtype=dtype)

        # TODO (original): the gelu function needs to be checked.
        mu = self.gelu(tf.matmul(h, W_mu) + b_mu)
        eta = self.gelu(tf.matmul(h, W_eta) + b_eta)
        gamma = tf.nn.softplus(tf.matmul(h, W_gamma) + b_gamma)

    with tf.variable_scope('intensity_calculation', reuse=tf.AUTO_REUSE):
        # Time step right before target_time.
        previous_time = gather_indexes(
            batch_size=self.now_batch_size,
            seq_length=self.max_seq_len,
            width=1,
            sequence_tensor=tf.expand_dims(self.time_lst, axis=-1),
            positions=self.mask_index - 1)
        last_time = tf.squeeze(previous_time)

        intensity_model = sahp_intensity_calculation(mu=mu,
                                                     eta=eta,
                                                     gamma=gamma)
        self.target_lambda = intensity_model.cal_target_intensity(
            target_time=self.target_time,
            last_time=last_time,
            type_num=self.type_num)
        self.sims_lambda = intensity_model.cal_sims_intensity(
            sims_time=self.sims_time_lst,
            last_time=last_time,
            sims_len=self.sims_len,
            type_num=self.type_num)

    with tf.variable_scope('predict_time_type', reuse=tf.AUTO_REUSE):
        # Annotated (batch_size, 1) by the original author.
        time_predictor = thp_time_predictor()
        self.predict_time = time_predictor.predict_time(
            emb=h, num_units=self.FLAGS.THP_M)

        # Annotated (batch_size, type_num) by the original author.
        type_predictor = thp_type_predictor()
        self.predict_type_prob = type_predictor.predict_type(
            emb=h, num_units=self.FLAGS.THP_M, type_num=self.type_num)

    self.output()
def build_model(self):
    """Build the gated-GNN encoder mixed with a self-attention read-out.

    ``self.short_term_intent`` is the GNN embedding at ``mask_index - 1``;
    ``self.long_term_intent`` is the self-attention output at
    ``mask_index``.  They are blended with a learned scalar ``eps`` as
    ``eps * short + (1 - eps) * long`` into ``self.predict_behavior_emb``.
    Finishes by calling ``self.output()``.
    """
    attention = Attention()
    self.ggnn_model = gated_GNN()

    def gather_at(tensor, positions):
        # Annotated (batch_size, num_units) by the original author.
        return gather_indexes(batch_size=self.now_bacth_data_size,
                              seq_length=self.max_len,
                              width=self.num_units,
                              sequence_tensor=tensor,
                              positions=positions)

    with tf.variable_scope('user_behavior_emb'):
        behavior_emb = self.behavior_list_embedding_dense

    with tf.variable_scope('ggnn_encoding', reuse=tf.AUTO_REUSE):
        self.gnn_emb_vec = self.ggnn_model.generate_graph_emb(
            init_emb=behavior_emb,
            now_batch_size=self.now_bacth_data_size,
            num_units=self.num_units,
            adj_in=self.adj_in,
            adj_out=self.adj_out,
            step=self.FLAGS.graph_step)
        self.short_term_intent = gather_at(self.gnn_emb_vec,
                                           self.mask_index - 1)

    with tf.variable_scope('self_attention', reuse=tf.AUTO_REUSE):
        self.att_emb_vec = attention.self_attention(
            enc=self.gnn_emb_vec,
            num_units=self.num_units,
            num_heads=self.num_heads,
            num_blocks=self.num_blocks,
            dropout_rate=self.dropout_rate,
            is_training=True,
            reuse=None,
            key_length=self.seq_length,
            query_length=self.seq_length)
        self.long_term_intent = gather_at(self.att_emb_vec, self.mask_index)

    with tf.variable_scope('sess_emb', reuse=tf.AUTO_REUSE):
        eps = tf.get_variable('eps', [1], dtype=tf.float32)
        short_part = eps * self.short_term_intent
        long_part = (1 - eps) * self.long_term_intent
        self.predict_behavior_emb = short_part + long_part

    self.output()
def build_model(self):
    """Build the time-aware gated-GNN encoder with a vanilla attention decoder.

    The GNN output is used both as the attention keys and, gathered at
    ``mask_index - 1`` and layer-normalised, as the short-term intent and
    single attention query.  Intent and attention result are concatenated,
    layer-normalised and stored in ``self.predict_behavior_emb``;
    ``self.output_concat()`` is called last.
    """
    attention = Attention()
    self.gru_net_ins = GRU()
    self.ggnn_model = gated_GNN()

    with tf.variable_scope('user_behavior_emb'):
        behavior_emb = self.behavior_list_embedding_dense

    with tf.variable_scope('ggnn_encoding'):
        self.short_term_intent_temp = self.ggnn_model.generate_time_aware_emb(
            init_emb=behavior_emb,
            adj_avg_time=self.adj_avg_time,
            now_batch_size=self.now_bacth_data_size,
            num_units=self.num_units,
            adj_in=self.adj_in,
            adj_out=self.adj_out,
            step=self.FLAGS.graph_step)
        user_history = self.short_term_intent_temp

        previous_state = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)
        self.short_term_intent = layer_norm(previous_state)

        # Add a length-1 query axis for the attention call.
        query = tf.expand_dims(self.short_term_intent, 1)

    with tf.variable_scope('NextItemDecoder'):
        single_query_length = tf.ones_like(query[:, 0, 0], dtype=tf.int32)
        hybird_preference = attention.vanilla_attention(
            user_history,
            query,
            self.num_units,
            1,
            1,
            self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=single_query_length)

        combined = tf.concat([self.short_term_intent, hybird_preference], 1)
        self.predict_behavior_emb = layer_norm(combined)

    self.output_concat()
def build_model(self):
    """Build a GRU encoder whose initial state is the user embedding.

    The GRU output at ``mask_index - 1`` is stored as
    ``self.short_term_intent`` and layer-normalised into
    ``self.predict_behavior_emb`` before ``self.output()`` is called.
    """
    self.gru_net_ins = GRU()

    with tf.variable_scope('ShortTermIntentEncoder'):
        effective_length = tf.add(self.seq_length, -1)
        self.short_term_intent_temp = self.gru_net_ins.gru_net_initial(
            hidden_units=self.num_units,
            initial_state=self.user_embedding,
            input_data=self.behavior_list_embedding_dense,
            input_length=effective_length)

        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

        self.predict_behavior_emb = layer_norm(self.short_term_intent)

    self.output()
def build_model(self):
    """Build the time-aware self-attention model with its cross-entropy loss.

    The behaviour embeddings attend to themselves, with ``self.time_list``
    supplying both query and key times.  The output at ``mask_index`` is
    layer-normalised into ``self.predict_behavior_emb`` and scored against
    the item lookup table.  ``self.loss`` is the mean softmax cross-entropy
    against ``self.target[0]`` plus ``FLAGS.regulation_rate`` times the L2
    norm of the item, category and position embeddings.  Summaries are
    registered and ``self.cal_gradient`` is called on all trainable
    variables.
    """
    # The attention width was hard-coded to 128 while the gather below reads
    # FLAGS.num_units channels; use the configured size in both places so
    # other embedding sizes work.
    num_units = self.FLAGS.num_units

    time_aware_attention = Time_Aware_Attention()
    with tf.variable_scope("UserHistoryEncoder"):
        user_history = time_aware_attention.self_attention(
            enc=self.behavior_list_embedding_dense,
            num_units=num_units,
            num_heads=self.num_heads,
            num_blocks=self.num_blocks,
            dropout_rate=self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=self.seq_length,
            t_querys=self.time_list,
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=self.max_len)

        long_term_prefernce = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=num_units,
            sequence_tensor=user_history,
            positions=self.mask_index)

        self.predict_behavior_emb = long_term_prefernce
        self.predict_behavior_emb = layer_norm(self.predict_behavior_emb)

    with tf.name_scope('CrossEntropyLoss'):
        l2_norm = tf.add_n([
            tf.nn.l2_loss(self.item_list_emb),
            tf.nn.l2_loss(self.category_list_emb),
            tf.nn.l2_loss(self.position_list_emb)
        ])
        regulation_rate = self.FLAGS.regulation_rate

        # Score the predicted embedding against every row of the item table.
        item_lookup_table_T = tf.transpose(
            self.embedding.item_emb_lookup_table)
        logits = tf.matmul(self.predict_behavior_emb, item_lookup_table_T)
        log_probs = tf.nn.log_softmax(logits)

        label_ids = tf.reshape(self.target[0], [-1])
        one_hot_labels = tf.one_hot(label_ids,
                                    depth=self.embedding.item_count + 3,
                                    dtype=tf.float32)
        self.loss_origin = -tf.reduce_sum(log_probs * one_hot_labels,
                                          axis=[-1])
        self.loss = regulation_rate * l2_norm + tf.reduce_mean(
            self.loss_origin)

        tf.summary.scalar('l2_norm', l2_norm)
        tf.summary.scalar('Training Loss', self.loss)
        tf.summary.scalar('Learning_rate', self.learning_rate)

    self.cal_gradient(tf.trainable_variables())
def build_model(self):
    """Build an attention read-out over the history gated by the last click.

    Attention weights for every history position are computed from the
    position itself, the layer-normalised sum of the history
    (``external_memory``) and the embedding at ``mask_index - 1``
    (``last_click``).  The attention-weighted history sum and the last click
    each pass through a ReLU dense layer; their element-wise product is
    layer-normalised into ``self.predict_behavior_emb``.  Finishes by
    calling ``self.output()``.
    """
    user_history = self.behavior_list_embedding_dense
    external_memory = layer_norm(tf.reduce_sum(user_history, 1))
    last_click = gather_indexes(batch_size=self.now_bacth_data_size,
                                seq_length=self.max_len,
                                width=self.num_units,
                                sequence_tensor=user_history,
                                positions=self.mask_index - 1)

    with tf.variable_scope('AttentionNet'):
        att_w0 = variable_scope.get_variable(
            "att_w0", shape=[self.num_units, 1], dtype=user_history.dtype)
        att_w1 = variable_scope.get_variable(
            "att_w1", shape=[self.num_units, self.num_units],
            dtype=user_history.dtype)
        att_w2 = variable_scope.get_variable(
            "att_w2", shape=[self.num_units, self.num_units],
            dtype=user_history.dtype)
        att_w3 = variable_scope.get_variable(
            "att_w3", shape=[self.num_units, self.num_units],
            dtype=user_history.dtype)
        # att_b is not used in the computation below; it is still created so
        # the set of graph variables (and existing checkpoints) is unchanged.
        att_b = variable_scope.get_variable(
            "att_b", shape=[1, self.num_units], dtype=user_history.dtype)

        a_history = tf.matmul(user_history, att_w1)
        a_external_memory = tf.matmul(external_memory, att_w2)
        a_last_click = tf.matmul(last_click, att_w3)

        # Broadcast the two per-sample terms over the sequence axis.
        att = a_history + tf.expand_dims(a_external_memory, 1)
        att = att + tf.expand_dims(a_last_click, 1)
        att = tf.sigmoid(att)

        # One scalar weight per history position.
        att = tf.squeeze(tf.matmul(att, att_w0), 2)

        # Per-sample attention-weighted sum of the history.  The previous
        # rank-2 x rank-3 matmul followed by reduce_sum(axis=1) either failed
        # on the rank mismatch or broadcast across the batch and combined
        # every sample's weights with every sample's history.
        ms = tf.reduce_sum(tf.expand_dims(att, 2) * user_history, 1)

    with tf.variable_scope('MLPcellA'):
        self.hs = tf.layers.dense(
            ms,
            self.num_units,
            activation=tf.nn.relu,
            use_bias=False,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-5))

    with tf.variable_scope('MLPcellB'):
        self.ht = tf.layers.dense(
            last_click,
            self.num_units,
            activation=tf.nn.relu,
            use_bias=False,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-5))

    self.predict_behavior_emb = layer_norm(self.hs * self.ht)
    self.output()
def build_model(self):
    """Build the ordered gated-GNN + GraphRNN encoder.

    Structure embeddings are generated from the behaviour embeddings, the
    in/out adjacency tensors, their masks and the edge-id embeddings, then
    concatenated with the behaviour embeddings and fed to
    ``simple_grnn_net``.  The output at ``mask_index - 1`` becomes
    ``self.short_term_intent`` and its layer-normalised form
    ``self.predict_behavior_emb``.  Finishes by calling ``self.output()``.
    """
    self.gru_net_ins = GraphRNN()
    self.gated_gnn_model = ordered_gated_GNN()

    with tf.variable_scope('user_behavior_emb'):
        behavior_emb = self.behavior_list_embedding_dense

    with tf.variable_scope('neighbor_emb', reuse=tf.AUTO_REUSE):
        # Annotated (batch_size, max_len, num_units * 2) by the original author.
        structure_emb = self.gated_gnn_model.generate_graph_emb(
            init_emb=behavior_emb,
            now_batch_size=self.now_bacth_data_size,
            num_units=self.num_units,
            adj_in=self.adj_in,
            adj_out=self.adj_out,
            eid_emb_in=self.in_eid_embedding,
            eid_emb_out=self.out_eid_embedding,
            mask_adj_in=self.mask_adj_in,
            mask_adj_out=self.mask_adj_out,
            step=self.FLAGS.graph_step)

    with tf.variable_scope('ShortTermIntentEncoder'):
        grnn_inputs = tf.concat([behavior_emb, structure_emb], axis=2)
        self.short_term_intent_temp = self.gru_net_ins.simple_grnn_net(
            hidden_units=self.num_units,
            input_data=grnn_inputs,
            input_length=tf.add(self.seq_length, -1))

        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

        self.predict_behavior_emb = layer_norm(self.short_term_intent)

    self.output()
def build_model(self):
    """Build the continuous-time LSTM model evaluated at the target time.

    The states from ``self.get_state()`` and the elapsed time between the
    previous event and ``self.target[2]`` are passed to ``self.cal_ht``; the
    result is stored in ``self.predict_behavior_emb`` before
    ``self.output()`` is called.
    """
    self.ctsm_model = ContinuousLSTM()

    # Timestamp of the event at mask_index - 1, with the width-1 axis removed.
    previous_time = gather_indexes(batch_size=self.now_bacth_data_size,
                                   seq_length=self.max_len,
                                   width=1,
                                   sequence_tensor=self.time_list,
                                   positions=self.mask_index - 1)
    last_time = tf.squeeze(previous_time, axis=1)

    h_i, h_i_bar, delta_i = self.get_state()
    elapsed = self.target[2] - last_time
    self.predict_behavior_emb = self.cal_ht(h_i, h_i_bar, delta_i, elapsed)

    self.output()
def build_model(self):
    """Build a one-step gated-GNN encoder followed by time-aware self-attention.

    The GNN output attends to itself, with ``self.time_list`` supplying both
    query and key times.  The attention output at ``mask_index`` is
    layer-normalised into ``self.predict_behavior_emb`` before
    ``self.output()`` is called.
    """
    self.gru_net_ins = GRU()
    self.ggnn_model = gated_GNN()

    with tf.variable_scope('user_behavior_emb'):
        behavior_emb = self.behavior_list_embedding_dense

    with tf.variable_scope('ggnn_encoding'):
        # A single propagation step is used here.
        gnn_emb = self.ggnn_model.generate_graph_emb(
            init_emb=behavior_emb,
            now_batch_size=self.now_bacth_data_size,
            num_units=self.num_units,
            adj_in=self.adj_in,
            adj_out=self.adj_out,
            step=1)

    time_aware_attention = Time_Aware_Attention()
    with tf.variable_scope("UserHistoryEncoder"):
        user_history = time_aware_attention.self_attention(
            gnn_emb,
            self.num_units,
            self.num_heads,
            self.num_blocks,
            self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=self.seq_length,
            t_querys=self.time_list,
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=self.max_len)

        long_term_preference = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.FLAGS.num_units,
            sequence_tensor=user_history,
            positions=self.mask_index)

        self.predict_behavior_emb = layer_norm(long_term_preference)

    self.output()
def build_model(self):
    """Build a one-step gated-GNN encoder followed by a time-aware GRU.

    The GRU (``type='new'``) consumes the GNN embeddings together with
    ``timelast_list`` and a copy of it shifted left by one step and
    zero-padded at the end.  The output at ``mask_index - 1`` is
    layer-normalised into ``self.predict_behavior_emb`` before
    ``self.output()`` is called.
    """
    self.gru_net_ins = GRU()
    self.ggnn_model = gated_GNN()

    with tf.variable_scope('user_behavior_emb'):
        behavior_emb = self.behavior_list_embedding_dense

    with tf.variable_scope('ggnn_encoding'):
        # A single propagation step is used here.
        gnn_emb = self.ggnn_model.generate_graph_emb(
            init_emb=behavior_emb,
            now_batch_size=self.now_bacth_data_size,
            num_units=self.num_units,
            adj_in=self.adj_in,
            adj_out=self.adj_out,
            step=1)

    with tf.variable_scope('ShortTermIntentEncoder'):
        # Shift the time list left by one and pad the tail with a zero column.
        tail_padding = tf.zeros(shape=(self.now_bacth_data_size, 1))
        timenext_list = tf.concat([self.timelast_list[:, 1:], tail_padding],
                                  axis=1)

        gru_features = [
            gnn_emb,
            tf.expand_dims(self.timelast_list, 2),
            tf.expand_dims(timenext_list, 2),
        ]
        self.time_aware_gru_net_input = tf.concat(gru_features, 2)

        self.short_term_intent_temp = self.gru_net_ins.time_aware_gru_net(
            hidden_units=self.num_units,
            input_data=self.time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            type='new')

        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

        self.predict_behavior_emb = layer_norm(self.short_term_intent)

    self.output()
def build_model(self):
    """Build the transformer-encoder graph with intensity, time and type heads.

    Steps, in order:
      1. encode the type/time embeddings with
         ``transformer_encoder.stack_multihead_self_attention``;
      2. gather the encoded vector at ``self.mask_index - 1`` into
         ``self.predict_target_emb``;
      3. compute ``self.target_lambda`` / ``self.sims_lambda`` with
         ``thp_intensity_calculation``;
      4. compute ``self.predict_time`` and ``self.predict_type_prob``;
      5. call ``self.output()``.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each ``variable_scope`` is a reconstruction --
    confirm against existing checkpoints.
    """
    encoder = transformer_encoder()
    flags = self.FLAGS

    with tf.variable_scope('transformer_encoding', reuse=tf.AUTO_REUSE):
        encoded_seq = encoder.stack_multihead_self_attention(
            stack_num=flags.THP_stack_num,
            type_enc=self.type_lst_embedding,
            time_enc=self.time_lst_embedding,
            M=flags.THP_M,
            Mk=flags.THP_Mk,
            Mv=flags.THP_Mv,
            Mi=flags.THP_Mi,
            L=self.max_seq_len,
            N=self.now_batch_size,
            head_num=flags.THP_head_num,
            dropout_rate=self.dropout_rate,
        )  # batch_size, seq_len, M

        # TODO: decide which position should actually be gathered here.
        self.predict_target_emb = gather_indexes(
            batch_size=self.now_batch_size,
            seq_length=self.max_seq_len,
            width=flags.THP_M,
            sequence_tensor=encoded_seq,
            positions=self.mask_index - 1)  # batch_size, M

    with tf.variable_scope('prepare_emb', reuse=tf.AUTO_REUSE):
        # All three heads currently share the same gathered embedding.
        emb_for_time = self.predict_target_emb
        emb_for_intensity = self.predict_target_emb
        emb_for_type = self.predict_target_emb

    with tf.variable_scope('lambda_calculation', reuse=tf.AUTO_REUSE):
        # Build (row, column) index pairs to read the timestamp of the
        # event just before the masked position for every sequence.
        prev_position = self.mask_index - 1
        batch_rows = tf.reshape(
            tf.range(start=0, limit=self.now_batch_size, delta=1), [-1, 1])
        time_index = tf.concat([batch_rows, prev_position], axis=1)
        last_time = tf.gather_nd(self.time_lst, time_index)
        self.last_time = last_time  # TODO: exposed for testing

        intensity_model = thp_intensity_calculation()
        self.target_lambda = intensity_model.cal_target_intensity(
            hidden_emb=emb_for_intensity,
            target_time=self.target_time,
            last_time=last_time,
            type_num=self.type_num)
        self.sims_lambda = intensity_model.cal_sims_intensity(
            hidden_emb=emb_for_intensity,
            sims_time=self.sims_time_lst,
            last_time=last_time,
            sims_len=self.sims_len,
            type_num=self.type_num)

    with tf.variable_scope('type_time_prediction', reuse=tf.AUTO_REUSE):
        self.predict_time = thp_time_predictor().predict_time(
            emb=emb_for_time,
            num_units=flags.THP_M)  # batch_size, 1

        self.predict_type_prob = thp_type_predictor().predict_type(
            emb=emb_for_type,
            num_units=flags.THP_M,
            type_num=self.type_num)  # batch_size, type_num

    self.output()
def build_model(self):
    """Build a self-attention next-item model with softmax loss and train op.

    Embeddings come from ``self.embedding.get_embedding``.  The behavior
    sequence is encoded by ``Attention.self_attention_single``, projected by
    a dense layer named ``'mlp'``, gathered at ``self.mask_index`` and
    layer-normalised.  The loss is the mean negative log-likelihood over the
    item lookup table plus an L2 penalty on the sequence and positive
    embeddings; gradients are clipped by global norm before being applied.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each scope is a reconstruction -- confirm
    against existing checkpoints and summary tags.
    """
    flags = self.FLAGS
    num_units = flags.num_units
    num_heads = flags.num_heads
    num_blocks = flags.num_blocks
    dropout_rate = flags.dropout
    attention_net = Attention()

    (self.sequence_embedding,
     self.positive_embedding,
     self.behavior_embedding_result_dense,
     self.positive_embedding_result_dense,
     self.mask_index,
     self.label_ids,
     self.seq_length,
     user_embedding,
     self.time) = self.embedding.get_embedding(num_units)

    with tf.variable_scope('ShortTermIntentEncoder'):
        # NOTE(review): the attention width is the literal 128 rather than
        # num_units; the dense layer below maps it to num_units.
        attended = attention_net.self_attention_single(
            enc=self.behavior_embedding_result_dense,
            num_units=128,
            num_heads=num_heads,
            num_blocks=num_blocks,
            dropout_rate=dropout_rate,
            is_training=True,
            reuse=False)
        projected = tf.layers.dense(
            attended,
            num_units,
            activation=tf.nn.relu,
            use_bias=False,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-5),
            name='mlp',
            reuse=False)
        self.long_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=flags.max_len,
            width=flags.num_units,
            sequence_tensor=projected,
            positions=self.mask_index)

    with tf.variable_scope("OutputLayer"):
        self.predict_behavior_emb = layer_norm(self.long_term_intent)

        l2_terms = [
            tf.nn.l2_loss(self.sequence_embedding),
            tf.nn.l2_loss(self.positive_embedding),
        ]
        l2_norm = tf.add_n(l2_terms)
        regulation_rate = flags.regulation_rate

        # Score every row of the item table against the predicted vector.
        logits = tf.matmul(
            self.predict_behavior_emb,
            tf.transpose(self.embedding.item_emb_lookup_table))
        log_probs = tf.nn.log_softmax(logits)

        flat_labels = tf.reshape(self.label_ids, [-1])
        one_hot_labels = tf.one_hot(
            flat_labels,
            depth=self.embedding.item_count + 3,
            dtype=tf.float32)
        self.loss_origin = -tf.reduce_sum(log_probs * one_hot_labels,
                                          axis=[-1])
        lstur_loss = (regulation_rate * l2_norm
                      + tf.reduce_mean(self.loss_origin))

    with tf.name_scope("LearningtoRankLoss"):
        self.loss = lstur_loss
        tf.summary.scalar("l2_norm", l2_norm)
        tf.summary.scalar("Training Loss", self.loss)
        tf.summary.scalar("Learning_rate", self.learning_rate)

    # Clip gradients by global norm before applying them.
    trainable_params = tf.trainable_variables()
    raw_gradients = tf.gradients(self.loss, trainable_params)
    clipped_gradients, _ = tf.clip_by_global_norm(
        raw_gradients, flags.max_gradient_norm)
    self.train_op = self.opt.apply_gradients(
        zip(clipped_gradients, trainable_params))

    self.summery()
def build_model(self):
    """Build the continuous-LSTM graph with intensity, time and type heads.

    The states returned by ``self.get_state`` are evaluated with
    ``self.cal_ht`` at the target time and at each of the ``self.sims_len``
    simulated times.  The resulting embeddings feed
    ``nhp_intensity_calculation`` (``self.target_lambda`` /
    ``self.sims_lambda``), ``thp_time_predictor`` (``self.predict_time``) and
    ``thp_type_predictor`` (``self.predict_type_prob``); finally
    ``self.output()`` is called.

    Fix: ``self.predict_sims_emb`` is consumed by ``cal_sims_intensity`` but
    its assignment had been commented out, leaving the computed tensor
    unused and the attribute unset in this method.  It is assigned again
    here.  The per-time embeddings are also concatenated directly instead of
    being appended to a dummy zero column that was sliced off afterwards.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each ``variable_scope`` is a reconstruction --
    confirm against existing checkpoints.
    """
    self.ctsm_model = ContinuousLSTM()
    self.transformer_model = transformer_encoder()

    # Timestamp of the event right before the masked position.
    last_time = tf.squeeze(
        gather_indexes(
            batch_size=self.now_batch_size,
            seq_length=self.max_seq_len,
            width=1,
            sequence_tensor=self.time_lst,
            positions=self.mask_index - 1),
        axis=1)

    o_i, c_i, c_i_bar, delta_i, h_i_minus = self.get_state(
        self.target_time_last_lst)

    predict_target_lambda_emb = self.cal_ht(
        o_i, c_i, c_i_bar, delta_i, self.target_time - last_time)

    # sims_time_lst: batch_size, sims_len
    self.test = tf.split(self.sims_time_lst, self.sims_len, 1)
    sims_time = tf.squeeze(
        tf.split(self.sims_time_lst, self.sims_len, 1), 2)

    # One embedding per simulated time; each call presumably yields
    # [batch_size, num_units] (see the reshape below) -- TODO confirm.
    sims_embs = [
        self.cal_ht(o_i, c_i, c_i_bar, delta_i, sims_time[i] - last_time)
        for i in range(self.sims_len)
    ]
    predict_sims_emb = tf.concat(
        sims_embs, axis=1)  # batch_size, sims_len * num_units
    predict_sims_emb = tf.reshape(
        predict_sims_emb,
        [-1, self.sims_len, self.num_units])  # batch_size, sims_len, num_units

    self.predict_target_emb = predict_target_lambda_emb
    # Required by cal_sims_intensity below.
    self.predict_sims_emb = predict_sims_emb

    with tf.variable_scope('prepare_emb'):
        emb_for_type = self.predict_target_emb
        emb_for_time = h_i_minus

    with tf.variable_scope('intensity_calculation', reuse=tf.AUTO_REUSE):
        intensity_model = nhp_intensity_calculation()
        self.target_lambda = intensity_model.cal_target_intensity(
            hidden_emb=self.predict_target_emb,
            type_num=self.type_num)
        self.sims_lambda = intensity_model.cal_sims_intensity(
            hidden_emb=self.predict_sims_emb,
            sims_len=self.sims_len,
            type_num=self.type_num)

    with tf.variable_scope('type_time_calculation', reuse=tf.AUTO_REUSE):
        time_predictor = thp_time_predictor()
        self.predict_time = time_predictor.predict_time(
            emb=emb_for_time,
            num_units=self.num_units,
        )

        type_predictor = thp_type_predictor()
        self.predict_type_prob = type_predictor.predict_type(
            emb=emb_for_type,
            num_units=self.num_units,
            type_num=self.type_num)

    self.output()
def build_model(self):
    """Build the long/short/hybrid preference graph with a 3-way gate.

    Three candidate vectors are produced:
      * long-term  -- time-aware self-attention output gathered at
        ``self.mask_index``;
      * short-term -- time-aware GRU state gathered at ``self.mask_index - 1``;
      * hybrid     -- vanilla attention of the short-term vector over the
        encoded history.
    A softmax gate ``self.z`` (dense layer with 3 units) is computed from
    their concatenation, and ``self.FLAGS.PISTRec_type`` selects how
    ``self.predict_behavior_emb`` is formed: ``'hard'``, ``'soft'``,
    ``'short'``, ``'long'`` or ``'hybird'``.  Any other value leaves the
    layer-normalised hybrid vector in place.

    Fix: the ``'hard'`` branch used ``if tf.argmax(self.z) == 0`` in Python.
    In a TF1 graph that compares a Tensor object with an int, which is
    never true, so the hybrid vector was always chosen; the argmax was also
    taken over axis 0 (the batch).  The branch now builds a per-example
    one-hot gate from ``argmax`` over the three gate outputs.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each ``variable_scope`` is a reconstruction --
    confirm against existing checkpoints.
    """
    time_aware_attention = Time_Aware_Attention()
    self.gru_net_ins = GRU()

    with tf.variable_scope("UserHistoryEncoder"):
        user_history = time_aware_attention.self_attention(
            enc=self.behavior_list_embedding_dense,
            num_units=self.num_units,
            num_heads=self.num_heads,
            num_blocks=self.num_blocks,
            dropout_rate=self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=self.seq_length,
            t_querys=self.time_list,
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=self.max_len)

    with tf.variable_scope('ShortTermIntentEncoder'):
        self.time_aware_gru_net_input = tf.concat([
            self.behavior_list_embedding_dense,
            tf.expand_dims(self.timelast_list, 2),
            tf.expand_dims(self.timenow_list, 2)
        ], 2)
        self.short_term_intent_temp = self.gru_net_ins.time_aware_gru_net(
            hidden_units=self.num_units,
            input_data=self.time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            type='time-aware')
        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=self.short_term_intent_temp,
            positions=self.mask_index - 1)

    # The short-term vector acts as a length-1 query sequence.
    short_term_intent4vallina = tf.expand_dims(self.short_term_intent, 1)

    with tf.variable_scope('NextItemDecoder'):
        hybird_preference = time_aware_attention.vanilla_attention(
            user_history,
            short_term_intent4vallina,
            self.num_units,
            self.num_heads,
            self.num_blocks,
            self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=tf.ones_like(short_term_intent4vallina[:, 0, 0],
                                      dtype=tf.int32),
            t_querys=tf.expand_dims(self.target[2], 1),
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=1)
        # Default prediction; it survives only when PISTRec_type matches
        # none of the modes handled below.
        self.predict_behavior_emb = layer_norm(hybird_preference)

    with tf.variable_scope('OutputLayer'):
        self.long_term_prefernce = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=user_history,
            positions=self.mask_index)
        self.hybird_preference = hybird_preference

        self.z_concate = tf.concat([
            self.long_term_prefernce, self.short_term_intent,
            self.hybird_preference
        ], 1)
        self.z = tf.layers.dense(
            inputs=self.z_concate,
            units=3,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                self.regulation_rate))
        self.z = tf.nn.softmax(self.z)

        def _mix(weights):
            # Weighted sum of the three candidates; column k of `weights`
            # scales the k-th candidate (long, short, hybrid).
            return (
                tf.multiply(tf.slice(weights, [0, 0], [-1, 1]),
                            self.long_term_prefernce)
                + tf.multiply(tf.slice(weights, [0, 1], [-1, 1]),
                              self.short_term_intent)
                + tf.multiply(tf.slice(weights, [0, 2], [-1, 1]),
                              self.hybird_preference))

        gate_mode = self.FLAGS.PISTRec_type
        if gate_mode == 'hard':
            # Per-example winner-takes-all: one-hot of the largest gate.
            hard_gate = tf.one_hot(
                tf.argmax(self.z, axis=1), depth=3, dtype=self.z.dtype)
            self.predict_behavior_emb = _mix(hard_gate)
        elif gate_mode == 'soft':
            self.predict_behavior_emb = _mix(self.z)
        elif gate_mode == 'short':
            self.predict_behavior_emb = self.short_term_intent
        elif gate_mode == 'long':
            self.predict_behavior_emb = self.long_term_prefernce
        elif gate_mode == 'hybird':
            self.predict_behavior_emb = self.hybird_preference

        self.predict_behavior_emb = layer_norm(self.predict_behavior_emb)

    self.output()
def build_model(self):
    """Build the graph that blends a hybrid and a short-term vector via a gate.

    Two time-aware GRUs (``type='new'``) are run over the same input in the
    scopes ``'ShortTermIntentEncoder'`` and ``'StateEncoder'``; their states
    gathered at ``self.mask_index - 1`` become ``self.short_term_intent``
    and ``self.state``.  Vanilla attention of the short-term vector over the
    raw behavior embeddings gives the hybrid vector, stored as
    ``self.long_term_prefernce``.  A sigmoid gate ``self.z`` computed from
    the three vectors blends hybrid and short-term into
    ``self.predict_behavior_emb``; finally ``self.output()`` is called.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each ``variable_scope`` is a reconstruction --
    confirm against existing checkpoints.
    """
    time_aware_attention = Time_Aware_Attention()
    self.gru_net_ins = GRU()

    def _encode_with_gru():
        # Runs one time-aware GRU pass inside the caller's variable scope
        # and returns (sequence output, normalised state before the mask).
        self.time_aware_gru_net_input = tf.concat([
            self.behavior_list_embedding_dense,
            tf.expand_dims(self.timelast_list, 2),
            tf.expand_dims(self.timenow_list, 2)
        ], 2)
        sequence_out = self.gru_net_ins.time_aware_gru_net(
            hidden_units=self.num_units,
            input_data=self.time_aware_gru_net_input,
            input_length=tf.add(self.seq_length, -1),
            type='new')
        gathered = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.max_len,
            width=self.num_units,
            sequence_tensor=sequence_out,
            positions=self.mask_index - 1)
        return sequence_out, layer_norm(gathered)

    with tf.variable_scope("UserHistoryEncoder"):
        # The history is used as-is; no encoder is applied in this scope.
        user_history = self.behavior_list_embedding_dense

    with tf.variable_scope('ShortTermIntentEncoder'):
        self.short_term_intent_temp, self.short_term_intent = (
            _encode_with_gru())

    with tf.variable_scope('StateEncoder'):
        self.state_temp, self.state = _encode_with_gru()

    # The short-term vector acts as a length-1 query sequence.
    short_term_intent4vallina = tf.expand_dims(self.short_term_intent, 1)

    with tf.variable_scope('NextItemDecoder'):
        single_query_length = tf.ones_like(
            short_term_intent4vallina[:, 0, 0], dtype=tf.int32)
        hybird_preference = time_aware_attention.vanilla_attention(
            user_history,
            short_term_intent4vallina,
            self.num_units,
            self.num_heads,
            self.num_blocks,
            self.dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=single_query_length,
            t_querys=tf.expand_dims(self.target[2], 1),
            t_keys=self.time_list,
            t_keys_length=self.max_len,
            t_querys_length=1)
        hybird_preference = layer_norm(hybird_preference)

    with tf.variable_scope('Switch'):
        self.long_term_prefernce = hybird_preference
        self.z_concate = tf.concat(
            [self.long_term_prefernce, self.short_term_intent, self.state],
            1)

        # Two-layer gate: 32 ReLU units, then a single sigmoid unit.
        gate_hidden = tf.layers.dense(
            inputs=self.z_concate,
            units=32,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                self.regulation_rate),
            activation=tf.nn.relu)
        gate_logit = tf.layers.dense(
            inputs=gate_hidden,
            units=1,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(
                self.regulation_rate))
        self.z = tf.nn.sigmoid(gate_logit)

        blended = (self.z * self.long_term_prefernce
                   + (1 - self.z) * self.short_term_intent)
        self.predict_behavior_emb = layer_norm(blended)

    self.output()
def build_model(self):
    """Build the attention + GRU preference graph, its loss and its train op.

    Embeddings come from ``self.embedding.get_embedding``.  The method builds
    a self-attention encoder and several GRU encoders on top of it, each
    stored on ``self``; the prediction vector is the layer-normalised
    ``self.user_enhance_preference_user`` (GRU initialised with the user
    embedding).  The loss is the mean negative log-likelihood over the item
    lookup table plus an L2 penalty on the sequence/positive/negative
    embeddings; gradients are clipped by global norm before being applied.

    Fix: the one-hot label depth was the literal ``500000``, which only
    works when the item lookup table has exactly that many rows.  The depth
    is now taken from the logits so the label matrix always matches them.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each scope is a reconstruction -- confirm
    against existing checkpoints and summary tags.
    """
    num_units = self.FLAGS.num_units
    num_heads = self.FLAGS.num_heads
    num_blocks = self.FLAGS.num_blocks
    dropout_rate = self.FLAGS.dropout
    attention_net = Attention()
    gru_net_ins = GRU()

    # The last returned value is not used by this model.
    (self.sequence_embedding,
     self.positive_embedding,
     self.negative_embedding,
     self.behavior_embedding_result_dense,
     self.positive_embedding_result_dense,
     self.negative_embedding_result_dense,
     self.mask_index,
     self.label_ids,
     self.seq_length,
     user_embedding,
     _unused_time) = self.embedding.get_embedding(num_units)

    with tf.variable_scope("LongTermIntentEncoder"):
        # NOTE(review): the attention width is the literal 128 while the
        # gathers below use FLAGS.num_units -- confirm they agree.
        long_term_intent_temp = attention_net.self_attention(
            enc=self.behavior_embedding_result_dense,
            num_units=128,
            num_heads=num_heads,
            num_blocks=num_blocks,
            dropout_rate=dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=self.seq_length)
        self.long_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.FLAGS.max_len,
            width=self.FLAGS.num_units,
            sequence_tensor=long_term_intent_temp,
            positions=self.mask_index)
        # Average pooling over axis 1 of the attention output.
        self.long_term_preference = tf.reduce_mean(long_term_intent_temp,
                                                   axis=1)

    with tf.variable_scope('ShortTermIntentEncoder'):
        short_term_intent_temp = gru_net_ins.gru_net(
            hidden_units=num_units,
            input_data=self.behavior_embedding_result_dense,
            input_length=self.mask_index)
        self.short_term_intent = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.FLAGS.max_len,
            width=self.FLAGS.num_units,
            sequence_tensor=short_term_intent_temp,
            positions=tf.add(self.mask_index, -1))

    with tf.variable_scope("EnhancePreferenceIntentEncoder"):
        # GRU over the attention output, started from the pooled preference.
        user_enhance_preference_temp = gru_net_ins.gru_net_initial(
            hidden_units=num_units,
            input_length=self.mask_index,
            input_data=long_term_intent_temp,
            initial_state=self.long_term_preference)
        self.user_enhance_preference = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.FLAGS.max_len,
            width=self.FLAGS.num_units,
            sequence_tensor=user_enhance_preference_temp,
            positions=tf.add(self.mask_index, -1))

    with tf.variable_scope("EnhanceUserPreferenceIntentEncoder"):
        # GRU over the attention output, started from the user embedding.
        user_enhance_preference_temp_user = gru_net_ins.gru_net_initial(
            hidden_units=num_units,
            input_length=self.mask_index,
            input_data=long_term_intent_temp,
            initial_state=user_embedding)
        self.user_enhance_preference_user = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.FLAGS.max_len,
            width=self.FLAGS.num_units,
            sequence_tensor=user_enhance_preference_temp_user,
            positions=tf.add(self.mask_index, -1))

    with tf.variable_scope("PreferenceEncoder"):
        user_preference_temp = gru_net_ins.gru_net(
            hidden_units=num_units,
            input_data=long_term_intent_temp,
            input_length=self.mask_index)
        self.user_preference = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=self.FLAGS.max_len,
            width=self.FLAGS.num_units,
            sequence_tensor=user_preference_temp,
            positions=tf.add(self.mask_index, -1))

    with tf.variable_scope("OutputLayer"):
        self.predict_behavior_emb = self.user_enhance_preference_user
        self.predict_behavior_emb = layer_norm(self.predict_behavior_emb)

        # Fraction of (positive, negative) pairs where the positive item
        # scores higher than the negative one.
        self.mf_auc = tf.reduce_mean(
            tf.to_float((tf.reduce_sum(
                tf.multiply(
                    tf.expand_dims(self.predict_behavior_emb, 1),
                    tf.expand_dims(self.positive_embedding_result_dense, 1)
                    - self.negative_embedding_result_dense), 2)) > 0))

        l2_norm = tf.add_n([
            tf.nn.l2_loss(self.sequence_embedding),
            tf.nn.l2_loss(self.positive_embedding),
            tf.nn.l2_loss(self.negative_embedding)
        ])
        regulation_rate = self.FLAGS.regulation_rate

        item_lookup_table_T = tf.transpose(
            self.embedding.item_emb_lookup_table)
        logits = tf.matmul(self.predict_behavior_emb, item_lookup_table_T)
        log_probs = tf.nn.log_softmax(logits)

        label_ids = tf.reshape(self.label_ids, [-1])
        # The depth must equal the number of scored items, i.e. the last
        # dimension of the logits.
        one_hot_labels = tf.one_hot(label_ids,
                                    depth=tf.shape(logits)[-1],
                                    dtype=tf.float32)
        self.loss_origin = -tf.reduce_sum(log_probs * one_hot_labels,
                                          axis=[-1])
        lstur_loss = regulation_rate * l2_norm + tf.reduce_mean(
            self.loss_origin)

    with tf.name_scope("LearningtoRankLoss"):
        self.loss = lstur_loss
        tf.summary.scalar("l2_norm", l2_norm)
        tf.summary.scalar("Training Loss", self.loss)
        tf.summary.scalar("Learning_rate", self.learning_rate)

    # Clip gradients by global norm before applying them.
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    clip_gradients, _ = tf.clip_by_global_norm(
        gradients, self.FLAGS.max_gradient_norm)
    self.train_op = self.opt.apply_gradients(
        zip(clip_gradients, trainable_params))

    self.summery()
def build_model(self):
    """Build the attention + time-aware GRU model with an optional MMoE layer.

    Embeddings come from ``self.embedding.get_embedding``.  Three features
    are concatenated -- the time-aware GRU state gathered at
    ``mask_index - 1``, the sum-pooled self-attention output and the
    flattened behavior embeddings -- and reduced by two dense layers into
    ``self.user_preference``.  When ``FLAGS.use_mmoe`` is set, eight dense
    experts mixed by a softmax gate refine that vector.  The loss is the
    mean negative log-likelihood over the item lookup table plus an L2
    penalty; gradients are clipped by global norm before being applied.

    NOTE(review): the original indentation was lost, so the placement of
    statements relative to each scope (in particular whether the MMoE block
    is nested in the encoder scope) is a reconstruction -- confirm against
    existing checkpoints and summary tags.
    """
    flags = self.FLAGS
    num_units = flags.num_units
    num_heads = flags.num_heads
    num_blocks = flags.num_blocks
    dropout_rate = flags.dropout
    embedding_size = flags.itemid_embedding_size
    use_mmoe = flags.use_mmoe
    attention_net = Attention()
    gru_net_ins = GRU()

    (self.sequence_embedding,
     self.positive_embedding,
     self.behavior_embedding_result_dense,
     self.positive_embedding_result_dense,
     self.mask_index,
     self.label_ids,
     self.seq_length,
     user_embedding,
     self.time_interval,
     self.time,
     self.pos_last_list) = self.embedding.get_embedding(num_units)

    with tf.variable_scope("LongTermIntentEncoder"):
        attention_output = attention_net.self_attention(
            enc=self.behavior_embedding_result_dense,
            num_units=128,
            num_heads=num_heads,
            num_blocks=num_blocks,
            dropout_rate=dropout_rate,
            is_training=True,
            reuse=False,
            key_length=self.seq_length,
            query_length=self.seq_length)
        # Sum pooling over axis 1 of the attention output.
        attention_pooling = tf.reduce_sum(attention_output, 1)

    with tf.variable_scope("EnhanceUserPreferenceIntentEncoder"):
        gru_features = [
            self.behavior_embedding_result_dense,
            tf.expand_dims(self.time_interval, 2),
            tf.expand_dims(self.pos_last_list, 2),
        ]
        gru_input = tf.concat(gru_features, 2)
        self.gru_output = gru_net_ins.time_aware_gru_net(
            hidden_units=num_units,
            input_data=gru_input,
            input_length=tf.add(self.seq_length, -1))
        self.gru_output = gather_indexes(
            batch_size=self.now_bacth_data_size,
            seq_length=flags.max_len,
            width=flags.num_units,
            sequence_tensor=self.gru_output,
            positions=tf.add(self.mask_index, -1))

        # Flatten the per-step behavior embeddings into one row per example.
        static_shape = self.behavior_embedding_result_dense.get_shape()
        _, seq_len, size = static_shape.as_list()
        feature_emb = tf.reshape(self.behavior_embedding_result_dense,
                                 [-1, seq_len * size])

        concat_output = tf.concat(
            [self.gru_output, attention_pooling, feature_emb], axis=1)
        # First dense layer halves the concatenated width.
        half_width = concat_output.get_shape().as_list()[1] // 2
        hidden = tf.layers.dense(concat_output,
                                 half_width,
                                 activation=tf.nn.relu,
                                 use_bias=False)
        self.user_preference = tf.layers.dense(hidden,
                                               num_units,
                                               activation=tf.nn.relu,
                                               use_bias=False)

    if use_mmoe:
        print("mmoe")
        with tf.variable_scope("mmoe"):
            num_expert = 8
            # Each expert output gets a trailing axis so that the experts
            # can be concatenated along it: [B, embedding_size, 1].
            expert_outputs = [
                tf.expand_dims(
                    tf.layers.dense(self.user_preference,
                                    embedding_size,
                                    activation=tf.nn.relu,
                                    use_bias=False),
                    axis=2)
                for _ in range(num_expert)
            ]
            expert_outputs = tf.concat(
                expert_outputs, axis=2)  # [B, embedding_size, num_expert]

            gate_network = tf.layers.dense(self.user_preference,
                                           num_expert,
                                           activation=tf.nn.softmax,
                                           use_bias=False)
            gate_network_dim = tf.expand_dims(
                gate_network, axis=1)  # [B, 1, num_expert]
            tiled_gate = tf.tile(gate_network_dim, [1, embedding_size, 1])
            weighted_expert_outputs = tiled_gate * expert_outputs
            final_output = tf.reduce_sum(weighted_expert_outputs, axis=2)

            self.user_preference = tf.layers.dense(final_output,
                                                   num_units,
                                                   activation=tf.nn.relu,
                                                   use_bias=False)

    with tf.variable_scope("OutputLayer"):
        self.predict_behavior_emb = layer_norm(self.user_preference)

        l2_terms = [
            tf.nn.l2_loss(self.sequence_embedding),
            tf.nn.l2_loss(self.positive_embedding),
            tf.nn.l2_loss(user_embedding),
        ]
        l2_norm = tf.add_n(l2_terms)
        regulation_rate = flags.regulation_rate

        # Score every row of the item table against the predicted vector.
        item_lookup_table_T = tf.transpose(
            self.embedding.item_emb_lookup_table)
        logits = tf.matmul(self.predict_behavior_emb, item_lookup_table_T)
        log_probs = tf.nn.log_softmax(logits)

        flat_labels = tf.reshape(self.label_ids, [-1])
        one_hot_labels = tf.one_hot(flat_labels,
                                    depth=self.embedding.item_count + 3,
                                    dtype=tf.float32)
        self.loss_origin = -tf.reduce_sum(log_probs * one_hot_labels,
                                          axis=[-1])
        lstur_loss = (regulation_rate * l2_norm
                      + tf.reduce_mean(self.loss_origin))

    with tf.name_scope("LearningtoRankLoss"):
        self.loss = lstur_loss
        if flags.add_summary:
            tf.summary.scalar("l2_norm", l2_norm)
            tf.summary.scalar("Training Loss", self.loss)
            tf.summary.scalar("Learning_rate", self.learning_rate)

    # Clip gradients by global norm before applying them.
    trainable_params = tf.trainable_variables()
    raw_gradients = tf.gradients(self.loss, trainable_params)
    clipped_gradients, _ = tf.clip_by_global_norm(
        raw_gradients, flags.max_gradient_norm)
    self.train_op = self.opt.apply_gradients(
        zip(clipped_gradients, trainable_params))

    self.summery()