from __future__ import print_function
import os
import random
import time
import numpy as np
try:
    import cPickle                      # Python 2
except ImportError:
    import pickle as cPickle            # Python 3 fallback
# Project-specific names used below (Params, GlobalBest, PATH and the fun_* helpers)
# are imported from the repository's own modules.


def train_valid_or_test(pas):
    """
    Main routine.
    :return:
    """
    # Build the parameters, data, model and best-value tracker.
    p = pas.p
    model, model_name = pas.build_model_one_by_one(flag=p['gru'])
    best = GlobalBest(at_nums=p['at_nums'])                 # stores the best metric values
    _, starts_ends_tes = pas.compute_start_end(flag='test')
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    # Pull out the frequently used attributes so the 'pas.' prefix is not needed below.
    user_num, item_num, dist_num = pas.user_num, pas.item_num, pas.dist_num
    tra_buys_masks, tra_masks, tra_buys_neg_masks = pas.tra_buys_masks, pas.tra_masks, pas.tra_buys_neg_masks
    tes_buys_masks, tes_masks, tes_buys_neg_masks = pas.tes_buys_masks, pas.tes_masks, pas.tes_buys_neg_masks
    dd = p['dd']
    pois_cordis = pas.pois_cordis
    ulptai = pas.ulptai
    del pas

    # Optionally resume from a saved checkpoint (only for the flag-2 model).
    ini_epoch = 0
    if 2 == p['gru']:
        ini_epoch = p['load_epoch']
        if p['load_epoch'] != 0:
            print('Loading model ...')
            m_path = './model/' + p['dataset'] + '/' + model_name + '_size' + \
                str(p['latent_size']) + '_UD' + str(p['UD']) + '_dd' + str(p['dd']) + \
                '_epoch' + str(p['load_epoch'])
            with open(m_path, 'rb') as f:
                loaded_objects = cPickle.load(f)
            model.load_params(loaded_objects)
            ini_epoch = p['load_epoch'] + 1

    # Main loop.
    losses = []
    times0, times1, times2, times3 = [], [], [], []
    for epoch in np.arange(ini_epoch, p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negative items and reshuffle the data every epoch, so gradients
        # are computed on randomly chosen samples.
        if epoch > 0:   # the epoch-0 negatives were generated before the loop and used to initialize the class
            tra_buys_neg_masks = fun_random_neg_masks_tra(item_num, tra_buys_masks)
            tes_buys_neg_masks = fun_random_neg_masks_tes(item_num, tra_buys_masks, tes_buys_masks)
            if p['gru'] in [0, 1]:
                model.update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks)
            else:
                tra_dist_neg_masks = fun_compute_dist_neg(
                    tra_buys_masks, tra_masks, tra_buys_neg_masks, pois_cordis, dd, dist_num)
                model.s_update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks, tra_dist_neg_masks)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        ls = [0, 0]
        total_ls = []
        random.seed(str(123 + epoch))
        user_idxs_tra = np.arange(user_num, dtype=np.int32)
        random.shuffle(user_idxs_tra)                       # shuffle the user_id order every epoch
        if 0 == p['gru']:
            for uidx in user_idxs_tra:
                tra = tra_buys_masks[uidx]
                neg = tra_buys_neg_masks[uidx]
                for i in np.arange(sum(tra_masks[uidx])):
                    loss += model.train(uidx, [tra[i], neg[i]])
        elif 1 == p['gru'] or 3 == p['gru']:
            for uidx in user_idxs_tra:
                loss += model.train(uidx)
        else:
            for uidx in user_idxs_tra:
                los, a, b, ls = model.train(uidx)
                loss += los
                a_b = [a, b]
                a_b.extend(ls)
                total_ls.append(a_b)
        rnn_l2_sqr = model.l2.eval()    # model.l2 is a TensorVariable, so eval() is needed to get its value

        # Record the loss and the loss_weight.
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('{v1}'.format(v1=int(loss + rnn_l2_sqr)))
        # ls = model.loss_weight
        print('\t\tloss_weight = {v1}, {v2}'.format(v1=ls[0], v2=ls[1]))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        print("\tPredicting ...")
        # Compute the representations of all users and items.
        if 0 == p['gru']:
            model.update_trained_items()
            model.update_trained_users()
        elif 1 == p['gru']:
            model.update_trained_items()    # for MV-GRU this first computes the fused image/text features
            all_hus = []
            for start_end in starts_ends_tes:
                sub_all_hus = model.predict(start_end)
                all_hus.extend(sub_all_hus)
            model.update_trained_users(all_hus)
        elif 2 == p['gru']:
            model.update_trained_items()
            model.update_trained_dists()
            all_hus = []
            all_sus = []
            for start_end in starts_ends_tes:
                [sub_all_hus, sub_all_sus] = model.predict(start_end)
                all_hus.extend(sub_all_hus)
                all_sus.extend(sub_all_sus)
            probs = fun_acquire_prob(all_sus, ulptai, dist_num)     # input shapes = (2310, 1520), (2310, 5528)
            model.update_trained_users(all_hus)
            model.update_prob(probs)
        else:
            model.update_trained_items()
            model.update_trained_dists()
            all_hus = []
            for start_end in starts_ends_tes:
                sub_all_hus = model.predict(start_end)
                all_hus.extend(sub_all_hus)
            model.update_trained_users(all_hus)
        t2 = time.time()
        times1.append(t2 - t1)

        # Compute the evaluation metrics and print the best values so far.
        fun_predict_auc_recall_map_ndcg(p, model, best, epoch, starts_ends_auc, starts_ends_tes,
                                        tes_buys_masks, tes_masks)
        best.fun_print_best(epoch)      # only the best results so far are printed each time
        t3 = time.time()
        times2.append(t3 - t2)
        print('\tavg. time (train, user, test): %0.0fs,' % np.average(times0),
              '%0.0fs,' % np.average(times1),
              '%0.0fs' % np.average(times2),
              '| alpha, lam: {v1}'.format(v1=', '.join([str(lam) for lam in [p['alpha'], p['lambda']]])),
              '| model: {v1}'.format(v1=model_name))

        # --------------------------------------------------------------------------------------------------
        if epoch == p['epochs'] - 1:
            # Save the best values and all loss values.
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name, epoch, p, best, losses)
            if 2 == p['gru']:
                size = p['latent_size']
                fil_name = 'size' + str(size) + 'UD' + str(p['UD']) + 'dd' + str(p['dd']) + 'loss.txt'
                fil = os.path.join(path, fil_name)
                np.savetxt(fil, total_ls)

        if 2 == p['gru'] and epoch % p['save_per_epoch'] == 0 and epoch != 0:
            # Save the model parameters.
            m_path = './model/' + p['dataset'] + '/' + model_name + '_size' + \
                str(p['latent_size']) + '_UD' + str(p['UD']) + '_dd' + str(p['dd']) + '_epoch' + str(epoch)
            with open(m_path, 'wb') as file:
                save_model = [
                    model.loss_weight.get_value(), model.wd.get_value(), model.lt.get_value(),
                    model.di.get_value(), model.ui.get_value(), model.wh.get_value(),
                    model.bi.get_value(), model.vs.get_value(), model.bs.get_value()
                ]
                cPickle.dump(save_model, file, protocol=cPickle.HIGHEST_PROTOCOL)

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
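# The checkpoint above is dumped as a plain list of shared-variable values. For reference,
# a minimal sketch of the matching restore step is given below. It assumes that
# model.load_params simply writes the loaded values back into the model's Theano shared
# variables in the same order they were dumped; the attribute names are taken from the
# save block above, everything else is an assumption.
def _sketch_load_params(model, loaded_objects):
    """Hypothetical restore helper mirroring the dump order used in the save block above."""
    names = ['loss_weight', 'wd', 'lt', 'di', 'ui', 'wh', 'bi', 'vs', 'bs']
    for name, value in zip(names, loaded_objects):
        getattr(model, name).set_value(value)   # Theano shared variables expose set_value()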
def train_valid_or_test():
    """
    Main routine.
    :return:
    """
    # Build the parameters, data, model and best-value tracker.
    pas = Params()
    p = pas.p
    model, model_name = pas.build_model_one_by_one(flag=p['mvgru'])
    best = GlobalBest(at_nums=p['at_nums'], intervals=p['intervals'])   # stores the best metric values
    _, starts_ends_tes = pas.compute_start_end(flag='test_top_k')       # the test set is processed in order
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    # Pull out the frequently used attributes so the 'pas.' prefix is not needed below.
    user_num, item_num = pas.user_num, pas.item_num
    tra_buys_masks, tra_masks = np.asarray(pas.tra_buys_masks), np.asarray(pas.tra_masks)
    tes_buys_masks, tes_masks = np.asarray(pas.tes_buys_masks), np.asarray(pas.tes_masks)
    tra_buys_neg_masks = np.asarray(pas.tra_buys_neg_masks)
    test_i_cou, test_i_intervals_cumsum, test_i_cold_active = pas.tic, pas.tiic, pas.tica
    del pas

    # Main loop.
    losses = []
    times0, times1, times2 = [], [], []
    for epoch in np.arange(p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negative items and reshuffle the data every epoch, so gradients
        # are computed on randomly chosen samples.
        if epoch > 0:   # the epoch-0 negatives were generated before the loop and used to initialize the class
            tra_buys_neg_masks = fun_random_neg_masks_tra(item_num, tra_buys_masks)
            tes_buys_neg_masks = fun_random_neg_masks_tes(item_num, tra_buys_masks, tes_buys_masks)
            model.update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        random.seed(str(123 + epoch))
        user_idxs_tra = np.arange(user_num, dtype=np.int32)
        random.shuffle(user_idxs_tra)                       # shuffle the user_id order every epoch
        for uidx in user_idxs_tra:
            tra = tra_buys_masks[uidx]
            neg = tra_buys_neg_masks[uidx]
            for i in np.arange(sum(tra_masks[uidx])):
                loss += model.train(uidx, [tra[i], neg[i]])
        rnn_l2_sqr = model.l2.eval()    # model.l2 is a TensorVariable, so eval() is needed to get its value
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('%0.2f' % (loss + rnn_l2_sqr))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        print("\tPredicting ...")
        # Compute the representations of all users and items.
        model.update_trained_items()    # run this first; for MV-GRU it also computes the fused image/text features
        model.update_trained_users()
        t2 = time.time()
        times1.append(t2 - t1)

        # Compute the evaluation metrics and print the best values so far.
        fun_predict_auc_recall_map_ndcg(
            p, model, best, epoch, starts_ends_auc, starts_ends_tes, tes_buys_masks, tes_masks,
            test_i_cou, test_i_intervals_cumsum, test_i_cold_active)
        best.fun_print_best(epoch)      # only the best results so far are printed each time
        t3 = time.time()
        times2.append(t3 - t2)
        tmp1 = '| lam: %s' % ', '.join([str(lam) for lam in [p['lambda'], p['lambda_ev'], p['lambda_ae']]])
        tmp2 = '| model: {v1}'.format(v1=model_name)
        tmp3 = '| tra_fea_zero: %0.1f' % p['train_fea_zero']
        print('\tavg. time (train, user, test): %0.0fs,' % np.average(times0),
              '%0.0fs,' % np.average(times1),
              '%0.0fs' % np.average(times2),
              tmp1, tmp2, tmp3)

        # --------------------------------------------------------------------------------------------------
        # Save the best values at the final epoch (e.g. epoch 29 or 49).
        if epoch == p['epochs'] - 1:
            # Save the best values and all loss values.
            print("\t-----------------------------------------------------------------")
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name + ' - denoise', epoch, p, best, losses)

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
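# The fun_random_neg_masks_* helpers are re-run at the top of every epoch so that each
# observed item is paired with a fresh random negative. The standalone sketch below only
# illustrates that general idea (one random item the user never interacted with, aligned
# position-by-position with the padded positive sequence); the real helpers in this repo
# may handle padding, masks and the test split differently.
def _sketch_random_neg_masks(item_num, buys_masks):
    """Hypothetical per-position negative sampling (illustrative only)."""
    negs = []
    for seq in buys_masks:
        interacted = set(seq)
        row = []
        for _ in seq:
            j = random.randint(0, item_num - 1)
            while j in interacted:              # resample until the item was never interacted with
                j = random.randint(0, item_num - 1)
            row.append(j)
        negs.append(row)
    return np.asarray(negs)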
def train_valid_or_test():
    """
    Main routine.
    :return:
    """
    # Build the parameters, data, model and best-value tracker.
    pas = Params()
    p = pas.p
    model, model_name = pas.build_model_one_by_one()
    best = GlobalBest(at_nums=p['at_nums'])                 # stores the best metric values
    _, starts_ends_tes = pas.compute_start_end(flag='test')
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    # Pull out the frequently used attributes so the 'pas.' prefix is not needed below.
    user_num, item_num = pas.user_num, pas.item_num
    tra_pois, tra_pois_negs = pas.tra_pois, pas.tra_pois_negs
    tes_pois_masks, tes_masks, tes_pois_neg_masks = pas.tes_pois_masks, pas.tes_masks, pas.tes_pois_neg_masks
    del pas

    # Main loop.
    losses = []
    times0, times1, times2, times3 = [], [], [], []
    for epoch in np.arange(p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negative items and reshuffle the data every epoch, so gradients
        # are computed on randomly chosen samples.
        if epoch > 0:   # the epoch-0 negatives were generated before the loop and used to initialize the class
            tes_pois_neg_masks = fun_random_neg_masks_tes(item_num, tra_pois, tes_pois_masks)
            model.update_neg_masks(tes_pois_neg_masks)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        random.seed(str(123 + epoch))
        user_idxs_tra = np.arange(user_num, dtype=np.int32)
        random.shuffle(user_idxs_tra)                       # shuffle the user_id order every epoch
        for uidx in user_idxs_tra:
            tra = tra_pois[uidx]        # list
            negs = tra_pois_negs[uidx]  # nested list
            for i in np.arange(len(tra) - 1):
                # i is step t-1, i+1 is step t.
                # Note: the negative is one POI drawn at random from all neighbours within the
                # truncation distance; like BPR and the RNN models, only one negative is used.
                loss += model.train(uidx, tra[i], tra[i + 1], random.sample(negs[i + 1], 1))
        rnn_l2_sqr = model.l2.eval()    # model.l2 is a TensorVariable, so eval() is needed to get its value
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('{v1}'.format(v1=int(loss + rnn_l2_sqr)))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        print("\tPredicting ...")
        # Compute the evaluation metrics and print the best values so far.
        fun_predict_auc_recall_map_ndcg(
            p, model, best, epoch, starts_ends_auc, starts_ends_tes, tes_pois_masks, tes_masks)
        best.fun_print_best(epoch)      # only the best results so far are printed each time
        t2 = time.time()
        times1.append(t2 - t1)
        print('\tavg. time (train, test): %0.0fs,' % np.average(times0),
              '%0.0fs,' % np.average(times1),
              '| alpha, lam: {v1}'.format(v1=', '.join([str(lam) for lam in [p['alpha'], p['lambda']]])),
              '| model: {v1}'.format(v1=model_name))

        # --------------------------------------------------------------------------------------------------
        if epoch == p['epochs'] - 1:
            # Save the best values and all loss values.
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name, epoch, p, best, losses)

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
def train_valid_or_test():
    """
    Main routine.
    :return:
    """
    # Build the parameters, data, model and best-value tracker.
    pas = Params()
    p = pas.p
    if 0 == p['mini_batch']:
        model, model_name, size = pas.build_model_one_by_one(flag=p['marank'])
    else:
        model, model_name, size = pas.build_model_mini_batch(flag=p['marank'])
    best = GlobalBest(at_nums=p['at_nums'])                 # stores the best metric values
    batch_idxs_tra, starts_ends_tra = pas.compute_start_end(flag='train')
    _, starts_ends_tes = pas.compute_start_end(flag='test')
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    # Pull out the frequently used attributes so the 'pas.' prefix is not needed below.
    user_num, item_num = pas.user_num, pas.item_num
    tra_buys_masks, tra_masks, tra_buys_neg_masks = pas.tra_buys_masks, pas.tra_masks, pas.tra_buys_neg_masks
    tes_buys_masks, tes_masks, tes_buys_neg_masks = pas.tes_buys_masks, pas.tes_masks, pas.tes_buys_neg_masks
    tra_set_masks = pas.tra_set_masks
    del pas

    # Main loop.
    losses = []
    times0, times1, times2, times3 = [], [], [], []
    for epoch in np.arange(p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negative items and reshuffle the data every epoch, so gradients
        # are computed on randomly chosen samples.
        if epoch > 0:   # the epoch-0 negatives were generated before the loop and used to initialize the class
            tra_buys_neg_masks = fun_random_neg_masks_tra(item_num, tra_buys_masks)
            tes_buys_neg_masks = fun_random_neg_masks_tes(item_num, tra_buys_masks, tes_buys_masks)
            model.update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        random.seed(str(123 + epoch))
        if 0 == p['mini_batch']:
            user_idxs_tra = np.arange(user_num, dtype=np.int32)
            random.shuffle(user_idxs_tra)                   # shuffle the user_id order every epoch
            for uidx in user_idxs_tra:
                tra = tra_buys_masks[uidx]
                neg = tra_buys_neg_masks[uidx]
                u_set = tra_set_masks[uidx]
                for i in np.arange(1, sum(tra_masks[uidx])):
                    # The positive/negative items come from step i, the item set from step i-1, e.g.
                    #   tra   = [0, 1, 2, 3]
                    #   u_set = [[10, 0], [0, 1], [1, 2], [2, 3]]
                    pq = [tra[i], neg[i]]
                    loss += model.train(uidx, pq, list(u_set[i - 1]))   # must be a list, otherwise Theano complains
        else:
            random.shuffle(batch_idxs_tra)                  # shuffle the batch_idx order every epoch
            for bidx in batch_idxs_tra:
                start_end = starts_ends_tra[bidx]
                random.shuffle(start_end)                   # shuffle the indexes inside the batch
                usrs = start_end
                tras = tra_buys_masks[start_end]            # one row per user: that user's positive-item sequence
                negs = tra_buys_neg_masks[start_end]
                u_sets = tra_set_masks[start_end]
                msks = tra_masks[start_end]
                for j in np.arange(1, max(np.sum(msks, axis=1))):
                    pqs = [tras[:, j], negs[:, j]]          # shape = (2, n)
                    cidxs = u_sets[:, j - 1, :]             # (n, set_size)
                    cidxs = [list(e) for e in cidxs]
                    loss += model.train(usrs, pqs, cidxs, list(msks[:, j]))     # one column at a time (each user's j-th item)
        rnn_l2_sqr = model.l2.eval()    # model.l2 is a TensorVariable, so eval() is needed to get its value
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('{v1}'.format(v1=int(loss + rnn_l2_sqr)))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        if 0 == epoch % 3 or epoch >= (p['epochs'] - 10):
            print("\tPredicting ...")
            # Compute the representations of all users and items.
            model.update_trained_items()    # for MV-GRU this first computes the fused image/text features
            model.update_trained_users()
            all_usr_c = []
            all_usr_l = []
            for start_end in starts_ends_tes:
                sub_usr_c, sub_usr_l = model.predict(start_end)
                all_usr_c.extend(sub_usr_c)
                all_usr_l.extend(sub_usr_l)
            model.update_trained_users_att(all_usr_c, all_usr_l)
            t2 = time.time()
            times1.append(t2 - t1)

            # Compute the evaluation metrics and print the best values so far.
            fun_predict_auc_recall_map_ndcg(
                p, model, best, epoch, starts_ends_auc, starts_ends_tes, tes_buys_masks, tes_masks)
            best.fun_print_best(epoch)      # only the best results so far are printed each time
            t3 = time.time()
            times2.append(t3 - t2)
            print('\tavg. time (train, user, test): %0.0fs,' % np.average(times0),
                  '%0.0fs,' % np.average(times1),
                  '%0.0fs' % np.average(times2),
                  '| alpha, lam: {v1}'.format(v1=', '.join([str(lam) for lam in [p['alpha'], p['lambda']]])),
                  '| model: {v1}, S{v2}_L{v3}'.format(v1=model_name, v2=p['set_len'], v3=p['layer']))

        # --------------------------------------------------------------------------------------------------
        if epoch in [p['epochs'] - 1, 99, 199]:
            # Save the best values and all loss values.
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name, epoch, p, best, losses)

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
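# In the mini-batch branch above, model.train is fed one time step at a time across the
# whole batch: tras[:, j] holds the j-th positive item of every user, and u_sets[:, j-1, :]
# holds each user's preceding item set. The toy arrays below only illustrate that slicing
# pattern; the values are made up.
_demo_tras = np.asarray([[11, 12, 13],
                         [21, 22, 23]])                       # 2 users, 3 time steps
_demo_sets = np.asarray([[[0, 11], [11, 12], [12, 13]],
                         [[0, 21], [21, 22], [22, 23]]])      # shape = (2 users, 3 steps, set_size 2)
assert list(_demo_tras[:, 1]) == [12, 22]                     # step j=1: each user's positive item
assert [list(e) for e in _demo_sets[:, 0, :]] == [[0, 11], [0, 21]]   # the item sets from step j-1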
def train_valid_or_test():
    """
    Main routine.
    :return:
    """
    # Build the parameters, data, model and best-value tracker.
    pas = Params()
    p = pas.p
    model, model_name = pas.build_model_one_by_one(flag=p['gru'])
    best = GlobalBest(at_nums=p['at_nums'])                 # stores the best metric values
    _, starts_ends_tes = pas.compute_start_end(flag='test')
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    # Pull out the frequently used attributes so the 'pas.' prefix is not needed below.
    user_num, item_num, dist_num = pas.user_num, pas.item_num, pas.dist_num
    tra_buys_masks, tra_masks, tra_buys_neg_masks = pas.tra_buys_masks, pas.tra_masks, pas.tra_buys_neg_masks
    tes_buys_masks, tes_masks, tes_buys_neg_masks = pas.tes_buys_masks, pas.tes_masks, pas.tes_buys_neg_masks
    dd = p['dd']
    pois_cordis = pas.pois_cordis
    ulptai = pas.ulptai
    del pas

    # Main loop.
    losses = []
    wds = []
    times0, times1, times2, times3 = [], [], [], []
    for epoch in np.arange(p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negative items and reshuffle the data every epoch, so gradients
        # are computed on randomly chosen samples.
        if epoch > 0:   # the epoch-0 negatives were generated before the loop and used to initialize the class
            tra_buys_neg_masks = fun_random_neg_masks_tra(item_num, tra_buys_masks)
            tes_buys_neg_masks = fun_random_neg_masks_tes(item_num, tra_buys_masks, tes_buys_masks)
            if p['gru'] in [0, 1]:
                model.update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks)
            else:
                tra_dist_neg_masks = fun_compute_dist_neg(
                    tra_buys_masks, tra_masks, tra_buys_neg_masks, pois_cordis, dd, dist_num)
                model.s_update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks, tra_dist_neg_masks)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        random.seed(str(123 + epoch))
        user_idxs_tra = np.arange(user_num, dtype=np.int32)
        random.shuffle(user_idxs_tra)                       # shuffle the user_id order every epoch
        if 0 == p['gru']:
            for uidx in user_idxs_tra:
                tra = tra_buys_masks[uidx]
                neg = tra_buys_neg_masks[uidx]
                for i in np.arange(sum(tra_masks[uidx])):
                    loss += model.train(uidx, [tra[i], neg[i]])
        else:
            for uidx in user_idxs_tra:
                loss += model.train(uidx)
        rnn_l2_sqr = model.l2.eval()    # model.l2 is a TensorVariable, so eval() is needed to get its value

        # Record the loss and the weight wd.
        wd = model.wd.eval()
        print('\t\twd = {v1}'.format(v1=wd))
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('{v1}'.format(v1=int(loss + rnn_l2_sqr)))
        wds.append('{v1}'.format(v1=wd))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        print("\tPredicting ...")
        # Compute the representations of all users and items.
        if 0 == p['gru']:
            model.update_trained_items()
            model.update_trained_users()
        elif 1 == p['gru']:
            model.update_trained_items()    # for MV-GRU this first computes the fused image/text features
            all_hus = []
            for start_end in starts_ends_tes:
                sub_all_hus = model.predict(start_end)
                all_hus.extend(sub_all_hus)
            model.update_trained_users(all_hus)
        else:
            model.update_trained_items()
            model.update_trained_dists()
            all_hus = []
            all_sus = []
            for start_end in starts_ends_tes:
                [sub_all_hus, sub_all_sus] = model.predict(start_end)
                all_hus.extend(sub_all_hus)
                all_sus.extend(sub_all_sus)
            probs = fun_acquire_prob(all_sus, ulptai, dist_num)     # input shapes = (user_num, dist_num), (user_num, item_num)
            model.update_trained_users(all_hus)
            model.update_prob(probs)
        t2 = time.time()
        times1.append(t2 - t1)

        # Compute the evaluation metrics and print the best values so far.
        fun_predict_auc_recall_map_ndcg(p, model, best, epoch, starts_ends_auc, starts_ends_tes,
                                        tes_buys_masks, tes_masks)
        best.fun_print_best(epoch)      # only the best results so far are printed each time
        t3 = time.time()
        times2.append(t3 - t2)
        print('\tavg. time (train, user, test): %0.0fs,' % np.average(times0),
              '%0.0fs,' % np.average(times1),
              '%0.0fs' % np.average(times2),
              '| alpha, lam: {v1}'.format(v1=', '.join([str(lam) for lam in [p['alpha'], p['lambda']]])),
              '| model: {v1}'.format(v1=model_name))

        # --------------------------------------------------------------------------------------------------
        if epoch in [p['epochs'] - 1, 99]:
            # Save the best values and all loss values.
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name, epoch, p, best, [losses, wds])

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
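# In the flag-0 branch above, each call model.train(uidx, [pos, neg]) performs one pairwise
# update on a (positive, negative) item pair. The numpy sketch below shows a generic
# BPR-style pairwise objective of that kind purely for orientation; the actual loss is
# defined inside the model class (as a Theano graph) and is not reproduced here, so treat
# the formula and the regularization handling as assumptions.
def _sketch_pairwise_bpr_loss(h_user, x_pos, x_neg, lmbda=1e-3):
    """-log(sigmoid(h.x_pos - h.x_neg)) plus an L2 term (reported separately as model.l2 above)."""
    score_diff = np.dot(h_user, x_pos) - np.dot(h_user, x_neg)
    l2 = lmbda * (np.sum(h_user ** 2) + np.sum(x_pos ** 2) + np.sum(x_neg ** 2))
    return -np.log(1.0 / (1.0 + np.exp(-score_diff))) + l2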
def train_valid_or_test(pas):
    """
    Main routine.
    :return:
    """
    p = pas.p
    model, model_name = pas.build_model_one_by_one(flag=p['GeoIE'])
    best = GlobalBest(at_nums=p['at_nums'])
    _, starts_ends_tes = pas.compute_start_end(flag='test')
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    user_num, item_num = pas.user_num, pas.item_num
    tra_masks, tes_masks = pas.tra_masks, pas.tes_masks
    tra_buys_masks, tes_buys_masks = pas.tra_buys_masks, pas.tes_buys_masks
    tra_dist_pos_masks, tra_dist_neg_masks, tra_dist_masks = \
        pas.tra_dist_pos_masks, pas.tra_dist_neg_masks, pas.tra_dist_masks
    pois_cordis = pas.pois_cordis
    del pas

    # Main loop.
    losses = []
    times0, times1, times2, times3 = [], [], [], []
    for epoch in np.arange(0, p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negatives and the distance masks every epoch.
        if epoch > 0:
            tra_buys_neg_masks = fun_random_neg_masks_tra(item_num, tra_buys_masks)
            tra_dist_pos_masks, tra_dist_neg_masks, tra_dist_masks = fun_compute_dist_neg(
                tra_buys_masks, tra_masks, tra_buys_neg_masks, pois_cordis)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        ls = [0, 0]
        total_ls = []
        random.seed(str(123 + epoch))
        user_idxs_tra = np.arange(user_num, dtype=np.int32)
        random.shuffle(user_idxs_tra)
        for uidx in user_idxs_tra:
            print(model.a.eval(), model.b.eval())   # debug output: current values of model.a and model.b
            dist_pos = tra_dist_pos_masks[uidx]
            dist_neg = tra_dist_neg_masks[uidx]
            msk = tra_dist_masks[uidx]
            tmp = model.train(uidx, dist_pos, dist_neg, msk)
            loss += tmp
            print(tmp)                              # debug output: this user's training loss
        rnn_l2_sqr = model.l2.eval()
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('{v1}'.format(v1=int(loss + rnn_l2_sqr)))
        # ls = model.loss_weight
        print('\t\tloss_weight = {v1}, {v2}'.format(v1=ls[0], v2=ls[1]))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        print("\tPredicting ...")
        model.update_trained()
        t2 = time.time()
        times1.append(t2 - t1)

        fun_predict_auc_recall_map_ndcg(p, model, best, epoch, starts_ends_auc, starts_ends_tes,
                                        tes_buys_masks, tes_masks)
        best.fun_print_best(epoch)
        t3 = time.time()
        times2.append(t3 - t2)
        print('\tavg. time (train, user, test): %0.0fs,' % np.average(times0),
              '%0.0fs,' % np.average(times1),
              '%0.0fs' % np.average(times2),
              '| alpha, lam: {v1}'.format(v1=', '.join([str(lam) for lam in [p['alpha'], p['lambda']]])),
              '| model: {v1}'.format(v1=model_name))

        # --------------------------------------------------------------------------------------------------
        if epoch == p['epochs'] - 1:
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name, epoch, p, best, losses)
            if 2 == p['gru']:
                size = p['latent_size']
                fil_name = 'size' + str(size) + 'UD' + str(p['UD']) + 'dd' + str(p['dd']) + 'loss.txt'
                fil = os.path.join(path, fil_name)
                np.savetxt(fil, total_ls)

        if 2 == p['gru'] and epoch % p['save_per_epoch'] == 0 and epoch != 0:
            m_path = './model/' + p['dataset'] + '/' + model_name + '_size' + \
                str(p['latent_size']) + '_UD' + str(p['UD']) + '_dd' + str(p['dd']) + '_epoch' + str(epoch)
            with open(m_path, 'wb') as file:
                save_model = [
                    model.loss_weight.get_value(), model.wd.get_value(), model.lt.get_value(),
                    model.di.get_value(), model.ui.get_value(), model.wh.get_value(),
                    model.bi.get_value(), model.vs.get_value(), model.bs.get_value()
                ]
                cPickle.dump(save_model, file, protocol=cPickle.HIGHEST_PROTOCOL)

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
def train_valid_or_test():
    """
    Main routine.
    :return:
    """
    # Build the parameters, data, model and best-value tracker.
    pas = Params()
    p = pas.p
    model, model_name, size = pas.build_model_one_by_one(flag=p['hcagru'])
    best = GlobalBest(at_nums=p['at_nums'])                 # stores the best metric values
    batch_idxs_tra, starts_ends_tra = pas.compute_start_end(flag='train')
    _, starts_ends_tes = pas.compute_start_end(flag='test')
    _, starts_ends_auc = pas.compute_start_end(flag='test_auc')

    # Pull out the frequently used attributes so the 'pas.' prefix is not needed below.
    user_num, item_num = pas.user_num, pas.item_num
    tra_buys_masks, tra_masks, tra_buys_neg_masks = pas.tra_buys_masks, pas.tra_masks, pas.tra_buys_neg_masks
    tes_buys_masks, tes_masks, tes_buys_neg_masks = pas.tes_buys_masks, pas.tes_masks, pas.tes_buys_neg_masks
    del pas

    # Main loop.
    losses = []
    times0, times1, times2, times3 = [], [], [], []
    for epoch in np.arange(p['epochs']):
        print("Epoch {val} ==================================".format(val=epoch))

        # Re-sample the negative items and reshuffle the data every epoch, so gradients
        # are computed on randomly chosen samples.
        if epoch > 0:   # the epoch-0 negatives were generated before the loop and used to initialize the class
            tra_buys_neg_masks = fun_random_neg_masks_tra(item_num, tra_buys_masks)
            tes_buys_neg_masks = fun_random_neg_masks_tes(item_num, tra_buys_masks, tes_buys_masks)
            model.update_neg_masks(tra_buys_neg_masks, tes_buys_neg_masks)

        # --------------------------------------------------------------------------------------------------
        print("\tTraining ...")
        t0 = time.time()
        loss = 0.
        random.seed(str(123 + epoch))
        if 0 == p['mini_batch']:
            user_idxs_tra = np.arange(user_num, dtype=np.int32)
            random.shuffle(user_idxs_tra)                   # shuffle the user_id order every epoch
            for uidx in user_idxs_tra:
                loss += model.train(uidx)
        else:
            random.shuffle(batch_idxs_tra)                  # shuffle the batch_idx order every epoch
            for bidx in batch_idxs_tra:
                start_end = starts_ends_tra[bidx]
                random.shuffle(start_end)                   # shuffle the indexes inside the batch
                loss += model.train(start_end)
        rnn_l2_sqr = model.l2.eval()    # model.l2 is a TensorVariable, so eval() is needed to get its value
        print('\t\tsum_loss = {val} = {v1} + {v2}'.format(val=loss + rnn_l2_sqr, v1=loss, v2=rnn_l2_sqr))
        losses.append('{v1}'.format(v1=int(loss + rnn_l2_sqr)))
        t1 = time.time()
        times0.append(t1 - t0)

        # --------------------------------------------------------------------------------------------------
        # Compute the representations of all users and items.
        model.update_trained_items()    # for MV-GRU this first computes the fused image/text features
        all_hus, all_ats = [], []
        if model_name in ['OboHcaGru', 'HcaGru']:   # compute and keep attention weights only for the full HcaGru
            for start_end in starts_ends_tes:
                sub_all_hus, sub_all_ats = model.predict(start_end)
                all_hus.extend(sub_all_hus)
                all_ats.extend(sub_all_ats)
        else:                                       # HcaX, HcaH
            for start_end in starts_ends_tes:
                sub_all_hus = model.predict(start_end)
                all_hus.extend(sub_all_hus)
        model.update_trained_users(all_hus)
        t2 = time.time()
        times1.append(t2 - t1)

        # --------------------------------------------------------------------------------------------------
        # if epoch >= 85 or (epoch % 10) == 9:
        if 0 == epoch % 4 or epoch >= (p['epochs'] - 15):
            print("\tPredicting ...")
            # Compute the evaluation metrics and print the best values so far.
            fun_predict_auc_recall_map_ndcg(p, model, best, epoch, starts_ends_auc, starts_ends_tes,
                                            tes_buys_masks, tes_masks)
            best.fun_print_best(epoch)      # only the best results so far are printed each time
            t3 = time.time()
            times2.append(t3 - t2)
            print('\tavg. time (train, user, test): %0.0fs,' % np.average(times0),
                  '%0.0fs,' % np.average(times1),
                  '%0.0fs' % np.average(times2),
                  '| alpha, lam: {v1}'.format(v1=', '.join([str(lam) for lam in [p['alpha'], p['lambda']]])),
                  '| model: {v1}, x{v2}_h{v3}'.format(v1=model_name, v2=p['window_x'], v3=p['window_h']))

        # --------------------------------------------------------------------------------------------------
        if epoch == p['epochs'] - 1:
            # Save all users' attention weights: the first three columns of the dataset's basic
            # information become the first three columns of the weight file, the weights follow.
            if model_name in ['OboHcaGru', 'HcaGru']:
                print("\tAttention Weights saving ...")
                path = os.path.join(os.path.split(__file__)[0], '..',
                                    'Results_attention_weights', PATH.split('/')[-2])
                fun_save_atts(path, model_name, epoch, p, best, all_ats, os.path.join(PATH, p['dataset']))
            # Save the best values and all loss values.
            print("\tBest and losses saving ...")
            path = os.path.join(os.path.split(__file__)[0], '..',
                                'Results_best_and_losses', PATH.split('/')[-2])
            fun_save_best_and_losses(path, model_name, epoch, p, best, losses)

    for i in p.items():
        print(i)
    print('\t the current Class name is: {val}'.format(val=model_name))
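# A hypothetical entry point for running the routine above when this module is executed as
# a script; the real repository may wire the call up elsewhere (e.g. in a separate launcher).
if __name__ == '__main__':
    train_valid_or_test()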