def trainModel(self):
    """Run one BPR training epoch over self.train_loader.

    Returns:
        float: sum of the (un-regularized) BPR losses over all batches.
    """
    self.model.train()
    train_loader = self.train_loader
    log("start negative sample...")
    train_loader.dataset.ng_sample()  # draw fresh negative items for this epoch
    log("finish negative sample...")
    epoch_loss = 0
    for user, item_i, item_j in train_loader:
        user = user.long().cuda()
        item_i = item_i.long().cuda()  # positive items
        item_j = item_j.long().cuda()  # sampled negative items
        user_embed, item_embed = self.model()
        userEmbed = user_embed[user]
        posEmbed = item_embed[item_i]
        negEmbed = item_embed[item_j]
        pred_i, pred_j = self.innerProduct(userEmbed, posEmbed, negEmbed)
        # BPR pairwise ranking loss: push positive scores above negative ones
        bprloss = -(pred_i.view(-1) - pred_j.view(-1)).sigmoid().log().sum()
        regLoss = (t.norm(userEmbed)**2 + t.norm(posEmbed)**2 +
                   t.norm(negEmbed)**2)
        # normalized by the configured batch size (last batch may be smaller)
        loss = (bprloss + self.args.reg * regLoss) / self.args.batch
        # loss = bprloss/self.args.batch
        epoch_loss += bprloss.item()  # NOTE: tracks the un-regularized BPR term only
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        log('step_loss = %f' % (loss.item()), save=False, oneline=True)
    log("\n")
    log("finish train")
    return epoch_loss
def loadData(datasetStr, cv):
    """Load the implicit-feedback training matrix for one CV fold.

    The Tianchi_time dataset has its own loader and is delegated to loadData2.
    """
    if datasetStr == "Tianchi_time":
        return loadData2(datasetStr, cv)
    fold_dir = os.path.join(os.path.dirname(os.getcwd()), "dataset",
                            datasetStr, 'implicit', "cv{0}".format(cv))
    log(fold_dir)
    with open(fold_dir + '/train.pkl', 'rb') as fs:
        return pk.load(fs)
def matDropOut(mat, rate):
    """Keep a random `rate` fraction of mat's nonzeros, force the diagonal,
    and return the result as a binary (0/1) sparse matrix."""
    assert rate < 1.0
    log("mat nnz = %d" % (mat.nnz))
    rows, cols = mat.nonzero()
    keep_count = int(mat.nnz * rate)
    chosen = np.random.permutation(rows.shape[0])[:keep_count]
    kept_rows = rows[chosen]
    kept_cols = cols[chosen]
    res = sp.csr_matrix((np.ones_like(kept_rows), (kept_rows, kept_cols)),
                        shape=mat.shape)
    # add self-loops, then binarize
    res = (res + sp.eye(mat.shape[0]) != 0) * 1
    assert res.max() == 1
    log("mat nnz after dropout= %d" % (res.nnz))
    return res
def loadData(datasetStr):
    """Load the train/trust matrices and the test/valid splits for a dataset.

    Returns:
        tuple: (trainMat, testData, validData, trustMat)
    """
    data_dir = os.path.join(os.getcwd(), "dataset", datasetStr)
    log(data_dir)
    loaded = []
    for fname in ('train.pkl', 'trust.pkl', 'test_data.pkl', 'valid_data.pkl'):
        with open(data_dir + '/' + fname, 'rb') as fs:
            loaded.append(pk.load(fs))
    trainMat, trustMat, testData, validData = loaded
    return (trainMat, testData, validData, trustMat)
def loadModel(self, modelPath):
    """Restore model, hyper-parameters and training history from
    ./Model/<dataset>/<modelPath>.pth."""
    # NOTE(review): uses module-level `args` here but `self.args` below —
    # presumably both point at the same namespace; verify.
    checkpoint = t.load(r'./Model/' + args.dataset + r'/' + modelPath + r'.pth')
    self.curEpoch = checkpoint['epoch'] + 1  # resume from the next epoch
    self.lr = checkpoint['lr']
    self.model = checkpoint['model']
    self.args.reg = checkpoint['reg']
    # restore history
    history = checkpoint['history']
    self.train_loss = history['loss']
    self.his_hr = history['HR']
    self.his_ndcg = history['NDCG']
    log("load model %s in epoch %d" % (modelPath, checkpoint['epoch']))
def trainModel(self, trainMat, trustMat, trusteeMat, trainMask, op):
    """One training epoch over shuffled user rows.

    Returns:
        tuple: (summed per-batch loss, epoch RMSE over masked entries)
    """
    num = trainMat.shape[0]
    shuffledIds = np.random.permutation(num)
    steps = int(np.ceil(num / BATCH_SIZE))  # ceil: last partial batch still counts
    epoch_loss = 0
    epoch_rmse = 0
    epoch_rmse_num = 0
    for i in range(steps):
        ed = min((i + 1) * BATCH_SIZE, num)
        batch_ids = shuffledIds[i * BATCH_SIZE:ed]
        cur_batch_size = len(batch_ids)
        # densify the sparse batch rows
        tmpTrain = trainMat[batch_ids].toarray()
        tmpTrust = trustMat[batch_ids].toarray()
        tmpTrustee = trusteeMat[batch_ids].toarray()
        #fakeTrust = np.zeros_like(tmpTrust)
        tmpMask = trainMask[batch_ids].toarray()
        train = t.from_numpy(tmpTrain).float().to(device)
        trust = t.from_numpy(tmpTrust).float().to(device)
        trustee = t.from_numpy(tmpTrustee).float().to(device)
        mask = t.from_numpy(1 * tmpMask).float().to(
            device)  # cast bool to int first, otherwise the conversion errors
        y_pred = self.model(train, trust, trustee)
        #loss = self.loss(y_pred * mask, train)
        loss = self.loss_rmse(y_pred * mask, train) / cur_batch_size
        epoch_loss += loss.item()
        #tem = (y_pred * mask - train)
        #a = t.sum(tem * tem).item()
        #tem = self.loss_rmse(y_pred * mask, train)
        tem = loss * cur_batch_size  # undo division: summed squared error of the batch
        epoch_rmse += tem.item()
        epoch_rmse_num += t.sum(mask).item()
        log('setp %d/%d, step_loss = %f' % (i, steps, loss),
            save=False, oneline=True)
        op.zero_grad()
        loss.backward()
        op.step()
    epoch_rmse = np.sqrt(epoch_rmse / epoch_rmse_num)
    #epoch_loss = epoch_loss / steps
    return epoch_loss, epoch_rmse
def matExpand(uuMat, rate=0.001):
    """Randomly add new links to a user-user matrix.

    Draws a random 0/1 matrix with the given density and unions it with
    uuMat, returning a binary csr matrix containing both the original and
    the newly sampled edges.

    Args:
        uuMat: sparse user-user adjacency matrix.
        rate: density of the random additions (default 0.001).
    """
    log("expand rate = %.4f" % (rate))
    row, col = uuMat.shape
    # draw the whole random matrix in one call instead of vstack-ing one
    # row at a time (the old loop was quadratic in the number of rows)
    res = (sp.random(row, col, density=rate, format='csr') != 0) * 1
    res2 = res + uuMat
    res2 = (res2 != 0) * 1  # binarize the union
    log("expand count = %d" % (res2.nnz - uuMat.nnz))
    # BUG FIX: previously returned `res` (the random matrix alone), which
    # silently dropped every original edge of uuMat despite logging the
    # expansion count of the union.
    return res2
def __init__(self, args, isLoad=False):
    """Build datasets, data loaders and training state for the BPR recommender."""
    self.args = args
    self.datasetDir = os.path.join(os.getcwd(), "dataset", args.dataset)
    trainMat, testData, validData, trustMat = self.getData(args)
    self.userNum, self.itemNum = trainMat.shape
    self.trainMat = trainMat
    # symmetrize the trust matrix and binarize it
    self.trustMat = ((trustMat + trustMat.T) != 0) * 1
    #train data
    train_u, train_v = self.trainMat.nonzero()
    assert np.sum(self.trainMat.data == 0) == 0
    log("train data size = %d" % (train_u.size))
    train_data = np.hstack(
        (train_u.reshape(-1, 1), train_v.reshape(-1, 1))).tolist()
    train_dataset = BPRData(train_data, self.itemNum, self.trainMat,
                            self.args.num_ng, True)
    self.train_loader = dataloader.DataLoader(train_dataset,
                                              batch_size=self.args.batch,
                                              shuffle=True,
                                              num_workers=0)
    #valid data
    # valid_dataset = BPRData(validData, self.itemNum, self.trainMat, 0, False)
    # self.valid_loader = dataloader.DataLoader(valid_dataset, batch_size=args.test_batch*101, shuffle=False, num_workers=0)
    #test_data
    # batch is a multiple of 101 — presumably 1 positive + 100 sampled
    # negatives per test record; verify against BPRData.
    test_dataset = BPRData(testData, self.itemNum, self.trainMat, 0, False)
    self.test_loader = dataloader.DataLoader(test_dataset,
                                             batch_size=args.test_batch * 101,
                                             shuffle=False,
                                             num_workers=0)
    self.lr = self.args.lr  #0.001
    self.curEpoch = 0
    self.isLoadModel = isLoad
    #history
    self.train_loss = []
    self.his_hr = []
    self.his_ndcg = []
    gc.collect()
    log("gc.collect()")
def trainModel(self, trainMat, trainMask, op):  # one epoch
    """One training epoch of the masked autoencoder.

    Returns:
        tuple: (summed batch loss, epoch RMSE). Comments translated from Chinese.
    """
    num = trainMat.shape[0]
    shuffledIds = np.random.permutation(num)
    steps = int(np.ceil(num / BATCH_SIZE))  # ceil: last partial batch still gets a step
    epoch_loss = 0
    epoch_rmse_loss = 0
    epoch_rmse_num = 0
    for i in range(steps):
        ed = min((i + 1) * BATCH_SIZE, num)
        batch_Ids = shuffledIds[i * BATCH_SIZE:ed]
        batch_len = len(batch_Ids)  # needed below to normalize the batch loss
        # prepare the data for step i
        ## to numpy
        tmpTrain = trainMat[batch_Ids].toarray()
        tmpMask = trainMask[batch_Ids].toarray()
        ## to tensor
        train = t.from_numpy(tmpTrain).float().to(device)
        mask = t.from_numpy(1 * tmpMask).float().to(device)  # bool -> int first, otherwise it errors
        y_pred = self.model(train)  # predictions for this batch
        # objective
        # NOTE(review): why divide by batch size? the original paper's loss
        # apparently does not — confirm.
        pred_loss = self.loss_mse(y_pred * mask, train) / batch_len
        v_loss = t.sum(self.V * self.V)  # t.sum, not t.mm
        w_loss = t.sum(self.W * self.W)  # * is element-wise
        batch_loss = pred_loss + self.V_regularWeight * v_loss + self.W_regularWeight * w_loss
        # accumulate rmse statistics
        epoch_loss += batch_loss.item()
        epoch_rmse_loss += self.RMSE(y_pred.cpu().detach().numpy(), tmpTrain, tmpMask)
        epoch_rmse_num += t.sum(mask).item()
        log('step %d/%d, step_loss=%f' % (i, steps, batch_loss.item()),
            save=False, oneline=True)
        op.zero_grad()  # clear gradients so they don't accumulate
        batch_loss.backward()
        op.step()
    epoch_rmse = np.sqrt(epoch_rmse_loss / epoch_rmse_num)
    return epoch_loss, epoch_rmse
def loadData(datasetStr, rate):
    """Load the train/test/valid/trust pickles for the given sampling rate.

    Returns:
        tuple: (train, test, valid, trust)
    """
    mats_dir = os.path.join(os.getcwd(), "data", datasetStr, 'mats')
    parts = ('train', 'test', 'valid', 'trust')
    paths = [mats_dir + '/{0}_{1}.pkl'.format(rate, part) for part in parts]
    for p in paths:
        log(p)
    mats = []
    for p in paths:
        with open(p, 'rb') as fs:
            mats.append(pk.load(fs))
    train, test, valid, trust = mats
    return train, test, valid, trust
def run(self):
    """Main training loop with validation-HR early stopping.

    If a checkpoint load is requested, evaluates it and returns immediately.
    Otherwise trains for up to args.epochs, saving on every validation
    improvement and reloading the best model when patience runs out.
    """
    self.prepareModel()
    if self.isLoadModel == True:
        self.loadModel(LOAD_MODEL_PATH)
        HR, NDCG = self.test()
        return
    cvWait = 0
    best_HR = 0.1
    # BUG FIX: best_epoch was unbound (NameError at the early-stop log)
    # whenever HR never exceeded the initial best_HR before patience expired.
    best_epoch = self.curEpoch
    for e in range(self.curEpoch, self.args.epochs + 1):
        self.curEpoch = e  # remembered so saveModel() can resume from here
        log("**************************************************************")
        log("start train")
        epoch_loss, epoch_uu_dgi_loss, epoch_ii_dgi_loss = self.trainModel()
        log("end train")
        self.train_loss.append(epoch_loss)
        log("epoch %d/%d, epoch_loss=%.2f, dgi_uu_loss=%.4f, dgi_ii_loss=%.4f" %
            (e, self.args.epochs, epoch_loss, epoch_uu_dgi_loss, epoch_ii_dgi_loss))
        if e < self.args.startTest:
            # skip (expensive) evaluation during warm-up epochs
            HR, NDCG = 0, 0
            cvWait = 0
        else:
            HR, NDCG = self.validModel(self.valid_loader)
        self.his_hr.append(HR)
        self.his_ndcg.append(NDCG)
        log("epoch %d/%d, valid HR = %.4f, valid NDCG = %.4f" %
            (e, self.args.epochs, HR, NDCG))
        if e % 10 == 0 and e != 0:
            testHR, testNDCG = self.test()
            log("test HR = %.4f, test NDCG = %.4f" % (testHR, testNDCG))
        self.adjust_learning_rate(self.opt, e)
        if HR > best_HR:
            best_HR = HR
            cvWait = 0
            best_epoch = self.curEpoch
            self.saveModel()
        else:
            cvWait += 1
            log("cvWait = %d" % (cvWait))
        self.saveHistory()
        if cvWait == self.args.patience:
            log('Early stopping! best epoch = %d' % (best_epoch))
            self.loadModel(self.modelName)
            testHR, testNDCG = self.test()
            log("test HR = %.4f, test NDCG = %.4f" % (testHR, testNDCG))
            break
def run(self): self.prePareModel() # if self.isLoadModel: #先不管这个 # self.loadModel(LOAD_MODEL_PATH) #训练 for e in range(self.curEpoch,EPOCH+1): epoch_loss,epoch_rmse=self.trainModel(self.trainMat,self.trainMask,self.optimizer) log("epoch %d/%d, epoch_loss=%.2f, epoch_rmse=%.4f"%(e,EPOCH,epoch_loss,epoch_rmse)) self.train_losses.append(epoch_loss) self.train_RMSEs.append(epoch_rmse) #打印正则损失 log('V_Loss = %.2f, W_Loss = %.2f'%(t.sum(self.V * self.V), t.sum(self.W * self.W))) #交叉验证 cv_epoch_loss,cv_epoch_rmse=self.testModel(self.trainMat,self.testMat,self.testMask,5)#//////////①???/////////// log("epoch %d/%d, cv_epoch_loss=%.2f,cv_epoch_rmse=%.4f"%(e, EPOCH, cv_epoch_loss,cv_epoch_rmse)) log("\n") self.test_losses.append(cv_epoch_loss) self.test_RMSEs.append(cv_epoch_rmse) #调整学习率并保存 self.curLr = self.adjust_learning_rate(self.optimizer, e) self.curEpoch=e #记录当前的EPOCH,用于保存model # if e%10==0 and e!=0: # self.saveModel() # test_epoch_loss=self.testModel(self.trainMat,self.testMat,self.testMask,1) # log("epoch %d/%d, test_epoch_loss=%.2f"%(e, EPOCH, test_epoch_loss)) # self.step_losses.append(test_epoch_loss) # # for i in range(len(self.step_losses)): # # print("***************************") # # print("rmse = %.4f"%(self.step_rmse[i])) # # print("***************************") #测试 _,test_rmse=self.testModel(self.trainMat,self.testMat,self.testMask,1) self.writeResult(test_rmse) log("\n") log("test_rmse=%.4f"%(test_rmse)) self.getModelName()
def __init__(self, args):
    """Load data, build the social graph, pre-train DGI, and set up
    train/valid/test state for the rating predictor."""
    self.args = args
    train, test, valid, trust, multi_adj = self.getData(self.args)
    tmpMat = (trust + trust.T)
    userNum = trust.shape[0]
    social_adj = (tmpMat != 0) * 1
    # NOTE(review): the symmetric adjacency above is immediately overwritten —
    # the graph is built from the *directed* trust matrix plus self-loops.
    # Confirm this is intended.
    #add self->self
    social_adj = trust + sp.eye(trust.shape[0])
    social_adj = (social_adj != 0) * 1
    social_adj = social_adj.tocsr()
    edge_src, edge_dst = social_adj.nonzero()
    self.social_graph = dgl.graph(data=(edge_src, edge_dst),
                                  idtype=t.int32,
                                  num_nodes=trust.shape[0],
                                  device=device_gpu)
    #pre train social embeddings with DGI
    self.dgi_path = self.preTrain(trust)
    self.userNum, self.itemNum = train.shape
    self.ratingClass = np.unique(train.data).size
    log("user num =%d, item num =%d" % (self.userNum, self.itemNum))
    self.multi_adj = multi_adj
    # mask marking which (item, rating) nodes actually occur in the data
    item_degree = t.from_numpy((np.sum(multi_adj, axis=1).A != 0) * 1)
    self.att_mask = item_degree.view(-1, self.ratingClass).float().to(device_gpu)
    tmpTrust = (trust + trust.T)
    tmpTrust = (tmpTrust != 0) * 1
    a = csr_matrix((multi_adj.shape[1], multi_adj.shape[1]))
    b = csr_matrix((multi_adj.shape[0], multi_adj.shape[0]))
    # block matrix [[0, A^T], [A, 0]] joining users and (item, rating) nodes
    multi_uv_adj = sp.vstack(
        [sp.hstack([a, multi_adj.T]),
         sp.hstack([multi_adj, b])])
    #train test valid data
    train_coo = train.tocoo()
    test_coo = test.tocoo()
    valid_coo = valid.tocoo()
    self.train_u, self.train_v, self.train_r = train_coo.row, train_coo.col, train_coo.data
    self.test_u, self.test_v, self.test_r = test_coo.row, test_coo.col, test_coo.data
    self.valid_u, self.valid_v, self.valid_r = valid_coo.row, valid_coo.col, valid_coo.data
    self.MyDataLoader = MyData(train, trust, self.args.seed,
                               num_ng=1, is_training=True)
    assert np.sum(self.train_r == 0) == 0
    assert np.sum(self.test_r == 0) == 0
    assert np.sum(self.valid_r == 0) == 0
    self.trainMat = train
    self.testMat = test
    self.validMat = valid
    self.trustMat = trust
    #normalize
    self.adj = normalize_adj(multi_uv_adj + sp.eye(multi_uv_adj.shape[0]))
    self.adj_sp_tensor = sparse_mx_to_torch_sparse_tensor(self.adj).cuda()
    self.att_adj = sparse_mx_to_torch_sparse_tensor(
        self.trainMat.T != 0).float().cuda()
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24 —
    # use the fixed-width np.float64 instead.
    self.att_adj_norm = t.from_numpy(
        np.sum(self.trainMat.T != 0, axis=1).astype(np.float64)).float().cuda()
    self.hide_dim = eval(self.args.layer)[0]
    self.r_weight = self.args.r
    self.loss_rmse = nn.MSELoss(reduction='sum')  # summed, not averaged
    self.lr = self.args.lr  #0.001
    self.decay = self.args.decay
    self.curEpoch = 0
    #history
    self.train_losses = []
    self.train_RMSEs = []
    self.train_MAEs = []
    self.test_losses = []
    self.test_RMSEs = []
    self.test_MAEs = []
    self.step_rmse = []
    self.step_mae = []
def prepareModel(self): self.modelName = self.getModelName() #set random seed self.setRandomSeed() self.out_dim = sum(eval(self.args.layer)) self.embed_layer = HGNN(self.userNum, self.itemNum, \ self.userNum, self.args.dgi_hide_dim, \ self.itemNum*self.ratingClass, self.hide_dim, \ layer=self.args.layer, alpha=0.1).cuda() self.predLayer = nn.Sequential( nn.Linear(self.out_dim * 2, self.out_dim * 1), nn.ReLU(), nn.Linear(self.out_dim * 1, 1), nn.ReLU()).cuda() self.w_r = nn.Sequential(nn.Linear(self.out_dim * 2, self.out_dim), nn.ReLU(), nn.Linear(self.out_dim, 1, bias=False)).cuda() self.w_t = nn.Sequential(nn.Linear(self.out_dim * 2, self.out_dim), nn.ReLU(), nn.Linear(self.out_dim, 1, bias=False)).cuda() #one-hot feature self.item_feat_sp_tensor = generate_sp_ont_hot( self.itemNum * self.ratingClass).cuda() self.user_feat_sp_tensor = generate_sp_ont_hot(self.userNum).cuda() self.dgi = DGI(self.social_graph, self.userNum, self.args.dgi_hide_dim, nn.PReLU()).cuda() self.dgi.load_state_dict(t.load(self.dgi_path)) log("load dgi model %s" % (self.dgi_path)) self.user_dgi_feat = self.dgi.encoder( self.user_feat_sp_tensor).detach() if self.args.dgi_norm == 1: self.user_dgi_feat = F.normalize(self.user_dgi_feat, p=2, dim=1) #weight_dict have different reg weight weight_dict_params = list( map(id, self.embed_layer.weight_dict.parameters())) base_params = filter(lambda p: id(p) not in weight_dict_params, self.embed_layer.parameters()) self.opt = t.optim.Adam([ { 'params': base_params, 'weight_decay': self.args.r }, { 'params': self.embed_layer.weight_dict.parameters(), 'weight_decay': self.args.r2 }, { 'params': self.predLayer.parameters(), 'weight_decay': self.args.r3 }, { 'params': self.w_r.parameters(), 'weight_decay': self.args.r }, { 'params': self.w_t.parameters(), 'weight_decay': self.args.r }, ], lr=self.args.lr)
def run(self):
    """Training loop with separate stop counters for the two reconstruction
    losses and validation-RMSE early stopping for the main task.

    Comments translated from Chinese.
    """
    # decide between loading a saved model and training from scratch
    self.prepareModel()
    validWait = 0
    best_rmse = 9999.0
    best_mae = 9999.0
    rewait_r = 0
    rewait_t = 0
    best_reconstruct_loss_r = 1000000000
    best_reconstruct_loss_t = 1000000000
    for e in range(self.curEpoch, self.args.epochs + 1):
        # remember current epoch so the model can be saved/resumed
        self.curEpoch = e
        log("**************************************************************")
        #train
        epoch_reconstruct_loss_r = 0  # NOTE(review): unused — value comes from trainModel() below
        epoch_loss, epoch_rmse, epoch_mae, reconstruct_ui_loss, reconstruct_uu_loss = self.trainModel()
        log("epoch %d/%d, epoch_loss=%.2f, reconstruct_ui_loss=%.4f, reconstruct_uu_loss=%.4f, epoch_rmse=%.4f, epoch_mae=%.4f" % \
            (e, self.args.epochs, epoch_loss, reconstruct_ui_loss, reconstruct_uu_loss, epoch_rmse, epoch_mae))
        # disable the u-i reconstruction term once it stops improving
        if reconstruct_ui_loss > 0:
            if reconstruct_ui_loss < best_reconstruct_loss_r:
                best_reconstruct_loss_r = reconstruct_ui_loss
                rewait_r = 0
            else:
                rewait_r += 1
                log("rewait_r={0}".format(rewait_r))
            if rewait_r == self.args.rewait:
                self.args.lam_r = 0
                log("stop uv reconstruction")
        # disable the u-u reconstruction term once it stops improving
        if reconstruct_uu_loss > 0:
            if reconstruct_uu_loss < best_reconstruct_loss_t:
                best_reconstruct_loss_t = reconstruct_uu_loss
                rewait_t = 0
            else:
                rewait_t += 1
                log("rewait_t={0}".format(rewait_t))
            if rewait_t == self.args.rewait:
                self.args.lam_t = 0
                log("stop uu reconstruction")
        self.curLr = self.adjust_learning_rate(self.opt, e + 1)
        self.train_losses.append(epoch_loss)
        self.train_RMSEs.append(epoch_rmse)
        self.train_MAEs.append(epoch_mae)
        # valid
        valid_epoch_loss, valid_epoch_rmse, valid_epoch_mae = self.testModel(
            self.validMat, (self.valid_u, self.valid_v, self.valid_r))
        log("epoch %d/%d, valid_epoch_loss=%.2f, valid_epoch_rmse=%.4f, valid_epoch_mae=%.4f"
            % (e, self.args.epochs, valid_epoch_loss, valid_epoch_rmse, valid_epoch_mae))
        self.test_losses.append(valid_epoch_loss)
        self.test_RMSEs.append(valid_epoch_rmse)
        self.test_MAEs.append(valid_epoch_mae)
        # test
        test_epoch_loss, test_epoch_rmse, test_epoch_mae = self.testModel(
            self.testMat, (self.test_u, self.test_v, self.test_r))
        log("epoch %d/%d, test_epoch_loss=%.2f, test_epoch_rmse=%.4f, test_epoch_mae=%.4f"
            % (e, self.args.epochs, test_epoch_loss, test_epoch_rmse, test_epoch_mae))
        self.step_rmse.append(test_epoch_rmse)
        self.step_mae.append(test_epoch_mae)
        if best_rmse > valid_epoch_rmse:
            best_rmse = valid_epoch_rmse
            best_mae = valid_epoch_mae
            validWait = 0
            best_epoch = self.curEpoch
        else:
            validWait += 1
            log("validWait = %d" % (validWait))
        if self.args.early == 1 and validWait == self.args.patience:
            log('Early stopping! best epoch = %d' % (best_epoch))
            break
def test(self):
    """Evaluate the current model on the held-out test loader and log
    HR/NDCG together with the model name."""
    hr, ndcg = self.validModel(self.test_loader)
    log("test HR = %.4f, test NDCG = %.4f" % (hr, ndcg))
    log("model name : %s" % (self.modelName))
def run(self):
    """Train with early stopping on test-set HR; optionally resume from a
    checkpoint first. Comments translated from Chinese."""
    # decide between loading a saved model and training from scratch
    self.prepareModel()
    if self.isLoadModel == True:
        self.loadModel(LOAD_MODEL_PATH)
        HR, NDCG = self.validModel(self.test_loader)
        log("HR = %.4f, NDCG = %.4f" % (np.mean(HR), np.mean(NDCG)))
    cvWait = 0
    best_HR = 0.1
    # BUG FIX: best_epoch was unbound (NameError at the early-stop log)
    # whenever HR never exceeded the initial best_HR before patience expired.
    best_epoch = self.curEpoch
    for e in range(self.curEpoch, self.args.epochs + 1):
        self.curEpoch = e  # remembered so the model can be saved/resumed
        log("**************************************************************")
        #train
        epoch_loss = self.trainModel()
        self.train_loss.append(epoch_loss)
        log("epoch %d/%d, epoch_loss=%.2f" %
            (e, self.args.epochs, epoch_loss))
        HR, NDCG = self.validModel(self.test_loader)
        self.his_hr.append(HR)
        self.his_ndcg.append(NDCG)
        log("epoch %d/%d, test HR = %.4f, test NDCG = %.4f" %
            (e, self.args.epochs, HR, NDCG))
        self.adjust_learning_rate(self.opt, e)
        if HR > best_HR:
            best_HR = HR
            cvWait = 0
            best_epoch = self.curEpoch
        else:
            cvWait += 1
            log("cvWait = %d" % (cvWait))
        self.saveHistory()
        if cvWait == self.args.patience:
            log('Early stopping! best epoch = %d' % (best_epoch))
            log("model name : %s" % (self.modelName))
            break
def __init__(self, args, isLoad=False):
    """Load data, build the uu/ii graphs and the time-aware user-item graph,
    and set up data loaders plus training state."""
    self.args = args
    self.datasetDir = os.path.join(os.getcwd(), "dataset", args.dataset)
    trainMat, validData, multi_adj_time, uuMat, iiMat = self.getData(args)
    self.userNum, self.itemNum = trainMat.shape
    log("uu num = %d" % (uuMat.nnz))
    log("ii num = %d" % (iiMat.nnz))
    self.trainMat = trainMat
    # user-user graph
    uuMat_edge_src, uuMat_edge_dst = uuMat.nonzero()
    self.uu_graph = dgl.graph(data=(uuMat_edge_src, uuMat_edge_dst),
                              idtype=t.int32,
                              num_nodes=uuMat.shape[0],
                              device=device_gpu)
    # item-item graph
    iiMat_edge_src, iiMat_edge_dst = iiMat.nonzero()
    self.ii_graph = dgl.graph(data=(iiMat_edge_src, iiMat_edge_dst),
                              idtype=t.int32,
                              num_nodes=iiMat.shape[0],
                              device=device_gpu)
    # sub-graph messages are cached on disk; args.clear forces a rebuild
    uu_subGraph_data = self.datasetDir + '/uuMat_subGraph_data.pkl'
    if self.args.clear == 1:
        if os.path.exists(uu_subGraph_data):
            log("clear uu sub graph message")
            os.remove(uu_subGraph_data)
    if os.path.exists(uu_subGraph_data):
        data = load(uu_subGraph_data)
        self.uu_node_subGraph, self.uu_subGraph_adj, self.uu_dgi_node = data
    else:
        log("rebuild uu sub graph message")
        _, self.uu_node_subGraph, self.uu_subGraph_adj, self.uu_dgi_node = buildSubGraph(
            uuMat, self.args.subNode)
        data = (self.uu_node_subGraph, self.uu_subGraph_adj, self.uu_dgi_node)
        with open(uu_subGraph_data, 'wb') as fs:
            pickle.dump(data, fs)
    ii_subGraph_data = self.datasetDir + '/iiMat_subGraph_data.pkl'
    if self.args.clear == 1:
        if os.path.exists(ii_subGraph_data):
            log("clear ii sub graph message")
            os.remove(ii_subGraph_data)
    if os.path.exists(ii_subGraph_data):
        data = load(ii_subGraph_data)
        self.ii_node_subGraph, self.ii_subGraph_adj, self.ii_dgi_node = data
    else:
        log("rebuild ii sub graph message")
        _, self.ii_node_subGraph, self.ii_subGraph_adj, self.ii_dgi_node = buildSubGraph(
            iiMat, self.args.subNode)
        data = (self.ii_node_subGraph, self.ii_subGraph_adj, self.ii_dgi_node)
        with open(ii_subGraph_data, 'wb') as fs:
            pickle.dump(data, fs)
    self.uu_subGraph_adj_tensor = sparse_mx_to_torch_sparse_tensor(
        self.uu_subGraph_adj).cuda()
    self.uu_subGraph_adj_norm = t.from_numpy(
        np.sum(self.uu_subGraph_adj, axis=1)).float().cuda()
    self.ii_subGraph_adj_tensor = sparse_mx_to_torch_sparse_tensor(
        self.ii_subGraph_adj).cuda()
    self.ii_subGraph_adj_norm = t.from_numpy(
        np.sum(self.ii_subGraph_adj, axis=1)).float().cuda()
    # 0/1 masks marking which nodes take part in the DGI losses
    self.uu_dgi_node_mask = np.zeros(self.userNum)
    self.uu_dgi_node_mask[self.uu_dgi_node] = 1
    self.uu_dgi_node_mask = t.from_numpy(self.uu_dgi_node_mask).float().cuda()
    self.ii_dgi_node_mask = np.zeros(self.itemNum)
    self.ii_dgi_node_mask[self.ii_dgi_node] = 1
    self.ii_dgi_node_mask = t.from_numpy(self.ii_dgi_node_mask).float().cuda()
    #norm time value
    log("time process")
    self.time_step = self.args.time_step
    log("time step = %.1f hour" % (self.time_step))
    time_step = 3600 * self.time_step  # seconds per discrete time bucket
    row, col = multi_adj_time.nonzero()
    data = multi_adj_time.data
    minUTC = data.min()
    # bucket timestamps; the +2 offset keeps ids 0 and 1 free
    # BUG FIX: np.int / np.float were removed in NumPy 1.24 — use the
    # fixed-width np.int64 / np.float64 aliases instead.
    data = ((data - minUTC) / time_step).astype(np.int64) + 2
    assert np.sum(row == col) == 0
    multi_adj_time_norm = sp.coo_matrix(
        (data, (row, col)), dtype=np.int64,
        shape=multi_adj_time.shape).tocsr()
    self.maxTime = multi_adj_time_norm.max() + 1
    log("max time = %d" % (self.maxTime))
    num = multi_adj_time_norm.shape[0]
    multi_adj_time_norm = multi_adj_time_norm + sp.eye(num)  # self-loops carry value 1
    print("uv graph link num = %d" % (multi_adj_time_norm.nnz))
    edge_src, edge_dst = multi_adj_time_norm.nonzero()
    time_seq = multi_adj_time_norm.tocoo().data
    self.time_seq_tensor = t.from_numpy(
        time_seq.astype(np.float64)).long().to(device_gpu)
    self.ratingClass = np.unique(trainMat.data).size
    log("user num =%d, item num =%d" % (self.userNum, self.itemNum))
    self.uv_g = dgl.graph(data=(edge_src, edge_dst),
                          idtype=t.int32,
                          num_nodes=multi_adj_time_norm.shape[0],
                          device=device_gpu)
    #train data
    train_u, train_v = self.trainMat.nonzero()
    assert np.sum(self.trainMat.data == 0) == 0
    log("train data size = %d" % (train_u.size))
    train_data = np.hstack(
        (train_u.reshape(-1, 1), train_v.reshape(-1, 1))).tolist()
    train_dataset = BPRData(train_data, self.itemNum, self.trainMat,
                            self.args.num_ng, True)
    self.train_loader = dataloader.DataLoader(train_dataset,
                                              batch_size=self.args.batch,
                                              shuffle=True,
                                              num_workers=0)
    #valid data
    valid_dataset = BPRData(validData, self.itemNum, self.trainMat, 0, False)
    self.valid_loader = dataloader.DataLoader(valid_dataset,
                                              batch_size=args.test_batch * 101,
                                              shuffle=False,
                                              num_workers=0)
    self.lr = self.args.lr  #0.001
    self.curEpoch = 0
    self.isLoadModel = isLoad
    #history
    self.train_loss = []
    self.his_hr = []
    self.his_ndcg = []
    gc.collect()
    log("gc.collect()")
def trainModel(self):
    """One BPR epoch with optional user-side and item-side DGI regularization.

    Returns:
        tuple: (sum of BPR losses, sum of uu DGI losses, sum of ii DGI losses).
    """
    train_loader = self.train_loader
    log("start negative sample...")
    train_loader.dataset.ng_sample()  # fresh negatives each epoch
    log("finish negative sample...")
    epoch_loss = 0
    epoch_uu_dgi_loss = 0
    epoch_ii_dgi_loss = 0
    for user, item_i, item_j in train_loader:
        user = user.long().cuda()
        item_i = item_i.long().cuda()  # positive items
        item_j = item_j.long().cuda()  # sampled negative items
        user_embed, item_embed = self.model(self.uv_g, self.time_seq_tensor,
                                            self.out_dim, self.ratingClass,
                                            True)
        userEmbed = user_embed[user]
        posEmbed = item_embed[item_i]
        negEmbed = item_embed[item_j]
        pred_i, pred_j = self.innerProduct(userEmbed, posEmbed, negEmbed)
        # BPR pairwise ranking loss
        bprloss = -(pred_i.view(-1) - pred_j.view(-1)).sigmoid().log().sum()
        regLoss = (t.norm(userEmbed)**2 + t.norm(posEmbed)**2 +
                   t.norm(negEmbed)**2)
        loss = 0.5 * (bprloss + self.args.reg * regLoss) / self.args.batch
        uu_dgi_loss = 0
        ii_dgi_loss = 0
        # user-side DGI loss, restricted to batch users that have a sub-graph
        if self.args.lam[0] != 0:
            uu_dgi_pos_loss, uu_dgi_neg_loss = self.uu_dgi(user_embed, self.uu_subGraph_adj_tensor, \
                self.uu_subGraph_adj_norm, self.uu_node_subGraph, self.uu_dgi_node)
            userMask = t.zeros(self.userNum).cuda()
            userMask[user] = 1
            userMask = userMask * self.uu_dgi_node_mask
            uu_dgi_loss = ((uu_dgi_pos_loss * userMask).sum() +
                           (uu_dgi_neg_loss * userMask).sum()) / t.sum(userMask)
            epoch_uu_dgi_loss += uu_dgi_loss.item()
        # item-side DGI loss over the batch's positive and negative items
        if self.args.lam[1] != 0:
            ii_dgi_pos_loss, ii_dgi_neg_loss = self.ii_dgi(item_embed, self.ii_subGraph_adj_tensor, \
                self.ii_subGraph_adj_norm, self.ii_node_subGraph, self.ii_dgi_node)
            iiMask = t.zeros(self.itemNum).cuda()
            iiMask[item_i] = 1
            iiMask[item_j] = 1
            iiMask = iiMask * self.ii_dgi_node_mask
            ii_dgi_loss = ((ii_dgi_pos_loss * iiMask).sum() +
                           (ii_dgi_neg_loss * iiMask).sum()) / t.sum(iiMask)
            epoch_ii_dgi_loss += ii_dgi_loss.item()
        loss = loss + self.args.lam[0] * uu_dgi_loss + self.args.lam[
            1] * ii_dgi_loss
        epoch_loss += bprloss.item()  # NOTE: tracks the un-regularized BPR term only
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        # log('setp %d/%d, step_loss = %f'%(i, loss.item()), save=False, oneline=True)
    return epoch_loss, epoch_uu_dgi_loss, epoch_ii_dgi_loss
def __init__(self, args, isLoad=False):
    """Build the joint user-item (optionally trust-augmented) graph and the
    train/test/valid data loaders for one CV fold."""
    self.args = args
    self.datasetDir = os.path.join(os.path.dirname(os.getcwd()), "dataset",
                                   args.dataset, 'implicit',
                                   "cv{0}".format(args.cv))
    trainMat, uuMat, iiMat = self.getData(args)
    self.userNum, self.itemNum = trainMat.shape
    log("user num =%d, item num =%d" % (self.userNum, self.itemNum))
    u_i_adj = (trainMat != 0) * 1
    i_u_adj = u_i_adj.T
    a = sp.csr_matrix((self.userNum, self.userNum))
    b = sp.csr_matrix((self.itemNum, self.itemNum))
    # block adjacency [[uu, ui], [iu, 0]]: the user-user block is the trust
    # matrix when args.trust == 1, otherwise all zeros
    if args.trust == 1:
        adj = sp.vstack(
            [sp.hstack([uuMat, u_i_adj]),
             sp.hstack([i_u_adj, b])]).tocsr()
    else:
        adj = sp.vstack([sp.hstack([a, u_i_adj]),
                         sp.hstack([i_u_adj, b])]).tocsr()
    log("uu num = %d" % (uuMat.nnz))
    log("ii num = %d" % (iiMat.nnz))
    self.trainMat = trainMat
    edge_src, edge_dst = adj.nonzero()
    self.uv_g = dgl.graph(data=(edge_src, edge_dst),
                          idtype=t.int32,
                          num_nodes=adj.shape[0],
                          device=device_gpu)
    #train data
    train_u, train_v = self.trainMat.nonzero()
    assert np.sum(self.trainMat.data == 0) == 0
    log("train data size = %d" % (train_u.size))
    train_data = np.hstack(
        (train_u.reshape(-1, 1), train_v.reshape(-1, 1))).tolist()
    train_dataset = BPRData(train_data, self.itemNum, self.trainMat,
                            self.args.num_ng, True)
    self.train_loader = dataloader.DataLoader(train_dataset,
                                              batch_size=self.args.batch,
                                              shuffle=True,
                                              num_workers=0)
    #test_data
    # batch is a multiple of 101 — presumably 1 positive + 100 sampled
    # negatives per test record; verify against BPRData.
    with open(self.datasetDir + "/test_data.pkl", 'rb') as fs:
        test_data = pickle.load(fs)
    test_dataset = BPRData(test_data, self.itemNum, self.trainMat, 0, False)
    self.test_loader = dataloader.DataLoader(test_dataset,
                                             batch_size=args.test_batch * 101,
                                             shuffle=False,
                                             num_workers=0)
    #valid data
    with open(self.datasetDir + "/valid_data.pkl", 'rb') as fs:
        valid_data = pickle.load(fs)
    valid_dataset = BPRData(valid_data, self.itemNum, self.trainMat, 0, False)
    self.valid_loader = dataloader.DataLoader(valid_dataset,
                                              batch_size=args.test_batch * 101,
                                              shuffle=False,
                                              num_workers=0)
    self.lr = self.args.lr  #0.001
    self.curEpoch = 0
    self.isLoadModel = isLoad
    #history
    self.train_loss = []
    self.his_hr = []
    self.his_ndcg = []
    gc.collect()
    log("gc.collect()")
def trainModel(self):
    """One epoch of rating prediction with optional u-i and u-u BPR-style
    reconstruction losses.

    Returns:
        tuple: (rmse_loss_sum, rmse, mae, avg_ui_reconstruct_loss,
        avg_uu_reconstruct_loss).
    """
    train_loader = self.MyDataLoader
    log("start negative sample...")
    train_loader.neg_sample()
    log("finish negative sample...")
    userShuffleList = np.random.permutation(self.userNum)
    batch = self.args.batch
    length = self.userNum
    stepCount = math.ceil(length / batch)
    epoch_rmse_loss = 0
    epoch_rmse_num = 0
    epoch_mae_loss = 0
    epoch_reconstruct_ui_loss = 0
    epoch_reconstruct_uu_loss = 0
    for step in range(stepCount):
        beginIdx = step * batch
        endIdx = min((step + 1) * batch, length)
        curStepUserIdx = userShuffleList[beginIdx:endIdx]
        ui_train, uu_train = train_loader.getTrainInstance(curStepUserIdx)
        batch_nodes_u = ui_train[:, 0]
        batch_nodes_v = ui_train[:, 1]
        labels = t.from_numpy(ui_train[:, 2]).float().to(device_gpu)
        neg_label = t.from_numpy(ui_train[:, 3]).float().to(device_gpu)  # NOTE(review): unused
        user_embed, item_muliti_embed = self.embed_layer(
            self.user_dgi_feat, self.user_feat_sp_tensor,
            self.item_feat_sp_tensor, self.adj_sp_tensor)
        # one embedding slice per rating level
        item_muliti_embed = item_muliti_embed.view(-1, self.ratingClass,
                                                   self.out_dim)
        #mean or attention
        item_embed = t.div(t.sum(item_muliti_embed, dim=1), self.ratingClass)
        # u-i reconstruction: rank the true rating slice above a negative one
        if self.args.lam_r != 0:
            reconstruct_pos = self.w_r(
                t.cat(
                    (user_embed[batch_nodes_u],
                     item_muliti_embed[batch_nodes_v, ui_train[:, 2] - 1]),
                    dim=1))
            reconstruct_neg = self.w_r(
                t.cat(
                    (user_embed[batch_nodes_u],
                     item_muliti_embed[batch_nodes_v, ui_train[:, 3] - 1]),
                    dim=1))
            reconstruct_loss = (
                -(reconstruct_pos.view(-1) -
                  reconstruct_neg.view(-1)).sigmoid().log().sum())
            epoch_reconstruct_ui_loss += reconstruct_loss.item()
        # u-u reconstruction: rank observed trust pairs above sampled negatives
        if self.args.lam_t != 0:
            trust_uid = uu_train[:, 0]
            trust_tid = uu_train[:, 1]
            trust_neg_uid = uu_train[:, 2]
            reconstruct_pos_t = self.w_t(
                t.cat((user_embed[trust_uid], user_embed[trust_tid]), dim=1))
            reconstruct_neg_t = self.w_t(
                t.cat((user_embed[trust_uid], user_embed[trust_neg_uid]),
                      dim=1))
            trust_reconstruct_loss = (
                -(reconstruct_pos_t.view(-1) -
                  reconstruct_neg_t.view(-1)).sigmoid().log().sum())
            epoch_reconstruct_uu_loss += trust_reconstruct_loss.item()
        userEmbed = user_embed[batch_nodes_u]
        itemEmbed = item_embed[batch_nodes_v]
        pred = self.preModel(userEmbed, itemEmbed)
        loss = self.loss_rmse(pred.view(-1), labels)
        epoch_rmse_loss += loss.item()
        epoch_mae_loss += t.sum(t.abs(pred.view(-1) - labels)).item()
        epoch_rmse_num += batch_nodes_u.size
        curBathch = ui_train.shape[0]  # (sic) number of u-i pairs this step
        loss = loss / curBathch
        if self.args.lam_r != 0:
            loss += ((reconstruct_loss * self.args.lam_r) / curBathch)
        if self.args.lam_t != 0:
            loss += ((trust_reconstruct_loss * self.args.lam_t) /
                     uu_train.shape[0])
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()
        log('setp %d/%d, step_loss = %f' % (step, stepCount, loss.item()),
            save=False, oneline=True)
    epoch_rmse = np.sqrt(epoch_rmse_loss / epoch_rmse_num)
    epoch_mae = epoch_mae_loss / epoch_rmse_num
    epoch_reconstruct_ui_loss = epoch_reconstruct_ui_loss / stepCount
    epoch_reconstruct_uu_loss = epoch_reconstruct_uu_loss / stepCount
    return epoch_rmse_loss, epoch_rmse, epoch_mae, epoch_reconstruct_ui_loss, epoch_reconstruct_uu_loss
def preTrain(self, trust):
    """Pre-train a DGI encoder on the symmetrized social graph.

    Saves a checkpoint every time the loss improves and returns the path
    of the best checkpoint.
    """
    tmpMat = (trust + trust.T)
    userNum = trust.shape[0]
    # userNum, itemNum = train.shape
    adj = (tmpMat != 0) * 1
    adj = adj + sp.eye(adj.shape[0])  # self-loops
    adj = adj.tocsr()
    # degree-proportional node weights for the DGI readout
    nodeDegree = np.sum(adj, axis=1)
    degreeSum = np.sum(nodeDegree)
    dgi_weight = t.from_numpy(
        (nodeDegree + 1e-6) / degreeSum).float().cuda()
    user_feat_sp_tensor = generate_sp_ont_hot(userNum).cuda()
    in_feats = userNum
    edge_src, edge_dst = adj.nonzero()
    self.social_graph = dgl.graph(data=(edge_src, edge_dst),
                                  idtype=t.int32,
                                  num_nodes=trust.shape[0],
                                  device=device_gpu)
    dgi = DGI(self.social_graph, in_feats, args.dgi_hide_dim,
              nn.PReLU()).cuda()
    dgi_optimizer = t.optim.Adam(dgi.parameters(),
                                 lr=args.dgi_lr,
                                 weight_decay=args.dgi_reg)
    cnt_wait = 0
    best = 1e9
    best_t = 0
    path = None  # set on the first improvement (always happens at epoch 0)
    for epoch in range(500):
        dgi.train()
        dgi_optimizer.zero_grad()
        # corrupt the features by row-shuffling the one-hot identity matrix
        idx = np.random.permutation(userNum)
        shuf_feat = sparse_mx_to_torch_sparse_tensor(
            sp.eye(userNum).tocsr()[idx]).cuda()
        loss = dgi(user_feat_sp_tensor, shuf_feat, dgi_weight)
        loss.backward()
        dgi_optimizer.step()
        log("%.4f" % (loss.item()), save=False, oneline=True)
        if loss < best:
            best = loss
            best_t = epoch
            cnt_wait = 0
            DIR = os.path.join(os.getcwd(), "Model", self.args.dataset)
            path = DIR + r"/dgi_" + modelUTCStr + "_" + args.dataset + "_" + str(
                args.rate) + "_" + str(args.dgi_hide_dim) + "_" + str(
                    args.dgi_reg)
            path += '.pth'
            t.save(dgi.state_dict(), path)
            # t.save(dgi, path)
        else:
            cnt_wait += 1
            if cnt_wait == 5:
                print('DGI Early stopping!')
                # BUG FIX: the loop previously kept training all 500 epochs
                # after announcing early stopping; actually stop here.
                break
    print(path)
    return path
def run(self):
    """Full train / cross-validate / test cycle for the trust-aware model.

    Comments translated from Chinese.
    """
    # decide between loading a saved model and retraining from scratch
    if self.isLoadModel == True:
        self.loadModel(LOAD_MODEL_PATH)
    for e in range(self.curEpoch, EPOCH + 1):
        #train
        epoch_loss, epoch_rmse = self.trainModel(self.trainMat,
                                                 self.trustMat,
                                                 self.trusteeMat,
                                                 self.trainMask,
                                                 self.optimizer)
        regularLoss = self.getRegularLoss(self.model)
        log("\n")
        log("epoch %d/%d, epoch_loss=%.2f, epoch_rmse=%.4f" %
            (e, EPOCH, epoch_loss, epoch_rmse))
        self.train_losses.append(epoch_loss)
        self.train_RMSEs.append(epoch_rmse)
        # print regularization losses
        log('W1_Loss = %.2f, W2_Loss = %.2f, W3_Loss = %.2f' %
            (regularLoss[0], regularLoss[1], regularLoss[2]))
        # cross-validation
        cv_epoch_loss, cv_epoch_rmse = self.testModel(
            self.trainMat, self.trustMat, self.trusteeMat, self.cvMat,
            self.cvMask, 5)
        log("\n")
        log("epoch %d/%d, cv_epoch_loss=%.2f, cv_epoch_rmse=%f" %
            (e, EPOCH, cv_epoch_loss, cv_epoch_rmse))
        self.test_losses.append(cv_epoch_loss)
        self.test_RMSEs.append(cv_epoch_rmse)
        # adjust the learning rate every epoch
        self.adjust_learning_rate(self.optimizer, e)
        # remember the current epoch, used when saving the model
        self.curEpoch = e
        if e % 5 == 0 and e != 0:
            self.saveModel()
    #test
    test_epoch_loss, test_epoch_rmse = self.testModel(
        self.trainMat, self.trustMat, self.trusteeMat, self.testMat,
        self.testMask)
    log("\n")
    log("test_rmse=%f" % (test_epoch_rmse))
def run(self):
    """Main loop: optional checkpoint restore, then training with
    validation-HR early stopping. Comments translated from Chinese."""
    # decide between loading a saved model and training from scratch
    self.prepareModel()
    if self.isLoadModel == True:
        self.loadModel(LOAD_MODEL_PATH)
        HR, NDCG = self.validModel(self.valid_loader, save=False)
        log("HR = %.4f, NDCG = %.4f" % (np.mean(HR), np.mean(NDCG)))
    cvWait = 0
    best_HR = 0.1
    # BUG FIX: best_epoch was unbound (NameError at the early-stop log)
    # whenever HR never exceeded the initial best_HR before patience expired.
    best_epoch = self.curEpoch
    for e in range(self.curEpoch, self.args.epochs + 1):
        self.curEpoch = e  # remembered so saveModel() can resume from here
        log("**************************************************************")
        #train
        log("start train")
        epoch_loss = self.trainModel()
        log("end train")
        self.train_loss.append(epoch_loss)
        log("epoch %d/%d, epoch_loss=%.2f" %
            (e, self.args.epochs, epoch_loss))
        if e < self.args.startTest:
            # skip (expensive) evaluation during warm-up epochs
            HR, NDCG = 0, 0
            cvWait = 0
        else:
            HR, NDCG = self.validModel(self.valid_loader)
            log("epoch %d/%d, valid HR = %.4f, valid NDCG = %.4f" %
                (e, self.args.epochs, HR, NDCG))
        self.his_hr.append(HR)
        self.his_ndcg.append(NDCG)
        self.adjust_learning_rate(self.opt, e)
        if HR > best_HR:
            best_HR = HR
            cvWait = 0
            best_epoch = self.curEpoch
            self.saveModel()
        else:
            cvWait += 1
            log("cvWait = %d" % (cvWait))
        self.saveHistory()
        if cvWait == self.args.patience:
            log('Early stopping! best epoch = %d' % (best_epoch))
            self.loadModel(self.modelName)
            break