def train_model(self, k):
    """Train the integrated neighbourhood / latent-factor model.

    After delegating to the parent ``train_model(k)``, every epoch walks the
    training triples and applies one gradient step per rating to the user and
    item biases (``Bu``/``Bi``), the latent factors (``P``/``Q``) and the
    item-item weights ``W`` of the neighbours returned by ``get_neighbor``.
    The squared error plus the regularisation terms are accumulated in
    ``self.loss``. Training ends after ``config.maxIter`` epochs or as soon as
    ``isConverged`` returns a truthy value; ``self.user_item_nei`` is then
    saved to disk.

    Args:
        k: forwarded unchanged to the parent class's ``train_model``.
    """
    super(IntegSVD, self).train_model(k)

    epoch = 0
    while epoch < self.config.maxIter:
        cfg = self.config
        self.loss = 0

        for user, item, rating in self.rg.trainSet():
            u_idx = self.rg.user[user]
            i_idx = self.rg.item[item]
            neighbors = self.get_neighbor(user, item)
            # Square root of the neighbourhood size; only used as a divisor
            # inside the neighbour loop, so an empty neighbourhood is harmless.
            root_n = len(neighbors) ** 0.5

            err = rating - self.predict(user, item)
            self.loss += err ** 2

            # NOTE(review): if P and Q are NumPy arrays these are views, so
            # the Q update below sees the already-updated P[u] -- confirm
            # that this sequential update is intended.
            p_u, q_i = self.P[u_idx], self.Q[i_idx]

            # Bias and latent-vector updates.
            self.Bu[u_idx] += cfg.lr * (err - cfg.lambdaB * self.Bu[u_idx])
            self.Bi[i_idx] += cfg.lr * (err - cfg.lambdaB * self.Bi[i_idx])
            self.P[u_idx] += cfg.lr * (err * q_i - cfg.lambdaP * p_u)
            self.Q[i_idx] += cfg.lr * (err * p_u - cfg.lambdaQ * q_i)

            # The implicit-feedback (Y) update and the C update are disabled
            # in this variant; only W is updated for each neighbour.
            for nb in neighbors:
                j_idx = self.rg.item[nb]
                r_uj = self.rg.trainSet_u[user][nb]
                b_uj = self.rg.globalMean + self.Bu[u_idx] + self.Bi[j_idx]
                self.W[i_idx][j_idx] += cfg.lr * (
                    err / root_n * (r_uj - b_uj)
                    - cfg.lambdaW * self.W[i_idx][j_idx])

        # Regularisation terms (Y and C terms are disabled, see above).
        penalty = (
            cfg.lambdaP * (self.P * self.P).sum()
            + cfg.lambdaQ * (self.Q * self.Q).sum()
            + cfg.lambdaB * ((self.Bu * self.Bu).sum()
                             + (self.Bi * self.Bi).sum())
            + cfg.lambdaW * (self.W * self.W).sum())
        self.loss += penalty

        epoch += 1
        if self.isConverged(epoch):
            break

    util.save_data(self.user_item_nei,
                   '../data/neibor/ft_intsvd_useritemnei_08.pkl')
def init_model(self):
    """Build the social-walk corpus, vocabulary, network and unigram table.

    Steps, in order:
      1. Run the parent ``init_model``.
      2. Load the weighted implicit-trust edge list, generate walks with
         ``netwalker.deepwalk_without_alpha``, shuffle them and save them to
         ``config.walk_result_path``.
      3. Build the inverted index via ``self.inverted_index()``.
      4. Read the saved walks back as the corpus for ``Vocab``.
      5. Call ``reset_index`` and ``init_net``, then build the unigram table,
         or load it when ``config.table_path`` already exists.
    """
    super(GEMF, self).init_model()
    print('starting initialization...')

    # 1. Extract the user corpus from the users' social network (netwalker).
    print('=' * 5 + 'extracting user corpus with users social network' +
          '=' * 5)
    # Alternative generators tried previously: netwalker.build_deepwalk_corpus
    # on an unweighted or a weighted graph, and netwalker.deepwalk_with_alpha.
    weight_dic, G = netwalker.load_edgelist_with_weight(
        self.implict_trust_path, undirected=self.config.undirected)
    self.walks = netwalker.deepwalk_without_alpha(
        G, weight_dic, self.config.number_walks, self.config.path_length,
        self.config.restart_pro, self.config.random_state)

    # Shuffle the walks in place before persisting them.
    np.random.shuffle(self.walks)
    netwalker.save_walks(self.walks, self.config.walk_result_path)

    print('=' * 5 + 'generating inverted index...' + '=' * 5)
    self.inverted_index()

    # 2. Initialise w and w' for the graph embedding.
    print('=' * 5 + 'read social corpus' + '=' * 5)
    # The training corpus is opened in a ``with`` block so the handle is
    # always closed (the previous code leaked it).
    # NOTE(review): assumes Vocab consumes the stream inside its constructor
    # and does not keep reading from it afterwards -- confirm.
    with open(self.config.walk_result_path, 'r') as corpus:
        # User nodes and their indices.
        self.social_vocab = Vocab(corpus, self.config.min_count)

    # Open question from the original author: do all social-network users
    # also appear in the user-item matrix? If they are a subset this is
    # straightforward; otherwise those users must be randomly initialised.
    print('=' * 5 + 'initialize network for word2vec' + '=' * 5)
    self.reset_index()
    self.init_net()

    print('=' * 5 + 'generate the unigram table for word2vec' + '=' * 5)
    if os.path.exists(self.config.table_path):
        # Reuse the cached table.
        self.table = util.load_data(self.config.table_path)
    else:
        self.table = UnigramTable(self.social_vocab)
        util.save_data(self.table, self.config.table_path)
def build_user_item_sim_CF(self):
    """Compute user-user and item-item similarities and their top-k neighbours.

    Fresh ``SimMatrix`` objects are filled with rounded ``pearson_sp`` scores
    for every unordered pair of distinct users (rows of ``self.rg``) and of
    distinct items (columns of ``self.rg``). For each user/item the
    ``config.user_near_num`` / ``config.item_near_num`` highest-scoring
    entries are stored in ``self.user_k_neibor`` / ``self.item_k_neibor``.
    All four structures are saved under ``../data/sim`` and
    ``../data/neibor``; the directories are created when missing.
    """
    import os
    from collections import defaultdict

    self.user_sim = SimMatrix()
    self.item_sim = SimMatrix()
    self.user_k_neibor = defaultdict(dict)
    self.item_k_neibor = defaultdict(dict)

    # Make sure the output folders exist before anything is written; the
    # sibling variants of this method do the same.
    for out_dir in ('../data/sim', '../data/neibor'):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # ---- user-user similarity matrix ----
    print('constructing user-user similarity matrix...')
    for u1 in self.rg.user:
        for u2 in self.rg.user:
            # Skip self-pairs and pairs that were already stored.
            if u1 == u2 or self.user_sim.contains(u1, u2):
                continue
            sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
            self.user_sim.set(u1, u2, round(sim, 5))
    util.save_data(self.user_sim, '../data/sim/ft_08_uu_tricf_cv0.pkl')

    # ---- k nearest neighbours of every user ----
    user_k = self.config.user_near_num
    for user in self.rg.user:
        ranked = sorted(self.user_sim[user].items(),
                        key=lambda x: x[1], reverse=True)
        self.user_k_neibor[user] = dict(ranked[:user_k])
    util.save_data(
        self.user_k_neibor,
        '../data/neibor/ft_08_uu_' + str(user_k) + '_neibor_tricf_cv0.pkl')

    # ---- item-item similarity matrix ----
    print('constructing item-item similarity matrix...')
    for i1 in self.rg.item:
        for i2 in self.rg.item:
            if i1 == i2 or self.item_sim.contains(i1, i2):
                continue
            sim = pearson_sp(self.rg.get_col(i1), self.rg.get_col(i2))
            self.item_sim.set(i1, i2, round(sim, 5))
    util.save_data(self.item_sim, '../data/sim/ft_08_ii_tricf_cv0.pkl')

    # ---- k nearest neighbours of every item ----
    item_k = self.config.item_near_num
    for item in self.rg.item:
        ranked = sorted(self.item_sim[item].items(),
                        key=lambda x: x[1], reverse=True)
        self.item_k_neibor[item] = dict(ranked[:item_k])
    util.save_data(
        self.item_k_neibor,
        '../data/neibor/ft_08_ii_' + str(item_k) + '_neibor_tricf_cv0.pkl')
def build_user_item_sim_CF(self, kfold, user_near_num=50, item_near_num=50, load_save_sim=False):
    """Build item/user similarity matrices and top-k neighbour tables.

    Item similarities come from ``self.mg.getSimMatrix(jaccard_sim)``. User
    similarities come from Word2Vec embeddings trained on random walks over a
    user co-rating network (users are linked when they share at least one
    item that more than one user rated with a positive value).

    Args:
        kfold: passed to ``RatingGetter`` and ``MetaGetter``.
        user_near_num: maximum number of neighbours kept per user.
        item_near_num: maximum number of neighbours kept per item.
        load_save_sim: when true, load the cached similarity matrices and
            neighbour tables from ``../data`` instead of computing the
            similarity matrices; neighbour tables are still refreshed from
            the similarity matrices and saved again.

    Notes:
        ``self.user_sim``, ``self.item_k_neibor`` and ``self.user_k_neibor``
        are not created here when ``load_save_sim`` is false; they must
        already exist on the instance.
    """
    from collections import defaultdict

    self.rg = RatingGetter(kfold)
    self.mg = MetaGetter(kfold)
    dataset = self.config.dataset_name

    # ------------------------------------------------------------------
    # Item-item similarity matrix
    # ------------------------------------------------------------------
    print('构建 item-item 相似度矩阵 ...')
    item_sim_path = '../data/sim/%s_08_ii_gemf_cv0.pkl' % dataset
    if load_save_sim:
        self.item_sim = util.load_data(item_sim_path)
    else:
        # Item similarity computation is encapsulated in MetaGetter.
        self.item_sim = self.mg.getSimMatrix(jaccard_sim)
        util.save_data(self.item_sim, item_sim_path)

    # k nearest neighbours of every item.
    item_nei_path = ('../data/neibor/%s_08_ii_%s_neibor_gemf_cv0.pkl'
                     % (dataset, item_near_num))
    if load_save_sim:
        self.item_k_neibor = util.load_data(item_nei_path)
    # NOTE(review): the table is recomputed even right after loading it,
    # as in the original flow -- confirm whether that is intended.
    for item in self.mg.item:
        ranked = sorted(self.item_sim[item].items(),
                        key=lambda x: x[1], reverse=True)
        self.item_k_neibor[item] = dict(ranked[:item_near_num])
    util.save_data(self.item_k_neibor, item_nei_path)

    # ------------------------------------------------------------------
    # User-user similarity matrix
    # ------------------------------------------------------------------
    print('构建 user-user 相似度矩阵 ...')
    user_sim_path = '../data/sim/%s_08_uu_gemf_cv0.pkl' % dataset
    if load_save_sim:
        self.user_sim = util.load_data(user_sim_path)
    else:
        # Keep only items rated by more than one user.
        itemNet = {}
        for item in self.rg.trainSet_i:
            if len(self.rg.trainSet_i[item]) > 1:
                itemNet[item] = self.rg.trainSet_i[item]

        # For every user, the kept items they rated with a positive value.
        filteredRatings = defaultdict(list)
        for item in itemNet:
            for user in itemNet[item]:
                if itemNet[item][user] > 0:
                    filteredRatings[user].append(item)

        # Link two users when they share at least one item. The item sets
        # are built once instead of once per pair.
        item_sets = {}
        for user in filteredRatings:
            item_sets[user] = set(filteredRatings[user])
        self.CUNet = defaultdict(list)
        for user1 in tqdm(filteredRatings):
            s1 = item_sets[user1]
            for user2 in filteredRatings:
                if user1 != user2 and not s1.isdisjoint(item_sets[user2]):
                    self.CUNet[user1].append(user2)

        print('Generating random deep walks...')
        self.walks = []
        self.visited = defaultdict(dict)
        for user in tqdm(self.CUNet):
            for _ in range(self.config.walkCount):
                path = [str(user)]
                lastNode = user
                for _step in range(1, self.config.walkLength):
                    nextNode = choice(self.CUNet[lastNode])
                    count = 0
                    while nextNode in self.visited[lastNode]:
                        nextNode = choice(self.CUNet[lastNode])
                        # Give up after walkLength retries so the loop
                        # cannot spin forever (an earlier version used 10).
                        count += 1
                        if count == self.config.walkLength:
                            break
                    path.append(str(nextNode))
                    # NOTE(review): the check above reads visited[lastNode]
                    # but the mark is stored under visited[user]; kept as
                    # in the original -- confirm.
                    self.visited[user][nextNode] = 1
                    lastNode = nextNode
                self.walks.append(path)

        # NOTE(review): ``size=``/``iter=`` and ``model[key]`` look like the
        # pre-4.0 gensim API -- confirm which library ``w2v`` is.
        self.model = w2v.Word2Vec(self.walks,
                                  size=self.config.walkDim,
                                  window=5,
                                  min_count=0,
                                  iter=3)

        processed = 0
        for u1 in tqdm(self.CUNet):
            for u2 in self.CUNet:
                # Fixed: the original compared the stale ``user1``/``user2``
                # left over from the network loop, so this guard was never
                # true and no similarity was ever stored.
                if u1 == u2 or self.user_sim.contains(u1, u2):
                    continue
                wu1 = self.model[str(u1)]
                wu2 = self.model[str(u2)]
                # Original author's open question: what if a vector is empty?
                self.user_sim.set(u1, u2, cosine(wu1, wu2))
            processed += 1
            if processed % 200 == 0:
                print('progress:', processed, '/', len(self.CUNet))

        if not os.path.exists('../data/sim'):
            os.makedirs('../data/sim')
            print('../data/sim folder has been established.')
        util.save_data(self.user_sim, user_sim_path)

    # k nearest neighbours of every user.
    user_nei_path = ('../data/neibor/%s_08_uu_%s_neibor_gemf_cv0.pkl'
                     % (dataset, user_near_num))
    if load_save_sim:
        self.user_k_neibor = util.load_data(user_nei_path)

    # Fixed: rank each user's own similarity row. The original reused the
    # ``sims``/``u1`` variables leaked from the last iteration of the loop
    # above, giving every user the same neighbours and raising NameError
    # when the similarities were loaded from disk.
    # NOTE(review): assumes ``self.user_sim[user]`` yields a dict-like row
    # for every user in ``self.rg.user`` -- the sibling variants of this
    # method rely on the same behaviour; confirm for users with no row.
    self.topKSim = defaultdict(dict)
    for user in self.rg.user:
        ranked = sorted(self.user_sim[user].items(),
                        key=lambda d: d[1], reverse=True)
        ranked = ranked[:self.config.topK]
        self.topKSim[user] = ranked[:user_near_num]
        self.user_k_neibor[user] = dict(self.topKSim[user])

    if not os.path.exists('../data/neibor'):
        os.makedirs('../data/neibor')
        print('../data/neibor folder has been established.')
    util.save_data(self.user_k_neibor, user_nei_path)
def build_user_item_sim_CF(self, kfold, user_near_num=50, item_near_num=50, load_save_sim=False):
    """Build item/user similarity matrices and top-k neighbour tables.

    Item similarities come from ``self.mg.getSimMatrix(jaccard_sim)``; user
    similarities are rounded ``pearson_sp`` scores over the rating rows of
    every pair of distinct users.

    Args:
        kfold: passed to ``RatingGetter`` and ``MetaGetter``.
        user_near_num: maximum number of neighbours kept per user.
        item_near_num: maximum number of neighbours kept per item.
        load_save_sim: when true, load the cached similarity matrices and
            neighbour tables from ``../data`` instead of computing the
            similarity matrices; neighbour tables are still refreshed from
            the similarity matrices and saved again.

    Notes:
        ``self.user_sim``, ``self.item_k_neibor`` and ``self.user_k_neibor``
        are not created here when ``load_save_sim`` is false; they must
        already exist on the instance.
    """
    self.rg = RatingGetter(kfold)
    self.mg = MetaGetter(kfold)
    dataset = self.config.dataset_name

    # ------------------------------------------------------------------
    # Item-item similarity matrix
    # ------------------------------------------------------------------
    print('构建 item-item 相似度矩阵 ...')
    item_sim_path = '../data/sim/%s_08_ii_cucmemf_cv0.pkl' % dataset
    if load_save_sim:
        self.item_sim = util.load_data(item_sim_path)
    else:
        # Item similarity computation is encapsulated in MetaGetter.
        self.item_sim = self.mg.getSimMatrix(jaccard_sim)
        util.save_data(self.item_sim, item_sim_path)

    # k nearest neighbours of every item.
    item_nei_path = ('../data/neibor/%s_08_ii_%s_neibor_cucmemf_cv0.pkl'
                     % (dataset, item_near_num))
    if load_save_sim:
        self.item_k_neibor = util.load_data(item_nei_path)
    # NOTE(review): the table is recomputed even right after loading it,
    # as in the original flow -- confirm whether that is intended.
    for item in self.mg.item:
        ranked = sorted(self.item_sim[item].items(),
                        key=lambda x: x[1], reverse=True)
        self.item_k_neibor[item] = dict(ranked[:item_near_num])
    util.save_data(self.item_k_neibor, item_nei_path)

    # ------------------------------------------------------------------
    # User-user similarity matrix
    # ------------------------------------------------------------------
    print('构建 user-user 相似度矩阵 ...')
    user_sim_path = '../data/sim/%s_08_uu_cucmemf_cv0.pkl' % dataset
    if load_save_sim:
        self.user_sim = util.load_data(user_sim_path)
    else:
        for u1 in tqdm(self.rg.user):
            for u2 in self.rg.user:
                # Skip self-pairs and pairs that were already stored.
                if u1 == u2 or self.user_sim.contains(u1, u2):
                    continue
                # Original author's open question: Pearson similarity, or
                # switch to cosine similarity?
                sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
                self.user_sim.set(u1, u2, round(sim, 5))
        if not os.path.exists('../data/sim'):
            os.makedirs('../data/sim')
            print('../data/sim folder has been established.')
        util.save_data(self.user_sim, user_sim_path)

    # k nearest neighbours of every user.
    user_nei_path = ('../data/neibor/%s_08_uu_%s_neibor_cucmemf_cv0.pkl'
                     % (dataset, user_near_num))
    if load_save_sim:
        self.user_k_neibor = util.load_data(user_nei_path)
    for user in self.rg.user:
        ranked = sorted(self.user_sim[user].items(),
                        key=lambda x: x[1], reverse=True)
        # Fixed: the slice used to start at ``kfold``, so for any fold other
        # than 0 the most similar users were silently dropped. The item
        # branch above has always sliced from the start.
        self.user_k_neibor[user] = dict(ranked[:user_near_num])

    if not os.path.exists('../data/neibor'):
        os.makedirs('../data/neibor')
        print('../data/neibor folder has been established.')
    util.save_data(self.user_k_neibor, user_nei_path)
def build_user_item_sim_CF(self, kfold, user_near_num=50, item_near_num=50, load_save_sim=False):
    """Build Pearson-based user/item similarity matrices and top-k neighbours.

    Similarities are rounded ``pearson_sp`` scores over the rating rows
    (users) and rating columns (items) of ``self.rg`` for every pair of
    distinct users / items.

    Args:
        kfold: passed to ``RatingGetter``.
        user_near_num: maximum number of neighbours kept per user.
        item_near_num: maximum number of neighbours kept per item.
        load_save_sim: when true, load the cached similarity matrices from
            ``../data/sim`` instead of computing them. Neighbour tables are
            always recomputed and saved.

    Notes:
        ``self.user_sim`` / ``self.item_sim`` (when not loaded) and
        ``self.user_k_neibor`` / ``self.item_k_neibor`` are not created
        here; they must already exist on the instance.
    """
    self.rg = RatingGetter(kfold)

    # ------------------------------------------------------------------
    # User-user similarity matrix
    # ------------------------------------------------------------------
    print('构建 user-user 相似度矩阵 ...')
    user_sim_path = '../data/sim/db_08_uu_tricf_cv0.pkl'
    if load_save_sim:
        self.user_sim = util.load_data(user_sim_path)
    else:
        for u1 in self.rg.user:
            for u2 in self.rg.user:
                # Skip self-pairs and pairs that were already stored.
                if u1 == u2 or self.user_sim.contains(u1, u2):
                    continue
                # Original author's note: Pearson similarity? Switch to
                # cosine similarity.
                sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
                self.user_sim.set(u1, u2, round(sim, 5))
        if not os.path.exists('../data/sim'):
            os.makedirs('../data/sim')
            print('../data/sim folder has been established.')
        util.save_data(self.user_sim, user_sim_path)

    # k nearest neighbours of every user.
    for user in self.rg.user:
        ranked = sorted(self.user_sim[user].items(),
                        key=lambda x: x[1], reverse=True)
        # Fixed: the slice used to start at ``kfold``, so for any fold other
        # than 0 the most similar users were silently dropped. The item
        # branch below has always sliced from the start.
        self.user_k_neibor[user] = dict(ranked[:user_near_num])

    if not os.path.exists('../data/neibor'):
        os.makedirs('../data/neibor')
        print('../data/neibor folder has been established.')
    util.save_data(
        self.user_k_neibor,
        '../data/neibor/db_08_uu_' + str(user_near_num) +
        '_neibor_tricf_cv0.pkl')

    # ------------------------------------------------------------------
    # Item-item similarity matrix
    # ------------------------------------------------------------------
    print('构建 item-item 相似度矩阵 ...')
    item_sim_path = '../data/sim/db_08_ii_tricf_cv0.pkl'
    if load_save_sim:
        self.item_sim = util.load_data(item_sim_path)
    else:
        for i1 in self.rg.item:
            for i2 in self.rg.item:
                if i1 == i2 or self.item_sim.contains(i1, i2):
                    continue
                # Original author's note: Pearson similarity? Switch to
                # cosine similarity.
                sim = pearson_sp(self.rg.get_col(i1), self.rg.get_col(i2))
                self.item_sim.set(i1, i2, round(sim, 5))
        util.save_data(self.item_sim, item_sim_path)

    # k nearest neighbours of every item.
    for item in self.rg.item:
        ranked = sorted(self.item_sim[item].items(),
                        key=lambda x: x[1], reverse=True)
        self.item_k_neibor[item] = dict(ranked[:item_near_num])
    util.save_data(
        self.item_k_neibor,
        '../data/neibor/db_08_ii_' + str(item_near_num) +
        '_neibor_tricf_cv0.pkl')