def build_user_item_sim_CF(self):
    """Build similarity matrices and k-nearest-neighbour tables for users and items.

    For every pair of distinct users (rows of ``self.rg``) and every pair of
    distinct items (columns of ``self.rg``) the value returned by
    ``pearson_sp`` is rounded to 5 decimals and stored in ``self.user_sim`` /
    ``self.item_sim``.  Pairs for which ``contains()`` already reports an
    entry are skipped (presumably ``SimMatrix`` stores pairs symmetrically --
    TODO confirm).

    The ``self.config.user_near_num`` / ``self.config.item_near_num``
    highest-scoring entries of each user / item are then stored in
    ``self.user_k_neibor`` / ``self.item_k_neibor``.  All four structures are
    written with ``util.save_data`` under ``../data/sim`` and
    ``../data/neibor``.

    The output directories are created up front, so a missing directory
    cannot make a save fail after the pairwise similarity pass has already
    been paid for.
    """
    import os
    from collections import defaultdict

    # Make sure both output directories exist before doing any heavy work.
    for out_dir in ('../data/sim', '../data/neibor'):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    self.user_sim = SimMatrix()
    self.item_sim = SimMatrix()
    self.user_k_neibor = defaultdict(dict)
    self.item_k_neibor = defaultdict(dict)

    def top_k(scores, k):
        # Keep the k entries with the highest similarity score.
        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return dict(ranked[:k])

    # ---- user-user similarity matrix ----
    print('constructing user-user similarity matrix...')
    # A previously saved matrix can be reused instead of recomputing:
    # self.user_sim = util.load_data('../data/sim/ft_08_uu_tricf.pkl')
    for u1 in self.rg.user:
        for u2 in self.rg.user:
            if u1 != u2 and not self.user_sim.contains(u1, u2):
                sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
                self.user_sim.set(u1, u2, round(sim, 5))
    util.save_data(self.user_sim, '../data/sim/ft_08_uu_tricf_cv0.pkl')

    # ---- k nearest neighbours of every user ----
    # self.user_k_neibor = util.load_data(
    #     '../data/neibor/ft_08_uu_' + str(self.config.user_near_num) + '_neibor_tricf.pkl')
    for user in self.rg.user:
        self.user_k_neibor[user] = top_k(self.user_sim[user], self.config.user_near_num)
    util.save_data(
        self.user_k_neibor,
        '../data/neibor/ft_08_uu_' + str(self.config.user_near_num) + '_neibor_tricf_cv0.pkl')

    # ---- item-item similarity matrix ----
    print('constructing item-item similarity matrix...')
    # self.item_sim = util.load_data('../data/sim/ft_08_ii_tricf.pkl')
    for i1 in self.rg.item:
        for i2 in self.rg.item:
            if i1 != i2 and not self.item_sim.contains(i1, i2):
                sim = pearson_sp(self.rg.get_col(i1), self.rg.get_col(i2))
                self.item_sim.set(i1, i2, round(sim, 5))
    util.save_data(self.item_sim, '../data/sim/ft_08_ii_tricf_cv0.pkl')

    # ---- k nearest neighbours of every item ----
    # self.item_k_neibor = util.load_data(
    #     '../data/neibor/ft_08_ii_' + str(self.config.item_near_num) + '_neibor_tricf.pkl')
    for item in self.rg.item:
        self.item_k_neibor[item] = top_k(self.item_sim[item], self.config.item_near_num)
    util.save_data(
        self.item_k_neibor,
        '../data/neibor/ft_08_ii_' + str(self.config.item_near_num) + '_neibor_tricf_cv0.pkl')
def init_model(self):
    """Fill ``self.item_sim`` with similarities between test items and all items.

    A fresh ``SimMatrix`` is created, then every (test item, item) pair with
    differing ids that the matrix does not already contain gets the value
    returned by ``pearson_sp`` on the two item columns of ``self.rg``.
    """
    self.item_sim = SimMatrix()

    # Lazily enumerate candidate pairs in the same nested order as two loops.
    candidate_pairs = (
        (test_id, train_id)
        for test_id in self.rg.testSet_i
        for train_id in self.rg.item
        if test_id != train_id
    )
    for test_id, train_id in candidate_pairs:
        if self.item_sim.contains(test_id, train_id):
            # Pair already has a stored similarity; nothing to do.
            continue
        score = pearson_sp(self.rg.get_col(test_id), self.rg.get_col(train_id))
        self.item_sim.set(test_id, train_id, score)
def init_model(self):
    """Fill ``self.user_sim`` with similarities between test users and all users.

    A fresh ``SimMatrix`` is created, then every (test user, user) pair with
    differing ids that the matrix does not already contain gets the value
    returned by ``pearson_sp`` on the two user rows of ``self.rg``.
    """
    self.user_sim = SimMatrix()

    # Lazily enumerate candidate pairs in the same nested order as two loops.
    candidate_pairs = (
        (test_id, train_id)
        for test_id in self.rg.testSet_u
        for train_id in self.rg.user
        if test_id != train_id
    )
    for test_id, train_id in candidate_pairs:
        if self.user_sim.contains(test_id, train_id):
            # Pair already has a stored similarity; nothing to do.
            continue
        score = pearson_sp(self.rg.get_row(test_id), self.rg.get_row(train_id))
        self.user_sim.set(test_id, train_id, score)
def get_neighbor(self, user, item):
    """Return the items rated by ``user`` that are most similar to ``item``.

    A result cached in ``self.user_item_nei[user][item]`` is returned as-is.
    Otherwise every item from ``self.rg.user_rated_items(user)`` other than
    ``item`` is scored with ``pearson_sp`` on the two item columns (rounded
    to 4 decimals), the ``self.config.item_near_num`` best-scoring ones are
    kept, and their ids are cached and returned as a tuple ordered by
    descending score.  When there is no candidate at all an empty list is
    returned and nothing is cached.
    """
    cache = self.user_item_nei
    if user in cache and item in cache[user]:
        return cache[user][item]

    # Score every other item this user has rated against the target item.
    scores = {}
    for candidate in self.rg.user_rated_items(user):
        if item != candidate:
            raw = pearson_sp(self.rg.get_col(item), self.rg.get_col(candidate))
            scores[candidate] = round(raw, 4)

    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    ranked = ranked[:self.config.item_near_num]

    # zip(*pairs) splits (id, score) pairs into an ids tuple and a scores tuple.
    columns = list(zip(*ranked))
    if not columns:
        return []

    neighbor_ids = columns[0]
    self.user_item_nei[user][item] = neighbor_ids
    return neighbor_ids
def get_sim(self, u, k):
    """Return the similarity of users ``u`` and ``k`` shifted and halved.

    The value of ``pearson_sp`` on the two user rows of ``self.rg`` is mapped
    with ``(x + 1) / 2``; assuming ``pearson_sp`` yields values in [-1, 1],
    the result lies in [0.0, 1.0].
    """
    correlation = pearson_sp(self.rg.get_row(u), self.rg.get_row(k))
    rescaled = (correlation + 1.0) / 2.0
    return rescaled
def build_user_item_sim_CF(self, kfold, user_near_num=50, item_near_num=50, load_save_sim=False):
    """Build (or load) similarity matrices and k-nearest-neighbour tables.

    Item similarities come from ``self.mg.getSimMatrix(jaccard_sim)``; user
    similarities are the ``pearson_sp`` value of every pair of distinct user
    rows of ``self.rg``, rounded to 5 decimals.  The best-scoring neighbours
    of every item / user are stored in ``self.item_k_neibor`` /
    ``self.user_k_neibor``.  Results are pickled with ``util.save_data``
    under ``../data/sim`` and ``../data/neibor``.

    Args:
        kfold: Passed to ``RatingGetter`` and ``MetaGetter``.  The pickle
            file names are fixed to ``cv0`` regardless of this value.
        user_near_num: Number of nearest users kept per user.
        item_near_num: Number of nearest items kept per item.
        load_save_sim: When true, load the similarity matrices (and the
            neighbour tables) with ``util.load_data`` instead of computing
            the similarity matrices.

    Notes:
        * ``self.user_sim``, ``self.user_k_neibor`` and ``self.item_k_neibor``
          are not created here when ``load_save_sim`` is false; they must
          already exist on the instance.
        * The user neighbour list used to be sliced ``[kfold:user_near_num]``,
          which dropped the ``kfold`` most similar users and returned fewer
          than ``user_near_num`` entries.  It now takes the top
          ``user_near_num`` like the item branch.
    """
    self.rg = RatingGetter(kfold)
    self.mg = MetaGetter(kfold)

    def ensure_dir(path):
        # Create the output directory on first use so the save cannot fail on it.
        if not os.path.exists(path):
            os.makedirs(path)
            print('%s folder has been established.' % path)

    def top_k(scores, k):
        # Keep the k entries with the highest similarity score.
        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return dict(ranked[:k])

    item_sim_path = '../data/sim/%s_08_ii_cucmemf_cv0.pkl' % self.config.dataset_name
    item_nei_path = '../data/neibor/%s_08_ii_%s_neibor_cucmemf_cv0.pkl' % (
        self.config.dataset_name, item_near_num)
    user_sim_path = '../data/sim/%s_08_uu_cucmemf_cv0.pkl' % self.config.dataset_name
    user_nei_path = '../data/neibor/%s_08_uu_%s_neibor_cucmemf_cv0.pkl' % (
        self.config.dataset_name, user_near_num)

    # ---- item-item similarity matrix ----
    print('构建 item-item 相似度矩阵 ...')
    if load_save_sim:
        self.item_sim = util.load_data(item_sim_path)
    else:
        # Item similarity computation is encapsulated in the meta getter.
        self.item_sim = self.mg.getSimMatrix(jaccard_sim)
        ensure_dir('../data/sim')
        util.save_data(self.item_sim, item_sim_path)

    # ---- k nearest neighbours of every item ----
    if load_save_sim:
        # NOTE(review): the table loaded here is overwritten entry by entry by
        # the loop below and then saved again; kept as in the original --
        # confirm whether the recomputation is intended.
        self.item_k_neibor = util.load_data(item_nei_path)
    for item in self.mg.item:
        self.item_k_neibor[item] = top_k(self.item_sim[item], item_near_num)
    ensure_dir('../data/neibor')
    util.save_data(self.item_k_neibor, item_nei_path)

    # ---- user-user similarity matrix ----
    print('构建 user-user 相似度矩阵 ...')
    if load_save_sim:
        self.user_sim = util.load_data(user_sim_path)
    else:
        for u1 in tqdm(self.rg.user):
            for u2 in self.rg.user:
                if u1 != u2 and not self.user_sim.contains(u1, u2):
                    # Pearson similarity? Maybe switch to cosine similarity?
                    sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
                    self.user_sim.set(u1, u2, round(sim, 5))
        ensure_dir('../data/sim')
        util.save_data(self.user_sim, user_sim_path)

    # ---- k nearest neighbours of every user ----
    if load_save_sim:
        # NOTE(review): same load-then-recompute pattern as for items above.
        self.user_k_neibor = util.load_data(user_nei_path)
    for user in self.rg.user:
        self.user_k_neibor[user] = top_k(self.user_sim[user], user_near_num)
    ensure_dir('../data/neibor')
    util.save_data(self.user_k_neibor, user_nei_path)
def build_user_item_sim_CF(self, kfold, user_near_num=50, item_near_num=50, load_save_sim=False):
    """Build (or load) similarity matrices and k-nearest-neighbour tables.

    User / item similarities are the ``pearson_sp`` value of every pair of
    distinct rows / columns of ``self.rg``, rounded to 5 decimals, unless
    ``load_save_sim`` is true, in which case the matrices are loaded with
    ``util.load_data``.  The best-scoring neighbours of every user / item are
    always recomputed into ``self.user_k_neibor`` / ``self.item_k_neibor``.
    Results are pickled with ``util.save_data`` under ``../data/sim`` and
    ``../data/neibor``.

    Args:
        kfold: Passed to ``RatingGetter``.  The pickle file names are fixed
            to ``cv0`` regardless of this value.
        user_near_num: Number of nearest users kept per user.
        item_near_num: Number of nearest items kept per item.
        load_save_sim: When true, load the similarity matrices instead of
            computing them.

    Notes:
        * ``self.user_sim`` / ``self.item_sim`` (when not loading) and both
          neighbour tables are not created here; they must already exist on
          the instance.
        * The user neighbour list used to be sliced ``[kfold:user_near_num]``,
          which dropped the ``kfold`` most similar users and returned fewer
          than ``user_near_num`` entries.  It now takes the top
          ``user_near_num`` like the item branch.
    """
    self.rg = RatingGetter(kfold)

    def ensure_dir(path):
        # Create the output directory on first use so the save cannot fail on it.
        if not os.path.exists(path):
            os.makedirs(path)
            print('%s folder has been established.' % path)

    def top_k(scores, k):
        # Keep the k entries with the highest similarity score.
        ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return dict(ranked[:k])

    # ---- user-user similarity matrix ----
    print('构建 user-user 相似度矩阵 ...')
    if load_save_sim:
        self.user_sim = util.load_data('../data/sim/db_08_uu_tricf_cv0.pkl')
    else:
        for u1 in self.rg.user:
            for u2 in self.rg.user:
                if u1 != u2 and not self.user_sim.contains(u1, u2):
                    # Pearson similarity? Maybe switch to cosine similarity.
                    sim = pearson_sp(self.rg.get_row(u1), self.rg.get_row(u2))
                    self.user_sim.set(u1, u2, round(sim, 5))
        ensure_dir('../data/sim')
        util.save_data(self.user_sim, '../data/sim/db_08_uu_tricf_cv0.pkl')

    # ---- k nearest neighbours of every user ----
    # self.user_k_neibor = util.load_data(
    #     '../data/neibor/db_08_uu_' + str(user_near_num) + '_neibor_tricf.pkl')
    for user in self.rg.user:
        self.user_k_neibor[user] = top_k(self.user_sim[user], user_near_num)
    ensure_dir('../data/neibor')
    util.save_data(
        self.user_k_neibor,
        '../data/neibor/db_08_uu_' + str(user_near_num) + '_neibor_tricf_cv0.pkl')

    # ---- item-item similarity matrix ----
    print('构建 item-item 相似度矩阵 ...')
    if load_save_sim:
        self.item_sim = util.load_data('../data/sim/db_08_ii_tricf_cv0.pkl')
    else:
        for i1 in self.rg.item:
            for i2 in self.rg.item:
                if i1 != i2 and not self.item_sim.contains(i1, i2):
                    # Pearson similarity? Maybe switch to cosine similarity.
                    sim = pearson_sp(self.rg.get_col(i1), self.rg.get_col(i2))
                    self.item_sim.set(i1, i2, round(sim, 5))
        ensure_dir('../data/sim')
        util.save_data(self.item_sim, '../data/sim/db_08_ii_tricf_cv0.pkl')

    # ---- k nearest neighbours of every item ----
    # self.item_k_neibor = util.load_data(
    #     '../data/neibor/db_08_ii_' + str(item_near_num) + '_neibor_tricf.pkl')
    for item in self.rg.item:
        self.item_k_neibor[item] = top_k(self.item_sim[item], item_near_num)
    ensure_dir('../data/neibor')
    util.save_data(
        self.item_k_neibor,
        '../data/neibor/db_08_ii_' + str(item_near_num) + '_neibor_tricf_cv0.pkl')
def get_sim(self, u, k):
    """Return the similarity of users ``u`` and ``k`` shifted and halved.

    The ``pearson_sp`` value of the two user rows of ``self.rg`` is mapped
    with ``(x + 1.0) / 2.0`` so that the result falls in [0, 1] (assuming
    ``pearson_sp`` yields values in [-1, 1]).
    """
    pearson = pearson_sp(self.rg.get_row(u), self.rg.get_row(k))
    # Rescale so that the range becomes [0, 1]; the original author noted the
    # figure 0.83626 next to this rescaling.
    return (pearson + 1.0) / 2.0