class TrustSVD(MF):
    """Matrix-factorisation recommender that mixes ratings with trust links.

    The prediction for a known (user, item) pair is
    ``Q[i] . (P[u] + sum_y + sum_w) + globalMean + Bi[i] + Bu[u]`` where
    ``sum_y`` aggregates the ``Y`` rows of the items the user rated and
    ``sum_w`` aggregates the ``W`` rows of the users the user follows.

    Reference given by the original author:
    Koren Y. Factor in the neighbors: Scalable and accurate collaborative
    filtering[J]. ACM Transactions on Knowledge Discovery from Data (TKDD),
    2010, 4(1): 1.
    NOTE(review): that citation does not mention trust; confirm the intended
    reference for this model.
    """

    def __init__(self):
        super(TrustSVD, self).__init__()

        # Hyper-parameters: learning rate, epoch cap and regularisation
        # weights for P, Q, the biases, Y, W and the trust term.
        self.config.lr = 0.005
        self.config.maxIter = 100
        self.config.lambdaP = 1.2
        self.config.lambdaQ = 1.2
        self.config.lambdaB = 1.2
        self.config.lambdaY = 1.2
        self.config.lambdaW = 1.2
        self.config.lambdaT = 0.9

        self.tg = TrustGetter()
        self.init_model()

    def init_model(self):
        """Create the bias vectors and the Y / W factor matrices."""
        super(TrustSVD, self).init_model()
        train_size = self.rg.get_train_size()
        n_users, n_items = train_size[0], train_size[1]
        factor = self.config.factor
        scale = factor**0.5

        self.Bu = np.random.rand(n_users) / scale  # bias value of user
        self.Bi = np.random.rand(n_items) / scale  # bias value of item
        # Per-item implicit-feedback factors (summed in get_sum_y).
        self.Y = np.random.rand(n_items, factor) / scale
        # Per-user factors used for the trust terms (summed in get_sum_w).
        self.W = np.random.rand(n_users, factor) / scale

    def train_model(self):
        """Run SGD over the training set until maxIter or convergence."""
        cfg = self.config
        iteration = 0
        while iteration < cfg.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error**2
                # Assumes self.P / self.Q are NumPy arrays created by
                # MF.init_model, so p and q alias the rows and observe the
                # in-place updates below -- TODO confirm.
                p, q = self.P[u], self.Q[i]
                nu, sum_y = self.get_sum_y(user)
                nv, sum_w = self.get_sum_w(user)
                inv_sqrt_nu = 1.0 / math.sqrt(nu)
                inv_sqrt_nv = 1.0 / math.sqrt(nv)

                # update biases and item factors
                self.Bu[u] += cfg.lr * (
                    error - cfg.lambdaB * inv_sqrt_nu * self.Bu[u])
                self.Bi[i] += cfg.lr * (
                    error - cfg.lambdaB * inv_sqrt_nv * self.Bi[i])
                self.Q[i] += cfg.lr * (
                    error * (p + sum_y + sum_w) -
                    cfg.lambdaQ * inv_sqrt_nu * q)

                # update W for every followee that is also a rating user
                followees = self.tg.get_followees(user)
                ws = np.zeros(cfg.factor)
                user_known = self.rg.containsUser(user)
                for followee in followees:
                    if user_known and self.rg.containsUser(followee):
                        nw = len(self.tg.get_followers(followee))
                        vid = self.rg.user[followee]
                        w = self.W[vid]
                        weight = 1  # followees[followee]
                        err = w.dot(p) - weight
                        self.loss += err**2
                        ws += err * w
                        self.W[vid] += cfg.lr * (
                            err * inv_sqrt_nv * q -
                            cfg.lambdaT * err * p -
                            cfg.lambdaW * (1.0 / math.sqrt(nw)) * w)

                # update the user factor
                self.P[u] += cfg.lr * (
                    error * q - cfg.lambdaT * ws -
                    (cfg.lambdaP * inv_sqrt_nu +
                     cfg.lambdaT * inv_sqrt_nv) * p)

                # update Y for every item this user rated.  The lookup must
                # use the raw user id (as get_sum_y does), not the row
                # index u.
                for j in self.rg.user_rated_items(user):
                    idj = self.rg.item[j]
                    self.Y[idj] += cfg.lr * (
                        error * inv_sqrt_nu * q -
                        cfg.lambdaY * inv_sqrt_nv * self.Y[idj])

            self.loss += (
                cfg.lambdaP * (self.P * self.P).sum() +
                cfg.lambdaQ * (self.Q * self.Q).sum() +
                cfg.lambdaB * ((self.Bu * self.Bu).sum() +
                               (self.Bi * self.Bi).sum()) +
                cfg.lambdaY * (self.Y * self.Y).sum() +
                cfg.lambdaW * (self.W * self.W).sum())
            iteration += 1
            if self.isConverged(iteration):
                break

    def predict(self, u, i):
        """Predict the rating of raw user id ``u`` for raw item id ``i``.

        Falls back to the global mean when the user or the item is not
        known to the rating data.
        """
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            _, sum_y = self.get_sum_y(u)
            _, sum_w = self.get_sum_w(u)
            u = self.rg.user[u]
            i = self.rg.item[i]
            return (self.Q[i].dot(self.P[u] + sum_y + sum_w) +
                    self.rg.globalMean + self.Bi[i] + self.Bu[u])
        return self.rg.globalMean

    def get_sum_y(self, u):
        """Return ``(n_rated, sum of Y rows / sqrt(n_rated))`` for user ``u``.

        ``u`` is the raw user id.  With no rated items the sum is returned
        as zeros instead of dividing 0 by 0.
        """
        u_items = self.rg.user_rated_items(u)
        nu = len(u_items)
        sum_y = np.zeros(self.config.factor)
        for j in u_items:
            sum_y += self.Y[self.rg.item[j]]
        if nu:
            sum_y /= np.sqrt(nu)
        return nu, sum_y

    def get_sum_w(self, u):
        """Return ``(count, sum of W rows / sqrt(count))`` for user ``u``.

        ``count`` starts at 1 and is incremented once per followee that is
        present in the rating data, so it is never zero.
        """
        followees = self.tg.get_followees(u)
        nu = 1
        sum_w = np.zeros(self.config.factor)
        for v in followees.keys():
            if self.rg.containsUser(v):
                nu += 1
                sum_w += self.W[self.rg.user[v]]
        sum_w /= np.sqrt(nu)
        return nu, sum_w
class SocialReg(MF):
    """Matrix factorisation with a social-regularisation term.

    Each user's latent vector is pulled towards the vectors of the users
    they follow and of the users following them, weighted by a rating
    similarity rescaled into [0, 1].

    Ma H, Zhou D, Liu C, et al. Recommender systems with social
    regularization[C]//Proceedings of the fourth ACM international conference
    on Web search and data mining. ACM, 2011: 287-296.
    """

    def __init__(self):
        super(SocialReg, self).__init__()
        # self.config.lambdaP = 0.001
        # self.config.lambdaQ = 0.001
        self.config.alpha = 0.1  # weight of the social term
        self.tg = TrustGetter()
        # self.init_model()

    def init_model(self, k):
        """Initialise the base model and fill the user-user similarities."""
        super(SocialReg, self).init_model(k)
        self.user_sim = SimMatrix()
        print('constructing user-user similarity matrix...')

        # self.user_sim = util.load_data('../data/sim/ft_cf_soreg08_cv1.pkl')

        for rater in self.rg.user:
            for trusted in self.tg.get_followees(rater):
                # Only compute a pair that has not been stored yet.
                if not self.user_sim.contains(rater, trusted):
                    self.user_sim.set(rater, trusted,
                                      self.get_sim(rater, trusted))

        # util.save_data(self.user_sim,'../data/sim/ft_cf_soreg08.pkl')

    def get_sim(self, u, k):
        """Return ``pearson_sp`` of the two users' rows mapped to [0, 1]."""
        row_u = self.rg.get_row(u)
        row_k = self.rg.get_row(k)
        # fit the value into range [0.0,1.0]
        return (pearson_sp(row_u, row_k) + 1.0) / 2.0

    def train_model(self, k):
        """Run SGD with the social regulariser until maxIter or convergence."""
        super(SocialReg, self).train_model(k)
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += 0.5 * error**2
                p, q = self.P[u], self.Q[i]

                # Pull towards the users this user follows.
                social_term_p = np.zeros((self.config.factor))
                social_term_loss = 0.0
                for followee in self.tg.get_followees(user):
                    if not self.rg.containsUser(followee):
                        continue
                    s = self.user_sim[user][followee]
                    gap = p - self.P[self.rg.user[followee]]
                    social_term_p += s * gap
                    social_term_loss += s * (gap.dot(gap))

                # Pull towards the users who follow this user.
                social_term_m = np.zeros((self.config.factor))
                for follower in self.tg.get_followers(user):
                    if not self.rg.containsUser(follower):
                        continue
                    s = self.user_sim[user][follower]
                    social_term_m += s * (
                        p - self.P[self.rg.user[follower]])

                # update latent vectors
                social_pull = self.config.alpha * (
                    social_term_p + social_term_m)
                self.P[u] += self.config.lr * (
                    error * q - social_pull - self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (
                    error * p - self.config.lambdaQ * q)

                self.loss += 0.5 * self.config.alpha * social_term_loss

            self.loss += (
                0.5 * self.config.lambdaP * (self.P * self.P).sum() +
                0.5 * self.config.lambdaQ * (self.Q * self.Q).sum())

            iteration += 1
            if self.isConverged(iteration):
                break
class TrustWalker(MF):
    """Random-walk recommender over the trust network.

    Jamali M, Ester M. Trustwalker: a random walk model for combining
    trust-based and item-based recommendation[C]//Proceedings of the 15th ACM
    SIGKDD international conference on Knowledge discovery and data mining.
    ACM, 2009: 397-406.

    NOTE(review): this file defines ``TrustWalker`` twice; the later
    definition shadows the earlier one.
    """

    def __init__(self):
        super(TrustWalker, self).__init__()
        np.random.seed(0)  # seeds NumPy's global RNG

        self.tg = TrustGetter()
        self.init_model()

    def init_model(self):
        """Reset the accumulated walk probability."""
        self.p = 1.0

    def single_random_walk(self, user=5, item=3, k=0):
        """Perform one random walk and return ``(probability, rating)``.

        The walk starts at ``user`` looking for a rating of ``item``; ``k``
        is the current step number.  It stops when the current user rated
        ``item``, when the random draw falls below the stop probability,
        when ``k`` reaches 6, or when the current user follows nobody.  A
        rating of 0 is returned when no similar rated item is available.
        """
        print(user, item, k)
        print('%s%d' % ('k=', k))
        # If the user rated the item, the walk ends with that rating.
        if self.rg.containsUserItem(user, item):
            return self.p, self.rg.trainSet_u[user][item]

        rand_num = np.random.rand(1)  # get random number
        # compute the stop probability
        stop_prob, max_item, p_j = self.get_stop_prob(user, item, k)
        print('stop probability:' + str(stop_prob))
        # print('%s%d'%('stop probbability:',stop_prob))
        print(rand_num, stop_prob)

        if rand_num < stop_prob or k >= 6:  # no more than six steps
            # max_item == 0 is the "nothing selected" sentinel of
            # get_stop_prob; there is no rating to look up in that case.
            if max_item == 0:
                return self.p, 0
            # get the most similar item j, and return r(u,j)
            rating = self.rg.trainSet_u[user][max_item]
            self.p = self.p * stop_prob * p_j
            return (self.p, rating)

        # get next user for random walk
        next_user, tu_prob = self.get_followee_user(user)
        print('next step user is:' + str(next_user))
        if next_user is None:
            # The user has no friends in the trust network: fall back to
            # the most similar item rated by this user (k=-1 only avoids
            # the k == 0 shortcut in get_stop_prob).
            _, max_item, p_j = self.get_stop_prob(user, item, -1)
            if max_item == 0:  # no next user and no similar item
                return self.p, 0
            rating = self.rg.trainSet_u[user][max_item]
            self.p = self.p * p_j
            return (self.p, rating)

        self.p = self.p * (1 - stop_prob) * tu_prob
        k += 1
        # The return is required; without it the recursion yields None.
        return self.single_random_walk(user=next_user, item=item, k=k)

    def get_followee_user(self, user):
        """Pick one followee of ``user`` uniformly at random.

        Returns ``(followee, 1 / number_of_followees)`` or ``(None, 0)``
        when the user follows nobody.
        """
        followees = list(self.tg.get_followees(user))
        num_foll = len(followees)
        if num_foll == 0:
            return None, 0
        # pick one randomly
        ind = np.random.randint(num_foll)
        return followees[ind], 1.0 / num_foll

    def get_stop_prob(self, user, item, k):
        """Return ``(stop_prob, best_item, prob_of_best_item)``.

        ``stop_prob`` is the largest ``similarity * sigmoid_2(k)`` over the
        items rated by ``user``, ``best_item`` is the item attaining it and
        ``prob_of_best_item`` is its similarity divided by the sum of all
        similarities.  ``(0, 0, 0)`` is returned for ``k == 0`` (which also
        resets ``self.p``) and when the user rated nothing.
        """
        sum_sim = 0.0
        max_sim = 0
        max_prob = 0.0
        max_prob_item = 0
        if k == 0:  # if k==0,the stop probability=0
            self.p = 1.0
            return 0, 0, 0
        param = sigmoid_2(k)
        u_items = self.rg.user_rated_items(user)
        print(u_items)
        if len(u_items) == 0:
            return 0, 0, 0
        for u_item in u_items:
            sim = self.get_sim(item, u_item)
            sum_sim += sim
            prob = sim * param
            if prob > max_prob:
                max_sim = sim
                max_prob = prob
                max_prob_item = u_item
        # Avoid dividing by zero when every similarity is 0.
        p_j = max_sim / sum_sim if sum_sim != 0 else 0.0
        return max_prob, max_prob_item, p_j

    def get_sim(self, item1, item2):
        """Return ``cosine_improved_sp`` of the two items' rating columns."""
        return cosine_improved_sp(self.rg.get_col(item1),
                                  self.rg.get_col(item2))
class SocialRec(MF):
    """Matrix factorisation that co-factorises ratings and the trust graph.

    Besides ``P`` and ``Q`` a third matrix ``Z`` is learned so that
    ``Z[v] . P[u]`` approximates a degree-based trust weight for every
    followee ``v`` of user ``u``.

    Ma H, Yang H, Lyu M R, et al. Sorec: social recommendation using
    probabilistic matrix factorization[C]//Proceedings of the 17th ACM
    conference on Information and knowledge management. ACM, 2008: 931-940.
    """

    def __init__(self):
        super(SocialRec, self).__init__()
        # self.config.lr=0.0001
        self.config.alpha = 0.1  # weight of the trust term
        self.config.lambdaZ = 0.01  # regularisation of Z
        self.tg = TrustGetter()
        # self.init_model()

    def init_model(self, k):
        """Initialise the base model and the social factor matrix ``Z``."""
        super(SocialRec, self).init_model(k)
        # latent user social matrix
        self.Z = np.random.rand(self.rg.get_train_size()[0],
                                self.config.factor) / (
                                    self.config.factor**0.5)

    def train_model(self, k):
        """Run SGD over ratings and trust links until maxIter or convergence."""
        import math  # function-scope import, as in the original block

        super(SocialRec, self).train_model(k)
        cfg = self.config
        iteration = 0
        while iteration < cfg.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error**2
                p, q = self.P[u], self.Q[i]

                followees = self.tg.get_followees(user)
                zs = np.zeros(cfg.factor)
                # Both values depend only on the user, not on the followee.
                user_known = self.rg.containsUser(user)
                uplus = len(followees)  # ~ d + (i)
                for followee in followees:
                    if not (user_known and self.rg.containsUser(followee)):
                        continue
                    vminus = len(self.tg.get_followers(followee))  # ~ d - (k)
                    try:
                        weight = math.sqrt(vminus / (uplus + vminus + 0.0))
                    except ZeroDivisionError:
                        weight = 1
                    zid = self.rg.user[followee]
                    z = self.Z[zid]
                    err = weight - z.dot(p)
                    self.loss += err**2
                    # d/dp of (weight - z.p)^2 points along z, so the user
                    # gradient accumulates err * z (not err * p).  It is
                    # taken before Z[zid] is updated below.
                    zs += -1.0 * err * z
                    self.Z[zid] += cfg.lr * (
                        cfg.alpha * err * p - cfg.lambdaZ * z)

                self.P[u] += cfg.lr * (
                    error * q - cfg.alpha * zs - cfg.lambdaP * p)
                self.Q[i] += cfg.lr * (error * p - cfg.lambdaQ * q)

            self.loss += (cfg.lambdaP * (self.P * self.P).sum() +
                          cfg.lambdaQ * (self.Q * self.Q).sum() +
                          cfg.lambdaZ * (self.Z * self.Z).sum())

            iteration += 1
            if self.isConverged(iteration):
                break
class RSTE(MF):
    """Matrix factorisation blended with the tastes of trusted users.

    A prediction mixes the user's own ``P[u] . Q[i]`` (weight ``alpha``)
    with the trust-weighted average of the followees' ``P[v] . Q[i]``
    (weight ``1 - alpha``).

    Ma H, King I, Lyu M R. Learning to recommend with social trust
    ensemble[C]//Proceedings of the 32nd international ACM SIGIR conference on
    Research and development in information retrieval. ACM, 2009: 203-210.
    """

    def __init__(self):
        super(RSTE, self).__init__()
        # self.maxIter=700
        self.config.alpha = 0.5
        # self.config.lambdaH=0.01
        self.tg = TrustGetter()
        # self.init_model()

    def init_model(self, k):
        """Initialise the base model; no extra state is created here."""
        super(RSTE, self).init_model(k)

    def train_model(self, k):
        """Run SGD until maxIter or convergence."""
        super(RSTE, self).train_model(k)
        cfg = self.config
        iteration = 0
        while iteration < cfg.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                error = rating - self.predict(user, item)
                self.loss += error**2
                social_term, _ = self.get_social_term_Q(user, item)

                u = self.rg.user[user]
                i = self.rg.item[item]
                p, q = self.P[u], self.Q[i]

                # update latent vectors
                self.P[u] += cfg.lr * (
                    cfg.alpha * error * q +
                    (1 - cfg.alpha) * self.get_social_term_P(user, item) -
                    cfg.lambdaP * p)
                self.Q[i] += cfg.lr * (
                    error * (cfg.alpha * p +
                             (1 - cfg.alpha) * social_term) -
                    cfg.lambdaQ * q)

            self.loss += (cfg.lambdaP * (self.P * self.P).sum() +
                          cfg.lambdaQ * (self.Q * self.Q).sum())

            iteration += 1
            if self.isConverged(iteration):
                break

    def get_social_term_Q(self, user, item):
        """Return the followee-averaged user vector and its prediction.

        Returns ``(social_term, social_term_loss)``: the trust-weighted mean
        of the followees' ``P`` rows and the trust-weighted mean of their
        ``P[v] . Q[item]``.  Both are zero when the user or item is unknown,
        when no followee is in the rating data, or when the weights sum
        to 0.
        """
        social_term = np.zeros(self.config.factor)
        social_term_loss = 0
        # Unknown user/item: return the neutral result instead of falling
        # off the end of the function with an implicit None.
        if not (self.rg.containsUser(user) and self.rg.containsItem(item)):
            return social_term, social_term_loss

        i = self.rg.item[item]
        followees = self.tg.get_followees(user)
        weights = []
        indexes = []
        for followee in followees:
            if self.rg.containsUser(followee):  # followee is in rating set
                indexes.append(self.rg.user[followee])
                weights.append(followees[followee])
        weights = np.array(weights)
        qw = weights.sum()
        if qw != 0:
            trusted_p = self.P[np.array(indexes)]
            social_term = weights.dot(trusted_p) / qw
            social_term_loss += weights.dot(trusted_p.dot(self.Q[i])) / qw
        return social_term, social_term_loss

    def get_social_term_P(self, user, item):
        """Return the follower-error term used in the ``P`` update.

        For every follower of ``user`` that rated ``item``, the follower's
        prediction error is weighted by the trust value; the weighted sum,
        divided by the sum of weights, scales ``Q[item]``.  A zero vector is
        returned when no follower qualifies or the weights sum to 0.
        """
        i = self.rg.item[item]
        social_term = np.zeros(self.config.factor)
        followers = self.tg.get_followers(user)
        weights = []
        errs = []
        for follower in followers:
            if (self.rg.containsUser(follower)
                    and self.rg.containsItem(item)
                    and self.rg.containsUserItem(follower, item)):
                weights.append(followers[follower])
                errs.append(self.rg.trainSet_u[follower][item] -
                            self.predict(follower, item))
        weights = np.array(weights)
        errs = np.array(errs)
        qw = weights.sum()
        if qw != 0:
            social_term = (errs * weights).sum() * self.Q[i] / qw
        return social_term

    def predict(self, u, i):
        """Predict the rating of raw user id ``u`` for raw item id ``i``.

        Uses the blended prediction when the social part is non-zero, the
        plain ``P[u] . Q[i]`` otherwise, and the global mean for an unknown
        user or item.
        """
        if self.rg.containsUser(u) and self.rg.containsItem(i):
            _, social_term_loss = self.get_social_term_Q(u, i)
            i = self.rg.item[i]
            u = self.rg.user[u]
            own = self.P[u].dot(self.Q[i])
            if social_term_loss != 0:
                return (self.config.alpha * own +
                        (1 - self.config.alpha) * social_term_loss)
            return own
        return self.rg.globalMean
class SocialMF(MF):
    """Matrix factorisation with trust propagation.

    Each user's latent vector is regularised towards the trust-weighted mean
    of the vectors of the users they follow.

    Jamali M, Ester M. A matrix factorization technique with trust propagation
    for recommendation in social networks[C]//Proceedings of the fourth ACM
    conference on Recommender systems. ACM, 2010: 135-142.
    """

    def __init__(self):
        super(SocialMF, self).__init__()
        # self.config.lr=0.0001
        self.config.alpha = 1  # 0.8 rmse=0.87605
        self.tg = TrustGetter()  # loading trust data
        self.init_model()

    def _followee_weighted_sum(self, user):
        """Return (sum of weight * P[followee], sum of weights) for ``user``.

        Only followees present in the rating data contribute.
        """
        acc = np.zeros(self.config.factor)
        weight_total = 0.0
        followees = self.tg.get_followees(user)  # users followed by `user`
        for followee in followees:
            weight = followees[followee]
            if self.rg.containsUser(followee):
                acc += weight * self.P[self.rg.user[followee]]
                weight_total += weight
        return acc, weight_total

    def train_model(self):
        """Run SGD with the trust-propagation term until maxIter/convergence."""
        iteration = 0
        while iteration < self.config.maxIter:
            self.loss = 0
            for user, item, rating in self.rg.trainSet():
                u = self.rg.user[user]
                i = self.rg.item[item]
                error = rating - self.predict(user, item)
                self.loss += error**2
                p, q = self.P[u], self.Q[i]

                # Gap between the user and the mean of the users they follow;
                # left as the raw accumulated sum when the weights total 0.
                social_term, own_weight = self._followee_weighted_sum(user)
                if own_weight != 0:
                    social_term = p - social_term / own_weight

                # The same gap, evaluated for every follower of this user
                # and averaged over the followers found in the rating data.
                social_term_a = np.zeros(self.config.factor)
                total_count = 0
                for follower in self.tg.get_followers(user):
                    if not self.rg.containsUser(follower):
                        continue
                    total_count += 1
                    uv = self.P[self.rg.user[follower]]
                    neighbour_sum, neighbour_weight = (
                        self._followee_weighted_sum(follower))
                    if neighbour_weight != 0:
                        social_term_a += (
                            uv - neighbour_sum / neighbour_weight)
                if total_count != 0:
                    social_term_a /= total_count

                # update latent vectors
                self.P[u] += self.config.lr * (
                    error * q - self.config.alpha * social_term +
                    self.config.alpha * social_term_a -
                    self.config.lambdaP * p)
                self.Q[i] += self.config.lr * (
                    error * p - self.config.lambdaQ * q)

                self.loss += self.config.alpha * social_term.dot(
                    social_term).sum()

            self.loss += (self.config.lambdaP * (self.P * self.P).sum() +
                          self.config.lambdaQ * (self.Q * self.Q).sum())

            iteration += 1
            if self.isConverged(iteration):
                break
class TrustWalker(MF):
    """Random-walk recommender over the trust network.

    Jamali M, Ester M. Trustwalker: a random walk model for combining
    trust-based and item-based recommendation[C]//Proceedings of the 15th ACM
    SIGKDD international conference on Knowledge discovery and data mining.
    ACM, 2009: 397-406.

    NOTE(review): this file defines ``TrustWalker`` twice; this later
    definition shadows the earlier one.
    """

    def __init__(self):
        super(TrustWalker, self).__init__()
        np.random.seed(0)  # seeds NumPy's global RNG

        self.tg = TrustGetter()
        self.init_model()

    def init_model(self):
        """Reset the accumulated walk probability."""
        self.p = 1.0

    def single_random_walk(self, user=5, item=3, k=0):
        """Perform one random walk and return ``(probability, rating)``.

        The walk starts at ``user`` looking for a rating of ``item``; ``k``
        is the current step number.  It stops when the current user rated
        ``item``, when the random draw falls below the stop probability,
        when ``k`` reaches 6, or when the current user follows nobody.  A
        rating of 0 is returned when no similar rated item is available.
        """
        print(user, item, k)
        print('%s%d' % ('k=', k))
        # If the user rated the item, the walk ends with that rating.
        if self.rg.containsUserItem(user, item):
            return self.p, self.rg.trainSet_u[user][item]

        rand_num = np.random.rand(1)  # draw a random number
        # compute the stop probability
        stop_prob, max_item, p_j = self.get_stop_prob(user, item, k)
        print('stop probability:' + str(stop_prob))
        # print('%s%d'%('stop probbability:',stop_prob))
        print(rand_num, stop_prob)

        if rand_num < stop_prob or k >= 6:  # no more than six steps
            # max_item == 0 is the "nothing selected" sentinel of
            # get_stop_prob; there is no rating to look up in that case.
            if max_item == 0:
                return self.p, 0
            # take the most similar item j and return r(u, j)
            rating = self.rg.trainSet_u[user][max_item]
            self.p = self.p * stop_prob * p_j
            return (self.p, rating)

        # pick the next user v to walk to
        next_user, tu_prob = self.get_followee_user(user)
        print('next step user is:' + str(next_user))
        if next_user is None:
            # The user has no friends in the trust network: fall back to
            # the most similar item rated by this user (k=-1 only avoids
            # the k == 0 shortcut in get_stop_prob).
            _, max_item, p_j = self.get_stop_prob(user, item, -1)
            if max_item == 0:  # no next user and no similar item
                return self.p, 0
            rating = self.rg.trainSet_u[user][max_item]
            self.p = self.p * p_j
            return (self.p, rating)

        self.p = self.p * (1 - stop_prob) * tu_prob
        k += 1
        # The return is required; without it the recursion yields None.
        return self.single_random_walk(user=next_user, item=item, k=k)

    def get_followee_user(self, user):
        """Pick one followee of ``user`` uniformly at random.

        Returns ``(followee, 1 / number_of_followees)`` or ``(None, 0)``
        when the user follows nobody.
        """
        followees = list(self.tg.get_followees(user))
        num_foll = len(followees)
        if num_foll == 0:
            return None, 0
        # pick one at random
        ind = np.random.randint(num_foll)
        return followees[ind], 1.0 / num_foll

    def get_stop_prob(self, user, item, k):
        """Return ``(stop_prob, best_item, prob_of_best_item)``.

        ``stop_prob`` is the largest ``similarity * sigmoid_2(k)`` over the
        items rated by ``user``, ``best_item`` is the item attaining it and
        ``prob_of_best_item`` is its similarity divided by the sum of all
        similarities.  ``(0, 0, 0)`` is returned for ``k == 0`` (which also
        resets ``self.p``) and when the user rated nothing.
        """
        sum_sim = 0.0  # denominator of P(Y=j)
        max_sim = 0  # numerator of P(Y=j)
        max_prob = 0.0  # becomes the stop probability
        max_prob_item = 0  # the selected similar item j
        if k == 0:  # at k == 0 the stop probability is 0
            self.p = 1.0
            return 0, 0, 0
        param = sigmoid_2(k)
        u_items = self.rg.user_rated_items(user)
        print(u_items)
        if len(u_items) == 0:
            return 0, 0, 0
        for u_item in u_items:
            sim = self.get_sim(item, u_item)
            sum_sim += sim
            prob = sim * param
            if prob > max_prob:
                max_sim = sim
                max_prob = prob
                max_prob_item = u_item
        # Avoid dividing by zero when every similarity is 0.
        p_j = max_sim / sum_sim if sum_sim != 0 else 0.0
        # stop probability, most similar item, probability of picking item j
        return max_prob, max_prob_item, p_j

    def get_sim(self, item1, item2):
        """Return ``cosine_improved_sp`` of the two items' rating columns."""
        return cosine_improved_sp(self.rg.get_col(item1),
                                  self.rg.get_col(item2))