class TopN(BaseEstimator):
    """Popularity baseline that recommends the same ``n`` items to everyone.

    Popularity of an item index is the number of non-zero entries found for
    that column index in ``MemeryDataModel.getData()`` (presumably a
    user x item matrix -- confirm against the data model).
    """

    def __init__(self, n=5):
        """Store ``n``, the number of items to recommend."""
        # A parenthesised single argument prints identically on Python 2 and 3.
        print('topN begin')
        self.n = n

    def gen_items_popular(self, trainSamples, trainTargets, hasTimes=False):
        """Build the data model and count non-zero entries per item index.

        Sets ``self.dataModel`` and ``self.popItems`` (a float array with one
        slot per item index). ``hasTimes`` is accepted but not used.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        # Only the column (item) indices are needed, so nonzero() runs once.
        iids = self.dataModel.getData().nonzero()[1]
        itempopular = np.zeros(self.dataModel.getItemsNum())
        # Unbuffered add: repeated indices each contribute 1, exactly like
        # the element-by-element ``itempopular[iid] += 1`` loop.
        np.add.at(itempopular, iids, 1)
        self.popItems = itempopular

    def predict(self, testSamples):
        """Return 1 for each sample whose item is in the top ``n``, else 0.

        Each sample is indexed as ``sample[1]`` to obtain the item, which is
        translated to an item index through ``dataModel.getIidByItem``.
        """
        # The top-n slice does not change between samples; take it once.
        top_items = self.topN[:self.n]
        return [
            1 if self.dataModel.getIidByItem(user_item[1]) in top_items else 0
            for user_item in testSamples
        ]

    def fit(self, trainSamples, trainTargets):
        """Count item popularity and rank item indices from most to least.

        Returns ``self`` so that calls can be chained.
        """
        self.gen_items_popular(trainSamples, trainTargets)
        # argsort is ascending; reverse it so the most popular index is first.
        self.topN = np.argsort(np.array(self.popItems))[-1::-1]
        return self

    def recommend(self, uid):
        """Return the top ``n`` items; ``uid`` does not affect the result."""
        return [self.dataModel.getItemByIid(i) for i in self.topN[:self.n]]

    def score(self, testSamples, trueLabels):
        """Evaluate the recommendations per test user and return the F1 value.

        ``testSamples`` is converted to a 2-D array whose column 0 is read as
        the user and column 1 as the item. ``trueLabels`` is not used.
        The value returned is ``Eval().evalAll(...)['F1']``.
        """
        # Convert once instead of once per user and per access.
        samples = np.array(testSamples)
        users = samples[:, 0]
        # Every user receives the same list, so build it a single time.
        recommended = [
            self.dataModel.getItemByIid(i) for i in self.topN[:self.n]
        ]

        trueList = []
        recommendList = []
        for u in set(users):
            rows = np.argwhere(users == u)[:, 0]
            trueList.append(list(samples[rows][:, 1]))
            # Hand out a separate list object per user, as before.
            recommendList.append(list(recommended))

        result = Eval().evalAll(recommendList, trueList)
        f1 = result['F1']
        print('TopN result:' + '(' + str(self.get_params()) + ')' + str(f1))
        return f1
class TopN(BaseEstimator):
    """Popularity baseline that recommends the same ``n`` items to everyone.

    Popularity of an item index is the number of non-zero entries found for
    that column index in ``MemeryDataModel.getData()`` (presumably a
    user x item matrix -- confirm against the data model).
    """

    def __init__(self, n=5):
        """Store ``n``, the number of items to recommend."""
        # A parenthesised single argument prints identically on Python 2 and 3.
        print('topN begin')
        self.n = n

    def gen_items_popular(self, trainSamples, trainTargets, hasTimes=False):
        """Build the data model and count non-zero entries per item index.

        Sets ``self.dataModel`` and ``self.popItems`` (a float array with one
        slot per item index). ``hasTimes`` is accepted but not used.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        # Only the column (item) indices are needed, so nonzero() runs once.
        iids = self.dataModel.getData().nonzero()[1]
        itempopular = np.zeros(self.dataModel.getItemsNum())
        # Unbuffered add: repeated indices each contribute 1, exactly like
        # the element-by-element ``itempopular[iid] += 1`` loop.
        np.add.at(itempopular, iids, 1)
        self.popItems = itempopular

    def predict(self, testSamples):
        """Return 1 for each sample whose item is in the top ``n``, else 0.

        Each sample is indexed as ``sample[1]`` to obtain the item, which is
        translated to an item index through ``dataModel.getIidByItem``.
        """
        # The top-n slice does not change between samples; take it once.
        top_items = self.topN[:self.n]
        return [
            1 if self.dataModel.getIidByItem(user_item[1]) in top_items else 0
            for user_item in testSamples
        ]

    def fit(self, trainSamples, trainTargets):
        """Count item popularity and rank item indices from most to least.

        Returns ``self`` so that calls can be chained.
        """
        self.gen_items_popular(trainSamples, trainTargets)
        # argsort is ascending; reverse it so the most popular index is first.
        self.topN = np.argsort(np.array(self.popItems))[-1::-1]
        return self

    def recommend(self, uid):
        """Return the top ``n`` items; ``uid`` does not affect the result."""
        return [self.dataModel.getItemByIid(i) for i in self.topN[:self.n]]

    def score(self, testSamples, trueLabels):
        """Evaluate the recommendations per test user and return the F1 value.

        ``testSamples`` is converted to a 2-D array whose column 0 is read as
        the user and column 1 as the item. ``trueLabels`` is not used.
        The value returned is ``Eval().evalAll(...)['F1']``.
        """
        # Convert once instead of once per user and per access.
        samples = np.array(testSamples)
        users = samples[:, 0]
        # Every user receives the same list, so build it a single time.
        recommended = [
            self.dataModel.getItemByIid(i) for i in self.topN[:self.n]
        ]

        trueList = []
        recommendList = []
        for u in set(users):
            rows = np.argwhere(users == u)[:, 0]
            trueList.append(list(samples[rows][:, 1]))
            # Hand out a separate list object per user, as before.
            recommendList.append(list(recommended))

        result = Eval().evalAll(recommendList, trueList)
        f1 = result['F1']
        print('TopN result:' + '(' + str(self.get_params()) + ')' + str(f1))
        return f1
class LFM(BaseEstimator):
    """Latent-factor recommender with user/item biases, trained by SGD.

    A rating is predicted as ``mu + bi[iid] + bu[uid] + qi[iid] . pu[uid]``
    and clipped to the range [1, 5] (see ``predict_single``).
    """

    def __init__(self, n=5, factors=25, learningrate=0.05,
                 userregular=0.0001, itemregular=0.0001, iter=10):
        """Store the hyper-parameters.

        n            -- number of items returned by ``recommend``
        factors      -- length of each latent factor vector
        learningrate -- initial SGD step size
        userregular  -- regularisation weight for user bias and factors
        itemregular  -- regularisation weight for item bias and factors
        iter         -- number of passes over the training rows
        """
        # A parenthesised single argument prints identically on Python 2 and 3.
        print('lfm begin')
        self.factors = factors
        self.n = n
        self.learningrate = learningrate
        self.userregular = userregular
        self.itemregular = itemregular
        self.iter = iter

    def predict(self, testSamples):
        """Return one recommendation list per sample, for ``sample[0]``."""
        # recommend() performs the user -> uid lookup itself, so it must be
        # given the raw user; passing an already-converted uid would look
        # the uid up a second time as if it were a user.
        return [self.recommend(user_item[0]) for user_item in testSamples]

    def fit(self, trainSamples, trainTargets):
        """Train biases and latent factors with stochastic gradient descent.

        Sets ``dataModel``, ``mu``, ``bu``, ``bi``, ``qi`` and ``pu``.
        Returns ``self`` so that calls can be chained.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        self.mu = np.array(trainTargets).mean()
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()
        self.bu = np.zeros(n_users)
        self.bi = np.zeros(n_items)

        scale = math.sqrt(self.factors)
        # The factor matrices must be ndarrays: with plain lists,
        # ``row += ndarray`` extends the list instead of adding element-wise.
        self.qi = np.array(
            [[0.1 * random.random() / scale for _ in range(self.factors)]
             for _ in range(n_items)],
            dtype=float).reshape(n_items, self.factors)
        self.pu = np.array(
            [[0.1 * random.random() / scale for _ in range(self.factors)]
             for _ in range(n_users)],
            dtype=float).reshape(n_users, self.factors)

        lineData = self.dataModel.getLineData()
        lengthOfTrain = len(lineData)
        # Decay a local copy so the ``learningrate`` hyper-parameter reported
        # by get_params() stays what the caller configured.
        rate = self.learningrate
        for _ in range(self.iter):
            # Visit the training rows in a fresh random order on every pass.
            for idx in np.random.permutation(lengthOfTrain):
                row = lineData[idx]
                uid = self.dataModel.getUidByUser(row[0])
                iid = self.dataModel.getIidByItem(row[1])
                eui = row[2] - self.predict_single(uid, iid)

                self.bu[uid] += rate * (eui - self.userregular * self.bu[uid])
                self.bi[iid] += rate * (eui - self.itemregular * self.bi[iid])

                # Copy the item factors so the user update uses their value
                # from before this step, not the freshly updated one.
                item_factors = self.qi[iid].copy()
                self.qi[iid] += rate * (
                    eui * self.pu[uid] - self.itemregular * self.qi[iid])
                self.pu[uid] += rate * (
                    eui * item_factors - self.userregular * self.pu[uid])
            # Step-size decay, applied once per pass over the data.
            rate *= 0.93
        return self

    def predict_single(self, uid, iid):
        """Predict one rating for internal indices, clipped to [1, 5]."""
        ans = (self.mu + self.bi[iid] + self.bu[uid]
               + np.dot(self.qi[iid], self.pu[uid]))
        if ans > 5:
            return 5
        elif ans < 1:
            return 1
        return ans

    def recommend(self, u):
        """Return the ``n`` highest-scoring items for user ``u``.

        Returns an empty list when ``getUidByUser`` yields -1 for ``u``.
        """
        uid = self.dataModel.getUidByUser(u)
        if uid == -1:
            print('not in test')
            return []
        predict_scores = np.array([
            self.predict_single(uid, i)
            for i in range(self.dataModel.getItemsNum())
        ])
        # argsort is ascending; walk back from the end to take the n largest.
        topN = np.argsort(predict_scores)[-1:-self.n - 1:-1]
        return [self.dataModel.getItemByIid(i) for i in topN]

    def score(self, testSamples, trueLabels):
        """Evaluate the recommendations per test user and return the F1 value.

        ``testSamples`` is converted to a 2-D array whose column 0 is read as
        the user and column 1 as the item. ``trueLabels`` is not used.
        The value returned is ``Eval().evalAll(...)['F1']``.
        """
        print('LFM scoring ...')
        # Convert once instead of once per user and per access.
        samples = np.array(testSamples)
        users = samples[:, 0]

        trueList = []
        recommendList = []
        for u in set(users):
            rows = np.argwhere(users == u)[:, 0]
            trueList.append(list(samples[rows][:, 1]))
            recommendList.append(self.recommend(u))

        result = Eval().evalAll(recommendList, trueList)
        f1 = result['F1']
        print('LFM result:' + '(' + str(self.get_params()) + ')' + str(f1))
        return f1
class LFM(BaseEstimator):
    """Latent-factor recommender with user/item biases, trained by SGD.

    A rating is predicted as ``mu + bi[iid] + bu[uid] + qi[iid] . pu[uid]``
    and clipped to the range [1, 5] (see ``predict_single``).
    """

    def __init__(self, n=5, factors=25, learningrate=0.05,
                 userregular=0.0001, itemregular=0.0001, iter=10):
        """Store the hyper-parameters.

        n            -- number of items returned by ``recommend``
        factors      -- length of each latent factor vector
        learningrate -- initial SGD step size
        userregular  -- regularisation weight for user bias and factors
        itemregular  -- regularisation weight for item bias and factors
        iter         -- number of passes over the training rows
        """
        # A parenthesised single argument prints identically on Python 2 and 3.
        print('lfm begin')
        self.factors = factors
        self.n = n
        self.learningrate = learningrate
        self.userregular = userregular
        self.itemregular = itemregular
        self.iter = iter

    def predict(self, testSamples):
        """Return one recommendation list per sample, for ``sample[0]``."""
        # recommend() performs the user -> uid lookup itself, so it must be
        # given the raw user; passing an already-converted uid would look
        # the uid up a second time as if it were a user.
        return [self.recommend(user_item[0]) for user_item in testSamples]

    def fit(self, trainSamples, trainTargets):
        """Train biases and latent factors with stochastic gradient descent.

        Sets ``dataModel``, ``mu``, ``bu``, ``bi``, ``qi`` and ``pu``.
        Returns ``self`` so that calls can be chained.
        """
        self.dataModel = MemeryDataModel(trainSamples, trainTargets)
        self.mu = np.array(trainTargets).mean()
        n_users = self.dataModel.getUsersNum()
        n_items = self.dataModel.getItemsNum()
        self.bu = np.zeros(n_users)
        self.bi = np.zeros(n_items)

        scale = math.sqrt(self.factors)
        # The factor matrices must be ndarrays: with plain lists,
        # ``row += ndarray`` extends the list instead of adding element-wise.
        self.qi = np.array(
            [[0.1 * random.random() / scale for _ in range(self.factors)]
             for _ in range(n_items)],
            dtype=float).reshape(n_items, self.factors)
        self.pu = np.array(
            [[0.1 * random.random() / scale for _ in range(self.factors)]
             for _ in range(n_users)],
            dtype=float).reshape(n_users, self.factors)

        lineData = self.dataModel.getLineData()
        lengthOfTrain = len(lineData)
        # Decay a local copy so the ``learningrate`` hyper-parameter reported
        # by get_params() stays what the caller configured.
        rate = self.learningrate
        for _ in range(self.iter):
            # Visit the training rows in a fresh random order on every pass.
            for idx in np.random.permutation(lengthOfTrain):
                row = lineData[idx]
                uid = self.dataModel.getUidByUser(row[0])
                iid = self.dataModel.getIidByItem(row[1])
                eui = row[2] - self.predict_single(uid, iid)

                self.bu[uid] += rate * (eui - self.userregular * self.bu[uid])
                self.bi[iid] += rate * (eui - self.itemregular * self.bi[iid])

                # Copy the item factors so the user update uses their value
                # from before this step, not the freshly updated one.
                item_factors = self.qi[iid].copy()
                self.qi[iid] += rate * (
                    eui * self.pu[uid] - self.itemregular * self.qi[iid])
                self.pu[uid] += rate * (
                    eui * item_factors - self.userregular * self.pu[uid])
            # Step-size decay, applied once per pass over the data.
            rate *= 0.93
        return self

    def predict_single(self, uid, iid):
        """Predict one rating for internal indices, clipped to [1, 5]."""
        ans = (self.mu + self.bi[iid] + self.bu[uid]
               + np.dot(self.qi[iid], self.pu[uid]))
        if ans > 5:
            return 5
        elif ans < 1:
            return 1
        return ans

    def recommend(self, u):
        """Return the ``n`` highest-scoring items for user ``u``.

        Returns an empty list when ``getUidByUser`` yields -1 for ``u``.
        """
        uid = self.dataModel.getUidByUser(u)
        if uid == -1:
            print('not in test')
            return []
        predict_scores = np.array([
            self.predict_single(uid, i)
            for i in range(self.dataModel.getItemsNum())
        ])
        # argsort is ascending; walk back from the end to take the n largest.
        topN = np.argsort(predict_scores)[-1:-self.n - 1:-1]
        return [self.dataModel.getItemByIid(i) for i in topN]

    def score(self, testSamples, trueLabels):
        """Evaluate the recommendations per test user and return the F1 value.

        ``testSamples`` is converted to a 2-D array whose column 0 is read as
        the user and column 1 as the item. ``trueLabels`` is not used.
        The value returned is ``Eval().evalAll(...)['F1']``.
        """
        print('LFM scoring ...')
        # Convert once instead of once per user and per access.
        samples = np.array(testSamples)
        users = samples[:, 0]

        trueList = []
        recommendList = []
        for u in set(users):
            rows = np.argwhere(users == u)[:, 0]
            trueList.append(list(samples[rows][:, 1]))
            recommendList.append(self.recommend(u))

        result = Eval().evalAll(recommendList, trueList)
        f1 = result['F1']
        print('LFM result:' + '(' + str(self.get_params()) + ')' + str(f1))
        return f1