class VSRank(BaseEstimator): def __init__(self, neighbornum=5, n=5): print 'vsrank begin' self.neighbornum = neighbornum self.similarity = Similarity('COSINE') self.n = n def predict(self,testSamples): recList = [] for user_item in testSamples: uid = self.dataModel.getUidByUser(user_item[0]) recList.append(self.recommend(uid)) return recList def fit(self, trainSamples, trainTargets): self.dataModel = MemeryDataModel(trainSamples, trainTargets, isRating=True) usersNum = self.dataModel.getUsersNum() itemsNum = self.dataModel.getItemsNum() self.T = [{} for i in range(usersNum)] for uid in range(usersNum): purchased_items = self.dataModel.getItemIDsFromUid(uid) for i in range(len(purchased_items)): for j in range(i+1, len(purchased_items)): rating_i = self.dataModel.getRating(uid, purchased_items[i]) rating_j = self.dataModel.getRating(uid, purchased_items[j]) if rating_i > rating_j: key = str(purchased_items[i]) + " " + str(purchased_items[j]) elif rating_i < rating_j: key = str(purchased_items[j]) + " " + str(purchased_items[i]) else: continue self.T[uid][key] = 1 for uid in range(usersNum): print self.dataModel.getUserByUid(uid), len(self.T[uid]) idf = {} pair_sum = [[0]*itemsNum for i in range(itemsNum)] for uid in range(usersNum): for t, times in self.T[uid].iteritems(): i1, i2 = t.split(" ") pair_sum[int(i1)][int(i2)] += 1 for i1 in range(itemsNum): for i2 in range(itemsNum): if pair_sum[i1][i2] != 0: key = str(i1) + ' ' + str(i2) sum = pair_sum[i1][i2] + pair_sum[i2][i1] alpha = log10(1+9.0*sum/usersNum) idf[key] = alpha*log2(sum*1.0/pair_sum[i1][i2])+(1-alpha) W = [{} for i in range(usersNum)] for uid in range(usersNum): for t, times in self.T[uid].iteritems(): i1, i2 = t.split(" ") diff = self.dataModel.getRating(uid, int(i1))-self.dataModel.getRating(uid, int(i2)) tf = log2(1+abs(diff)) W[uid][t] = tf * idf[t] self.simiMatrix = np.zeros((usersNum, usersNum)) for i in range(usersNum): for j in range(i+1, usersNum): s = self.cos(W[i], W[j]) self.simiMatrix[i][j] = self.simiMatrix[j][i] = s def cos(self, dict1, dict2): product = 0.0 m1 = 0.0 m2 = 0.0 for k, v in dict1.iteritems(): m1 += v*v i1, i2 = k.split(' ') k_ = i2 + ' ' + i1 if dict2.has_key(k): product += v * dict2[k] elif dict2.has_key(k_): product -= v * dict2[k_] for k, v in dict2.iteritems(): m2 += v*v if product == 0: return 0 else: return product/sqrt(m1)/sqrt(m2) def tau(self, dict1, dict2, u1, u2): pass def neighborhood(self, userID): neighbors = np.argsort(np.array(self.simiMatrix[userID]))[-1:-self.neighbornum-1:-1] return neighbors def predict_single(self, userID, itemID): rating = 0.0 for uid in self.neighborhood(userID): if itemID in self.dataModel.getItemIDsFromUid(uid): rating += self.simiMatrix[userID][uid] * self.dataModel.getRating(uid, itemID) return rating def recommend(self, u): userID = self.dataModel.getUidByUser(u) if userID == -1: print 'not in test' return [] else: # return self.recommend_listwise(userID) return self.recommend_pairwise(userID) def recommend_pointwise(self, userID): #interactedItems = self.dataModel.getItemIDsFromUid(userID) ratings = dict() for uid in self.neighborhood(userID): for iid in self.dataModel.getItemIDsFromUid(uid): #if iid in interactedItems: #continue r = ratings.get(iid, 0) ratings[iid] = r + self.simiMatrix[userID][uid] * self.dataModel.getRating(uid, iid) r = [x for (x, y) in sorted(ratings.items(), lambda a, b: cmp(a[1], b[1]), reverse=True)[:self.n]] return [self.dataModel.getItemByIid(i) for i in r] def recommend_pairwise(self, userID): itemsNum = self.dataModel.getItemsNum() N = itemsNum recNum = self.n pi = [0]*itemsNum rank = [] for i in range(itemsNum): sum1 = 0 sum2 = 0 for j in range(itemsNum): if j != i: p = self.preference(userID, i, j) sum1 += p sum2 -= p pi[i] = sum1 - sum2 I = set(i for i in range(itemsNum)) while recNum > 0: # while len(I) > 0: recNum -= 1 t = np.argmax(pi) rank.append(t) I.remove(t) pi[t] = None for i in I: pi[i] += self.preference(userID, t, i) - self.preference(userID, i, t) # r = [x for (x, y) in sorted(zip(range(itemsNum), rank), lambda a, b: cmp(a[1], b[1]))[:self.n]] return [self.dataModel.getItemByIid(i) for i in rank] def preference(self, uid, i1, i2): nerghborhood = [] keystr = str(i1) + ' ' + str(i2) keystr_ = str(i2) + ' ' + str(i1) for i in range(self.dataModel.getUsersNum()): if self.T[i].has_key(keystr) or self.T[i].has_key(keystr_): nerghborhood.append(i) distance = [0]*len(nerghborhood) for i in range(len(nerghborhood)): distance[i] = self.simiMatrix[uid][nerghborhood[i]] nerghborhood = [x for (x, y) in sorted(zip(nerghborhood, distance), lambda a, b: cmp(a[1], b[1]), reverse=True)[:self.neighbornum]] preference = 0.0 sum = 0.0 for i in nerghborhood: rating1 = self.dataModel.getRating(i, i1) rating2 = self.dataModel.getRating(i, i2) sum += self.simiMatrix[uid][i] if rating1 > rating2: preference += self.simiMatrix[uid][i] elif rating1 < rating2: preference -= self.simiMatrix[uid][i] if sum == 0: return 0 else: return preference/sum def recommend_listwise(self, userID): itemsNum = self.dataModel.getItemsNum() M = [[0]*itemsNum for i in range(itemsNum)] for uid in self.neighborhood(userID): for t, times in self.T[uid].iteritems(): i1, i2 = t.split(" ") M[int(i1)][int(i2)] += 1 for m in xrange(itemsNum): for n in xrange(itemsNum): for k in xrange(itemsNum): M[n][k] = max(M[n][k], min(M[n][m], M[m][k])) rank = [0]*itemsNum for m in range(itemsNum): for n in range(itemsNum): if n != m and M[m][n] > M[n][m]: rank[m] += 1 r = [x for (x, y) in sorted(zip(range(itemsNum), rank), lambda a, b: cmp(a[1], b[1]))[:self.n]] return [self.dataModel.getItemByIid(i) for i in r] def score(self, testSamples, trueLabels): print 'vsrank scoring ...' #print len(testSamples) trueList = [] recommendList= [] user_unique = list(set(np.array(testSamples)[:,0])) for u in user_unique: uTrueIndex = np.argwhere(np.array(testSamples)[:,0] == u)[:,0] #true = [self.dataModel.getIidByItem(i) for i in list(np.array(testSamples)[uTrueIndex][:,1])] uTrueItem = list(np.array(testSamples)[uTrueIndex][:,1]) uTrueRating = list(np.array(trueLabels)[uTrueIndex]) true = [x for (x, y) in sorted(zip(uTrueItem, uTrueRating), lambda a, b: cmp(a[1], b[1]), reverse=True)[:self.n]] trueList.append(true) pre = self.recommend(u) recommendList.append(pre) e = Eval() result = e.evalAll(recommendList, trueList) print 'vsrank result:'+'('+str(self.get_params())+')'+str(result) return (result)['F1']