def __generateSet(self):
    """Index the relations in ``self.relation`` and build their sparse matrix.

    Every entry of ``self.relation`` is unpacked as
    ``(userId1, userId2, weight)``.  For each entry this method

    * stores ``self.followees[userId1][userId2] = weight``,
    * stores ``self.followers[userId2][userId1] = weight``,
    * gives each user id not yet present in ``self.user`` the next free
      integer index (the current size of ``self.user``).

    Returns:
        The object produced by ``new_sparseMatrix.SparseMatrix`` for the
        list of ``[index of userId1, index of userId2, weight]`` triples.
    """
    triple = []
    for userId1, userId2, weight in self.relation:
        # add relations to dict; the lookup happens on the dict itself, not
        # on a `.keys()` list, so it stays O(1) per relation on Python 2 too
        self.followees.setdefault(userId1, {})[userId2] = weight
        self.followers.setdefault(userId2, {})[userId1] = weight
        # order the user (userId1 is numbered before userId2)
        if userId1 not in self.user:
            self.user[userId1] = len(self.user)
        if userId2 not in self.user:
            self.user[userId2] = len(self.user)
        triple.append([self.user[userId1], self.user[userId2], weight])
    return new_sparseMatrix.SparseMatrix(triple)
def loadRatings(self, file, bTest=False):
    """Read a rating file and index its users, items and ratings.

    The first line is skipped when ``self.ratingConfig`` contains
    ``'-header'``.  ``self.ratingConfig['-columns']`` is read as
    whitespace-separated column positions for, in this order, the user id,
    the item id and the rating.  Each line is split on spaces, commas or
    tabs.

    Side effects: ``self.rScale[0]`` is raised to any larger rating seen and
    ``self.rScale[1]`` is lowered to any smaller rating seen; unseen user and
    item ids get the next free index in ``self.user`` / ``self.item``.

    Args:
        file: Path passed to ``open``.
        bTest: Selects the return value (see below).

    Returns:
        If ``bTest`` is false, ``new_sparseMatrix.SparseMatrix`` built from
        the ``[user index, item index, rating]`` triples and the size tuple
        ``(len(self.user), len(self.item))``.
        If ``bTest`` is true, the tuple ``(u_i_r, i_u_r)`` where ``u_i_r``
        maps a user id to a list of ``[itemId, rating]`` and ``i_u_r`` maps
        an item id to a list of ``[userId, rating]``.
    """
    with open(file) as f:
        ratings = f.readlines()
    # ignore the headline
    if self.ratingConfig.contains('-header'):
        ratings = ratings[1:]
    # order of the columns
    order = self.ratingConfig['-columns'].strip().split()
    # split data
    u_i_r = {}
    i_u_r = {}
    triple = []
    for line in ratings:
        items = split(' |,|\t', line.strip())
        userId = items[int(order[0])]
        itemId = items[int(order[1])]
        # convert once; every later use needs the numeric value
        rating = float(items[int(order[2])])
        if rating > self.rScale[0]:
            self.rScale[0] = rating
        if rating < self.rScale[1]:
            self.rScale[1] = rating
        # order the user
        if userId not in self.user:
            self.user[userId] = len(self.user)
        # order the item
        if itemId not in self.item:
            self.item[itemId] = len(self.item)
        u_i_r.setdefault(userId, []).append([itemId, rating])
        i_u_r.setdefault(itemId, []).append([userId, rating])
        triple.append([self.user[userId], self.item[itemId], rating])
    if not bTest:
        # construct the sparse matrix
        return new_sparseMatrix.SparseMatrix(
            triple, (len(self.user), len(self.item)))
    # return testSet
    return u_i_r, i_u_r
def __generateSet(self):
    """Normalise the training ratings and index the training and test sets.

    Training part (``self.trainingData``, entries unpacked as
    ``(userName, itemName, rating)``):

    * ``self.rScale`` becomes the ascending list of distinct rating values;
    * each rating is replaced in place (``self.trainingData[i][2]``) by
      ``normalize(rating, self.rScale[-1], self.rScale[0])``, i.e. it is
      called with the largest and the smallest rating;
    * unseen users/items get the next free index in ``self.user`` /
      ``self.item`` and the reverse entry in ``self.id2user`` /
      ``self.id2item``;
    * ``self.trainingMatrix`` is set to the ``SparseMatrix`` of the
      ``[user index, item index, normalised rating]`` triples.

    Test part (``self.testData``, entries unpacked as
    ``(userId, itemId, rating)``):

    * ``self.all_User`` / ``self.all_Item`` are first updated with the
      training mappings, then extended with ids only seen in the test data;
    * ``self.testSet_u[userId][itemId]`` and ``self.testSet_i[itemId][userId]``
      are set to the (un-normalised) test rating.

    Returns nothing.
    """
    triple = []
    # find the maximum rating and minimum value
    self.rScale = sorted(
        set(float(rating) for _, _, rating in self.trainingData))
    for i, (userName, itemName, rating) in enumerate(self.trainingData):
        # makes the rating within the range [0, 1].
        rating = normalize(float(rating), self.rScale[-1], self.rScale[0])
        self.trainingData[i][2] = rating
        # order the user
        if userName not in self.user:
            self.user[userName] = len(self.user)
            self.id2user[self.user[userName]] = userName
        # order the item
        if itemName not in self.item:
            self.item[itemName] = len(self.item)
            self.id2item[self.item[itemName]] = itemName
        triple.append([self.user[userName], self.item[itemName], rating])
    self.trainingMatrix = new_sparseMatrix.SparseMatrix(triple)
    self.all_User.update(self.user)
    self.all_Item.update(self.item)
    for userId, itemId, rating in self.testData:
        # Membership must be checked against all_User/all_Item, not the
        # training-only maps: otherwise a test-only id that occurs twice is
        # re-numbered with the current size and can end up sharing its index
        # with the next new id.
        # order the user
        if userId not in self.all_User:
            self.all_User[userId] = len(self.all_User)
        # order the item
        if itemId not in self.all_Item:
            self.all_Item[itemId] = len(self.all_Item)
        self.testSet_u.setdefault(userId, {})[itemId] = rating
        self.testSet_i.setdefault(itemId, {})[userId] = rating
def __generateSet(self):
    """Register every relation of ``self.relation`` and return its matrix.

    Each relation is unpacked as ``(userId1, userId2, weight)``.  The weight
    is written to ``self.followees[userId1][userId2]`` and to
    ``self.followers[userId2][userId1]``; ids missing from ``self.user`` are
    numbered with the current size of ``self.user``.

    Returns:
        ``new_sparseMatrix.SparseMatrix`` built from the collected
        ``[index of userId1, index of userId2, weight]`` entries.
    """
    def index_of(userId):
        # order the user: hand out the next free index on first sight
        if userId not in self.user:
            self.user[userId] = len(self.user)
        return self.user[userId]

    entries = []
    for source, target, weight in self.relation:
        # add relations to dict
        self.followees.setdefault(source, {})[target] = weight
        self.followers.setdefault(target, {})[source] = weight
        # the first id is numbered before the second one
        row = index_of(source)
        col = index_of(target)
        entries.append([row, col, weight])
    return new_sparseMatrix.SparseMatrix(entries)
def __generateDireSet(self):
    """Index the ``(movieId, direId, weight)`` entries of ``self.inform2``.

    For every entry, ``direId`` is appended to ``self.md[movieId]`` and
    ``movieId`` is appended to ``self.dm[direId]`` (lists are created on
    first use).  Movie ids missing from ``self.item`` and ``direId`` values
    missing from ``self.dire`` receive the next free index of their mapping.
    NOTE(review): "dire" presumably stands for director -- confirm.

    Returns:
        ``new_sparseMatrix.SparseMatrix`` built from the
        ``[movie index, dire index, weight]`` entries.
    """
    entries = []
    for movieId, direId, weight in self.inform2:
        # add relations to dict
        self.md.setdefault(movieId, []).append(direId)
        self.dm.setdefault(direId, []).append(movieId)
        # order the movie
        if movieId not in self.item:
            self.item[movieId] = len(self.item)
        if direId not in self.dire:
            self.dire[direId] = len(self.dire)
        entries.append([self.item[movieId], self.dire[direId], weight])
    return new_sparseMatrix.SparseMatrix(entries)
def __generateActSet(self):
    """Index the ``(movieId, actorId, weight)`` entries of ``self.inform1``.

    For every entry, ``actorId`` is appended to ``self.actors[movieId]`` and
    ``movieId`` is appended to ``self.act[actorId]`` (lists are created on
    first use).  Movie ids missing from ``self.item`` and actor ids missing
    from ``self.actor`` receive the next free index of their mapping.

    Returns:
        ``new_sparseMatrix.SparseMatrix`` built from the
        ``[movie index, actor index, weight]`` entries.
    """
    entries = []
    for record in self.inform1:
        movieId, actorId, weight = record
        # add relations to dict
        castOfMovie = self.actors.setdefault(movieId, [])
        castOfMovie.append(actorId)
        moviesOfActor = self.act.setdefault(actorId, [])
        moviesOfActor.append(movieId)
        # order the movie
        if movieId not in self.item:
            self.item[movieId] = len(self.item)
        if actorId not in self.actor:
            self.actor[actorId] = len(self.actor)
        entries.append([self.item[movieId], self.actor[actorId], weight])
    return new_sparseMatrix.SparseMatrix(entries)
def loadRelationship(self, filePath):
    """Read a social-relation file and return its sparse matrix.

    The first line is skipped when ``self.socialConfig`` contains
    ``'-header'``.  ``self.socialConfig['-columns']`` is read as
    whitespace-separated column positions for the first user id, the second
    user id and, optionally, the weight; without a third column every weight
    is ``1``.  Each line is split on spaces, commas or tabs.

    Side effects per line: ``self.followees[userId1][userId2]`` and
    ``self.followers[userId2][userId1]`` are set to the weight, unseen ids
    get the next free index in ``self.user``, and
    ``[userId1, userId2, weight]`` is appended to ``self.triple``.

    Args:
        filePath: Path passed to ``open``.

    Returns:
        ``new_sparseMatrix.SparseMatrix`` built from the
        ``[index of userId1, index of userId2, weight]`` entries.

    Exits the process with status -1 (after printing the offending line
    number) when fewer than two columns are configured and the file has at
    least one data line.
    """
    print('load social data...')
    triple = []
    with open(filePath) as f:
        relations = f.readlines()
    # ignore the headline
    if self.socialConfig.contains('-header'):
        relations = relations[1:]
    # order of the columns
    order = self.socialConfig['-columns'].strip().split()
    # Two columns are a supported layout (the weight then defaults to 1), so
    # only fewer than two columns are reported as a format problem.
    if len(order) < 2:
        print('The social file is not in a correct format.')
    # enumerate supplies the line number used by the error message below
    for lineNo, line in enumerate(relations):
        items = split(' |,|\t', line.strip())
        if len(order) < 2:
            print('The social file is not in a correct format. Error: Line num %d' % lineNo)
            exit(-1)
        userId1 = items[int(order[0])]
        userId2 = items[int(order[1])]
        if len(order) < 3:
            weight = 1
        else:
            weight = float(items[int(order[2])])
        # add relations to dict
        self.followees.setdefault(userId1, {})[userId2] = weight
        self.followers.setdefault(userId2, {})[userId1] = weight
        # order the user
        if userId1 not in self.user:
            self.user[userId1] = len(self.user)
        if userId2 not in self.user:
            self.user[userId2] = len(self.user)
        self.triple.append([userId1, userId2, weight])
        triple.append([self.user[userId1], self.user[userId2], weight])
    return new_sparseMatrix.SparseMatrix(triple)
def __loadRatings(self, file, bTest=False):
    """Read a rating file, normalise its ratings and index users and items.

    The first line is skipped when ``self.ratingConfig`` contains
    ``'-header'``.  ``self.ratingConfig['-columns']`` is read as
    whitespace-separated column positions for, in this order, the user id,
    the item id and the rating.  Each line is split on spaces, commas or
    tabs.

    The file is processed in two steps: first every line is parsed and
    ``self.rScale[0]`` / ``self.rScale[1]`` are widened to the largest /
    smallest rating seen; then each rating is normalised with
    ``normalize(rating, self.rScale[0], self.rScale[1])`` using those final
    bounds.  Unseen user and item ids get the next free index in
    ``self.user`` / ``self.item``.  When ``bTest`` is false,
    ``[userId, itemId, normalised rating]`` is also appended to
    ``self.triple``.

    Args:
        file: Path passed to ``open``.
        bTest: Selects the progress message, whether ``self.triple`` is
            extended, and the return value.

    Returns:
        If ``bTest`` is false, ``new_sparseMatrix.SparseMatrix`` built from
        the ``[user index, item index, normalised rating]`` triples.
        If ``bTest`` is true, the tuple ``(u_i_r, i_u_r)`` where ``u_i_r``
        maps a user id to a list of ``[itemId, rating]`` and ``i_u_r`` maps
        an item id to a list of ``[userId, rating]`` (ratings not
        normalised).

    Exits the process with status -1 (after printing the line number) when
    fewer than three columns are configured and the file has at least one
    data line.
    """
    if not bTest:
        print('load training data...')
    else:
        print('load test data...')
    with open(file) as f:
        ratings = f.readlines()
    # ignore the headline
    if self.ratingConfig.contains('-header'):
        ratings = ratings[1:]
    # order of the columns
    order = self.ratingConfig['-columns'].strip().split()
    # split data
    u_i_r = {}
    i_u_r = {}
    triple = []
    # Parse each line once, keeping the fields for the second step, and
    # find the maximum rating and minimum value on the way.
    parsed = []
    for lineNo, line in enumerate(ratings):
        items = split(' |,|\t', line.strip())
        if len(order) < 3:
            print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
            exit(-1)
        userId = items[int(order[0])]
        itemId = items[int(order[1])]
        rating = float(items[int(order[2])])
        if rating > self.rScale[0]:
            self.rScale[0] = rating
        if rating < self.rScale[1]:
            self.rScale[1] = rating
        parsed.append((userId, itemId, rating))
    # The bounds are final only now, so normalisation needs its own loop.
    for userId, itemId, rating in parsed:
        # makes the rating within the range [0, 1].
        normRating = normalize(rating, self.rScale[0], self.rScale[1])
        # order the user
        if userId not in self.user:
            self.user[userId] = len(self.user)
        # order the item
        if itemId not in self.item:
            self.item[itemId] = len(self.item)
        u_i_r.setdefault(userId, []).append([itemId, rating])
        i_u_r.setdefault(itemId, []).append([userId, rating])
        if not bTest:
            self.triple.append([userId, itemId, normRating])
        triple.append([self.user[userId], self.item[itemId], normRating])
    if not bTest:
        # construct the sparse matrix
        return new_sparseMatrix.SparseMatrix(triple)
    # return testSet
    return u_i_r, i_u_r