def __generateSet(self):
    """Index the training and test ratings held on this object.

    Reads ``self.trainingData`` and ``self.testData`` (sequences of
    ``[user, item, rating]`` entries) and fills in:

    * ``self.rScale``: the distinct training ratings, sorted ascending.
    * ``self.user`` / ``self.item``: name -> dense index, in order of
      first appearance in the training data, with the reverse maps
      ``self.id2user`` / ``self.id2item``.
    * ``self.trainingData``: each rating is overwritten in place with
      its normalized value.
    * ``self.trainingMatrix``: a ``new_sparseMatrix.SparseMatrix`` built
      from ``[user index, item index, normalized rating]`` triples.
    * ``self.all_User`` / ``self.all_Item``: the training indices plus a
      fresh index for every user/item seen only in the test data.
    * ``self.testSet_u`` / ``self.testSet_i``: nested dicts
      ``user -> item -> rating`` and ``item -> user -> rating`` holding
      the test ratings exactly as stored in ``self.testData``.
    """
    # Ascending order, so rScale[0] is the minimum rating and rScale[-1]
    # the maximum.
    self.rScale = sorted(
        set(float(rating) for _, _, rating in self.trainingData))

    triple = []
    for i, entry in enumerate(self.trainingData):
        userName, itemName, rating = entry
        # Rescale using the observed maximum and minimum (the original
        # comment states the result lies within [0, 1]).
        rating = normalize(float(rating), self.rScale[-1], self.rScale[0])
        self.trainingData[i][2] = rating
        # Assign the next free index to a user seen for the first time.
        if userName not in self.user:
            self.user[userName] = len(self.user)
            self.id2user[self.user[userName]] = userName
        # Assign the next free index to an item seen for the first time.
        if itemName not in self.item:
            self.item[itemName] = len(self.item)
            self.id2item[self.item[itemName]] = itemName
        triple.append([self.user[userName], self.item[itemName], rating])
    self.trainingMatrix = new_sparseMatrix.SparseMatrix(triple)

    self.all_User.update(self.user)
    self.all_Item.update(self.item)
    for entry in self.testData:
        userId, itemId, rating = entry
        # Membership is tested against the combined maps: testing against
        # the training-only maps would hand a test-only user/item a new
        # index every time it reappears in the test data.
        if userId not in self.all_User:
            self.all_User[userId] = len(self.all_User)
        if itemId not in self.all_Item:
            self.all_Item[itemId] = len(self.all_Item)
        self.testSet_u.setdefault(userId, {})[itemId] = rating
        self.testSet_i.setdefault(itemId, {})[userId] = rating
def __generateSet(self):
    """Index the training and test ratings held on this object.

    Reads ``self.trainingData`` and ``self.testData`` (sequences of
    ``[user, item, rating]`` entries) and fills in:

    * ``self.rScale``: the distinct training ratings, sorted ascending.
    * ``self.user`` / ``self.item``: name -> dense index, in order of
      first appearance in the training data, with the reverse maps
      ``self.id2user`` / ``self.id2item``.
    * ``self.trainingData``: each rating is overwritten in place with
      its normalized value.
    * ``self.trainSet_u`` / ``self.trainSet_i``: ``user -> item -> rating``
      and ``item -> user -> rating`` for the normalized training ratings.
    * ``self.all_User`` / ``self.all_Item``: the training indices plus a
      fresh index for every user/item seen only in the test data.
    * ``self.testSet_u`` / ``self.testSet_i``: the same nested layout for
      the test ratings, stored exactly as found in ``self.testData``.

    NOTE(review): the nested assignments assume the four ``*Set_*``
    containers create inner dicts on first access (e.g.
    ``defaultdict(dict)``) -- confirm where they are initialised.
    """
    # Ascending order, so rScale[0] is the minimum rating and rScale[-1]
    # the maximum.
    self.rScale = sorted(
        set(float(rating) for _, _, rating in self.trainingData))

    for i, entry in enumerate(self.trainingData):
        userName, itemName, rating = entry
        # Rescale using the observed maximum and minimum (the original
        # comment states the result lies within [0, 1]).
        rating = normalize(float(rating), self.rScale[-1], self.rScale[0])
        self.trainingData[i][2] = rating
        # Assign the next free index to a user seen for the first time.
        if userName not in self.user:
            self.user[userName] = len(self.user)
            self.id2user[self.user[userName]] = userName
        # Assign the next free index to an item seen for the first time.
        if itemName not in self.item:
            self.item[itemName] = len(self.item)
            self.id2item[self.item[itemName]] = itemName
        self.trainSet_u[userName][itemName] = rating
        self.trainSet_i[itemName][userName] = rating

    self.all_User.update(self.user)
    self.all_Item.update(self.item)
    for entry in self.testData:
        userName, itemName, rating = entry
        # Membership is tested against the combined maps: testing against
        # the training-only maps would hand a test-only user/item a new
        # index every time it reappears in the test data.
        if userName not in self.all_User:
            self.all_User[userName] = len(self.all_User)
        if itemName not in self.all_Item:
            self.all_Item[itemName] = len(self.all_Item)
        self.testSet_u[userName][itemName] = rating
        self.testSet_i[itemName][userName] = rating
def __loadRatings(self, file, bTest=False):
    """Load a rating file and index the users and items it mentions.

    Each line is split on a space, comma or tab. The positions of the
    user, item and rating fields come from the first three entries of the
    ``-columns`` option in ``self.ratingConfig``; the first line of the
    file is dropped when the ``-header`` option is present.

    Args:
        file: path of the rating file to read.
        bTest: False to load training data, True to load test data.

    Returns:
        Training (``bTest`` false): a ``new_sparseMatrix.SparseMatrix``
        built from ``[user index, item index, normalized rating]``
        triples.
        Test (``bTest`` true): a tuple ``(u_i_r, i_u_r)`` where ``u_i_r``
        maps user id -> list of ``[item id, raw rating]`` and ``i_u_r``
        maps item id -> list of ``[user id, raw rating]``.

    Side effects:
        * ``self.rScale[0]`` is raised to the largest rating seen and
          ``self.rScale[1]`` lowered to the smallest; their starting
          values are set outside this method.
        * ``self.user`` / ``self.item`` receive an index for every new
          user/item (for test data as well as training data).
        * ``self.triple`` receives ``[user id, item id, normalized
          rating]`` for training data only.
        * Prints a message and calls ``exit(-1)`` when ``-columns`` names
          fewer than three columns or a line has too few fields.
    """
    if not bTest:
        print('load training data...')
    else:
        print('load test data...')
    with open(file) as f:
        ratings = f.readlines()
    # ignore the headline
    if self.ratingConfig.contains('-header'):
        ratings = ratings[1:]
    # order of the columns
    order = self.ratingConfig['-columns'].strip().split()
    hasThreeColumns = len(order) >= 3
    if hasThreeColumns:
        userCol, itemCol, ratingCol = (int(c) for c in order[:3])
        minFields = max(userCol, itemCol, ratingCol) + 1

    # First pass: parse every line once and widen the rating scale, so the
    # final maximum/minimum are known before anything is normalized.
    rows = []
    for lineNo, line in enumerate(ratings):
        items = split(' |,|\t', line.strip())
        if not hasThreeColumns or len(items) < minFields:
            print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
            exit(-1)
        userId = items[userCol]
        itemId = items[itemCol]
        value = float(items[ratingCol])
        if value > self.rScale[0]:
            self.rScale[0] = value
        if value < self.rScale[1]:
            self.rScale[1] = value
        rows.append((userId, itemId, value))

    # Second pass: assign indices and collect the ratings.
    u_i_r = {}
    i_u_r = {}
    triple = []
    for userId, itemId, value in rows:
        # Rescale with the maximum/minimum found above (the original
        # comment states the result lies within [0, 1]).
        normRating = normalize(value, self.rScale[0], self.rScale[1])
        # order the user
        if userId not in self.user:
            self.user[userId] = len(self.user)
        # order the item
        if itemId not in self.item:
            self.item[itemId] = len(self.item)
        # The per-user / per-item lists keep the raw rating, not the
        # normalized one.
        u_i_r.setdefault(userId, []).append([itemId, value])
        i_u_r.setdefault(itemId, []).append([userId, value])
        if not bTest:
            self.triple.append([userId, itemId, normRating])
        triple.append([self.user[userId], self.item[itemId], normRating])

    if not bTest:
        return new_sparseMatrix.SparseMatrix(triple)
    return u_i_r, i_u_r