class Display(object):
    def __init__(self, conf):
        self.conf = conf
        if not conf.contains('ratings') and not conf.contains('social'):
            print 'The config file is not in the correct format!'
            exit(-1)
        if conf.contains('ratings'):
            ratingData = FileIO.loadDataSet(conf, conf['ratings'])
            self.dao = RatingDAO(conf, ratingData)
        if conf.contains('social'):
            relationData = FileIO.loadRelationship(conf, conf['social'])
            self.sao = SocialDAO(conf, relationData)

    def draw(self):
        print 'draw chart...'
        # rating
        if self.conf.contains('ratings'):
            y = [triple[2] for triple in self.dao.trainingData]
            x = self.dao.rScale
            if len(x) < 20:
                Chart.hist(x, y, len(self.dao.rScale), '#058edc',
                           'Rating Histogram', 'Rating Scale', 'Count',
                           '../visual/visualization/images/rh')
            y = [len(self.dao.userRated(u)[0]) for u in self.dao.user]
            Chart.distribution(y, 'Rating Count Distribution', '',
                               'Rated items count per user',
                               '../visual/visualization/images/rcu')
            y = [len(self.dao.itemRated(i)[0]) for i in self.dao.item]
            Chart.distribution(y, 'Rating Count Distribution', '',
                               'Rating users count per item',
                               '../visual/visualization/images/rci')
        # social
        if self.conf.contains('social'):
            x = [len(self.sao.getFollowers(u)) for u in self.sao.user]
            y = [len(self.sao.getFollowees(u)) for u in self.sao.user]
            Chart.scatter(x, y, 'red', 'Follower&Followee', 'Follower count',
                          'Followee count', '../visual/visualization/images/ff')
            y = [len(self.sao.getFollowers(u)) for u in self.sao.user]
            Chart.distribution(y, 'Followers Distribution', '',
                               'Followers count per user',
                               '../visual/visualization/images/fd1')
            y = [len(self.sao.getFollowees(u)) for u in self.sao.user]
            Chart.distribution(y, 'Followees Distribution', '',
                               'Followees count per user',
                               '../visual/visualization/images/fd2')

    def render(self):
        self.draw()
        html = "<html><head><title>Data Analysis</title>\n" \
               "<link rel='stylesheet' type='text/css' href='reportStyle.css'/></head>\n" \
               "<body><div class='reportTitle'><div class='in'>Data Analysis</div></div>\n" \
               "<div class='main'><div class='area1'>\n" \
               "<div class='title'><h3>Data Files</h3></div><div class='text'>"
        if self.conf.contains('ratings'):
            html += "<b>Rating Data</b>: {rating}".format(rating=abspath(self.conf['ratings']))
        if self.conf.contains('social'):
            html += "<br><b>Social Data</b>: {social}".format(social=abspath(self.conf['social']))
        html += "</div></div><div style='padding-top:20px'><center>" \
                "<img src='images/header2.png'/></center></div>\n"
        if self.conf.contains('ratings'):
            html += "<div class='area1'><div class='title'><h3>Rating Data</h3></div>\n"
            html += "<div class='text'><b>Rating Scale</b>: {scale}<br>".format(
                scale=' '.join([str(item) for item in self.dao.rScale]))
            html += "<b>User Count</b>: {user}<br><b>Item Count</b>: {item}<br>" \
                    "<b>Record Count</b>: {record}<br><b>Global Mean</b>: {mean}</div>\n".format(
                        user=str(len(self.dao.user)), item=str(len(self.dao.item)),
                        record=str(len(self.dao.trainingData)),
                        mean=str(round(denormalize(self.dao.globalMean,
                                                   self.dao.rScale[-1], self.dao.rScale[0]), 3)))
            html += "<center><div class='img'><img src='images/rh.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/rcu.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/rci.png' width='640px' height='480px'/></div></center>\n"
            html += "</div><div style='padding-top:20px'><center>" \
                    "<img src='images/header2.png'/></center></div>\n"
        if self.conf.contains('social'):
            html += "<div class='area1'><div class='title'><h3>Social Data</h3></div>\n"
            html += "<div class='text'><b>User Count</b>: {user}<br><b>Relation Count</b>: {relation}<br></div>\n".format(
                user=str(len(self.sao.user)), relation=str(len(self.sao.relation)))
            html += "<center><div class='img'><img src='images/ff.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/fd1.png' width='640px' height='480px'/></div></center>\n"
            html += "<center><div class='img'><img src='images/fd2.png' width='640px' height='480px'/></div></center>\n"
            html += "</div><div style='padding-top:20px'><center>" \
                    "<img src='images/header2.png'/></center></div>\n"
        html += "</div></body></html>"
        FileIO.writeFile('../visual/visualization/', 'analysis.html', html)
        print 'The report has been output to', abspath('../visual/visualization/analysis.html')
        webbrowser.open(abspath('../visual/visualization/analysis.html'), new=0, autoraise=True)
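

# A minimal usage sketch for the Display class above (not part of the original
# file). The Config class and its import path are assumptions made for
# illustration; substitute the project's actual configuration reader and
# config file.
from tool.config import Config  # assumed location of the configuration reader

if __name__ == '__main__':
    conf = Config('../config/visual.conf')  # hypothetical config defining 'ratings' and/or 'social'
    display = Display(conf)
    display.render()  # draws the charts and opens analysis.html in the browser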
# Stdlib imports needed by this class; RatingDAO, LineConfig, FileIO and
# Measure are project modules whose import paths depend on the package layout.
from os.path import abspath
from time import strftime, localtime, time


class Recommender(object):
    'Base class for recommendation algorithms; subclasses override the model and prediction hooks.'

    def __init__(self, conf, trainingSet, testSet, fold='[1]'):
        self.config = conf
        self.data = None
        self.isSaveModel = False
        self.ranking = None
        self.isLoadModel = False
        self.output = None
        self.isOutput = True
        self.data = RatingDAO(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.evalSettings = LineConfig(self.config['evaluation.setup'])
        self.measure = []
        self.record = []
        if self.evalSettings.contains('-cold'):
            # evaluation on cold-start users: drop test users that have more
            # than 'threshold' ratings in the training set
            threshold = int(self.evalSettings['-cold'])
            removedUser = {}
            for user in self.data.testSet_u:
                if user in self.data.trainSet_u and len(self.data.trainSet_u[user]) > threshold:
                    removedUser[user] = 1
            for user in removedUser:
                del self.data.testSet_u[user]
            testData = []
            for item in self.data.testData:
                if item[0] not in removedUser:
                    testData.append(item)
            self.data.testData = testData
        self.num_users, self.num_items, self.train_size = self.data.trainingSize()

    def readConfiguration(self):
        self.algorName = self.config['recommender']
        self.output = LineConfig(self.config['output.setup'])
        self.isOutput = self.output.isMainOn()
        self.ranking = LineConfig(self.config['item.ranking'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        print('Algorithm:', self.config['recommender'])
        print('Ratings dataset:', abspath(self.config['ratings']))
        if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
            print('Test set:',
                  abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet')))
        print('Training set size: (user count: %d, item count: %d, record count: %d)'
              % (self.data.trainingSize()))
        print('Test set size: (user count: %d, item count: %d, record count: %d)'
              % (self.data.testSize()))
        print('=' * 80)

    def initModel(self):
        pass

    def buildModel(self):
        'build the model (for model-based algorithms)'
        pass

    def buildModel_tf(self):
        'train the model with tensorflow'
        pass

    def saveModel(self):
        pass

    def loadModel(self):
        pass

    def predict(self, u, i):
        pass

    def predictForRanking(self, u):
        pass

    def checkRatingBoundary(self, prediction):
        # clamp a predicted rating to the rating scale
        if prediction > self.data.rScale[-1]:
            return self.data.rScale[-1]
        elif prediction < self.data.rScale[0]:
            return self.data.rScale[0]
        else:
            return round(prediction, 3)

    def evalRatings(self):
        res = []  # text of the result
        res.append('userId itemId original prediction\n')
        # predict
        for ind, entry in enumerate(self.data.testData):
            user, item, rating = entry
            prediction = self.predict(user, item)
            # denormalization is intentionally disabled in this variant
            # prediction = denormalize(prediction, self.data.rScale[-1], self.data.rScale[0])
            pred = self.checkRatingBoundary(prediction)
            # append the prediction so it can be measured
            self.data.testData[ind].append(pred)
            res.append(user + ' ' + item + ' ' + str(rating) + ' ' + str(pred) + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config['recommender'] + '@' + currentTime + \
                       '-rating-predictions' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.ratingMeasure(self.data.testData)
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))

    def evalRanking(self):
        res = []  # text of the result
        if self.ranking.contains('-topN'):
            top = self.ranking['-topN'].split(',')
            top = [int(num) for num in top]
            N = int(top[-1])
            if N > 100 or N < 0:
                print('N cannot be larger than 100 or smaller than 0! It has been reassigned to 10.')
                N = 10
            if N > len(self.data.item):
                N = len(self.data.item)
        else:
            print('No correct evaluation metric is specified!')
            exit(-1)
        res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
        # predict
        recList = {}
        userN = {}
        userCount = len(self.data.testSet_u)
        for i, user in enumerate(self.data.testSet_u):
            itemSet = {}
            line = user + ':'
            predictedItems = self.predictForRanking(user)
            for id, rating in enumerate(predictedItems):
                itemSet[self.data.id2item[id]] = rating
            # remove the items the user has already rated in the training set
            ratedList, ratingList = self.data.userRated(user)
            for item in ratedList:
                del itemSet[item]
            # seed the top-N list with the first N candidates, sorted by score
            Nrecommendations = []
            for item in itemSet:
                if len(Nrecommendations) < N:
                    Nrecommendations.append((item, itemSet[item]))
                else:
                    break
            Nrecommendations.sort(key=lambda d: d[1], reverse=True)
            recommendations = [item[1] for item in Nrecommendations]
            resNames = [item[0] for item in Nrecommendations]
            # find the N biggest scores: binary-search the insertion position in
            # the descending top-N list and shift the tail backwards
            for item in itemSet:
                ind = N
                l = 0
                r = N - 1
                if recommendations[r] < itemSet[item]:
                    while r >= l:
                        mid = (r - l) // 2 + l
                        if recommendations[mid] >= itemSet[item]:
                            l = mid + 1
                        elif recommendations[mid] < itemSet[item]:
                            r = mid - 1
                        if r < l:
                            ind = r
                            break
                # move the items backwards
                if ind < N - 2:
                    recommendations[ind + 2:] = recommendations[ind + 1:-1]
                    resNames[ind + 2:] = resNames[ind + 1:-1]
                if ind < N - 1:
                    recommendations[ind + 1] = itemSet[item]
                    resNames[ind + 1] = item
            recList[user] = list(zip(resNames, recommendations))
            if i % 100 == 0:
                print(self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if item[0] in self.data.testSet_u[user]:
                    line += '*'
            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + \
                       str(N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print('The result has been output to ', abspath(outDir), '.')
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.rankingMeasure(self.data.testSet_u, recList, top)
        FileIO.writeFile(outDir, fileName, self.measure)
        print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))

    def execute(self):
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoadModel:
            print('Loading model %s...' % self.foldInfo)
            self.loadModel()
        else:
            print('Initializing model %s...' % self.foldInfo)
            self.initModel()
            print('Building Model %s...' % self.foldInfo)
            try:
                import tensorflow
                if self.evalSettings.contains('-tf'):
                    self.buildModel_tf()
                else:
                    self.buildModel()
            except ImportError:
                self.buildModel()
        # predict the ratings or the item ranking
        print('Predicting %s...' % self.foldInfo)
        if self.ranking.isMainOn():
            self.evalRanking()
        else:
            self.evalRatings()
        # save model
        if self.isSaveModel:
            print('Saving model %s...' % self.foldInfo)
            self.saveModel()
        return self.measure
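

# Illustrative sketch only (not part of the original project): the base class
# above leaves initModel/buildModel/predict as no-ops, so a concrete algorithm
# subclasses Recommender and fills them in. The hypothetical item-mean baseline
# below assumes the RatingDAO held in self.data exposes item, itemRated(),
# id2item and globalMean in the way they are used elsewhere in this file.
class ItemMean(Recommender):
    def initModel(self):
        # average training rating per item, falling back to the global mean
        self.itemMeans = {}
        for item in self.data.item:
            userList, ratingList = self.data.itemRated(item)  # assumed (users, ratings) pair
            if len(ratingList) > 0:
                self.itemMeans[item] = float(sum(ratingList)) / len(ratingList)
            else:
                self.itemMeans[item] = self.data.globalMean

    def predict(self, u, i):
        # rating prediction consumed by evalRatings()
        return self.itemMeans.get(i, self.data.globalMean)

    def predictForRanking(self, u):
        # one score per internal item id, in the order expected by evalRanking()
        return [self.itemMeans.get(self.data.id2item[idx], self.data.globalMean)
                for idx in range(self.num_items)]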
# An alternative Python 2-style variant of the Recommender base class (same
# class name; presumably from a different version or module of the project).
class Recommender(object):
    def __init__(self, conf, trainingSet=None, testSet=None, fold='[1]'):
        self.config = conf
        self.dao = None
        self.isSaveModel = False
        self.ranking = None
        self.isLoadModel = False
        self.output = None
        self.isOutput = True
        self.dao = RatingDAO(self.config, trainingSet, testSet)
        self.foldInfo = fold
        self.measure = []

    def readConfiguration(self):
        self.algorName = self.config['recommender']
        self.output = LineConfig(self.config['output.setup'])
        self.isOutput = self.output.isMainOn()
        self.ranking = LineConfig(self.config['item.ranking'])

    def printAlgorConfig(self):
        "show algorithm's configuration"
        print 'Algorithm:', self.config['recommender']
        print 'Ratings dataset:', abspath(self.config['ratings'])
        if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
            print 'Test set:', abspath(
                LineConfig(self.config['evaluation.setup']).getOption('-testSet'))
        print 'Training set size: (user count: %d, item count: %d, record count: %d)' % (
            self.dao.trainingSize())
        print 'Test set size: (user count: %d, item count: %d, record count: %d)' % (
            self.dao.testSize())
        print '=' * 80

    def initModel(self):
        pass

    def buildModel(self):
        'build the model (for model-based algorithms)'
        pass

    def saveModel(self):
        pass

    def loadModel(self):
        pass

    def predict(self, u, i):
        pass

    def predictForRanking(self, u):
        pass

    def checkRatingBoundary(self, prediction):
        # clamp a predicted rating to the rating scale
        if prediction > self.dao.rScale[-1]:
            return self.dao.rScale[-1]
        elif prediction < self.dao.rScale[0]:
            return self.dao.rScale[0]
        else:
            return round(prediction, 3)

    def evalRatings(self):
        res = []  # text of the result
        res.append('userId itemId original prediction\n')
        # predict
        for ind, entry in enumerate(self.dao.testData):
            user, item, rating = entry
            prediction = self.predict(user, item)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
            pred = self.checkRatingBoundary(prediction)
            # append the prediction so it can be measured
            self.dao.testData[ind].append(pred)
            res.append(user + ' ' + item + ' ' + str(rating) + ' ' + str(pred) + '\n')
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config['recommender'] + '@' + currentTime + \
                       '-rating-predictions' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        self.measure = Measure.ratingMeasure(self.dao.testData)
        FileIO.writeFile(outDir, fileName, self.measure)

    def evalRanking(self):
        res = []  # text of the result
        N = 0
        threshold = 0
        bThres = False
        bTopN = False
        if self.ranking.contains('-topN'):
            bTopN = True
            N = int(self.ranking['-topN'])
            if N > 100 or N < 0:
                print 'N cannot be larger than 100 or smaller than 0! It has been reassigned to 100.'
                N = 100
        elif self.ranking.contains('-threshold'):
            threshold = float(self.ranking['-threshold'])
            bThres = True
        else:
            print 'No correct evaluation metric is specified!'
            exit(-1)
        res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
        # predict
        recList = {}
        userN = {}
        userCount = len(self.dao.testSet_u)
        for i, user in enumerate(self.dao.testSet_u):
            itemSet = {}
            line = user + ':'
            for item in self.dao.item:
                # predict
                prediction = self.predict(user, item)
                # denormalize
                prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
                # keep the prediction so it can be measured
                if bThres:
                    if prediction > threshold:
                        itemSet[item] = prediction
                else:
                    itemSet[item] = prediction
            # remove the items the user has already rated in the training set
            ratedList, ratingList = self.dao.userRated(user)
            for item in ratedList:
                del itemSet[self.dao.id2item[item]]
            itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
            if self.ranking.contains('-topN'):
                recList[user] = itemSet[0:N]
            elif self.ranking.contains('-threshold'):
                recList[user] = itemSet[:]
                userN[user] = len(itemSet)
            if i % 100 == 0:
                print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
            for item in recList[user]:
                line += ' (' + item[0] + ',' + str(item[1]) + ')'
                if self.dao.testSet_u[user].has_key(item[0]):
                    line += '*'
            line += '\n'
            res.append(line)
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
        # output prediction result
        if self.isOutput:
            outDir = self.output['-dir']
            if self.ranking.contains('-topN'):
                fileName = self.config['recommender'] + '@' + currentTime + '-top-' + \
                           str(N) + 'items' + self.foldInfo + '.txt'
            elif self.ranking.contains('-threshold'):
                fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + \
                           str(threshold) + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
            print 'The result has been output to ', abspath(outDir), '.'
        # output evaluation result
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        if self.ranking.contains('-topN'):
            self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList, N)
        elif self.ranking.contains('-threshold'):
            origin = self.dao.testSet_u.copy()
            for user in origin:
                temp = {}
                for item in origin[user]:
                    if origin[user][item] >= threshold:
                        temp[item] = threshold
                origin[user] = temp
            self.measure = Measure.rankingMeasure_threshold(origin, recList, userN)
        FileIO.writeFile(outDir, fileName, self.measure)

    def execute(self):
        self.readConfiguration()
        if self.foldInfo == '[1]':
            self.printAlgorConfig()
        # load model from disk or build model
        if self.isLoadModel:
            print 'Loading model %s...' % (self.foldInfo)
            self.loadModel()
        else:
            print 'Initializing model %s...' % (self.foldInfo)
            self.initModel()
            print 'Building Model %s...' % (self.foldInfo)
            self.buildModel()
        # predict the ratings or the item ranking
        print 'Predicting %s...' % (self.foldInfo)
        if self.ranking.isMainOn():
            self.evalRanking()
        else:
            self.evalRatings()
        # save model
        if self.isSaveModel:
            print 'Saving model %s...' % (self.foldInfo)
            self.saveModel()
        return self.measure

    def performance(self):
        # evaluate rating prediction on the test set without writing any files
        res = []
        for ind, entry in enumerate(self.dao.testData):
            user, item, rating = entry
            # predict
            prediction = self.predict(user, item)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
            pred = self.checkRatingBoundary(prediction)
            # collect the prediction so it can be measured
            res.append([user, item, rating, pred])
        self.measure = Measure.ratingMeasure(res)
        return self.measure
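

# For reference, a sketch of the configuration entries both Recommender
# variants read. The key=value layout, the leading on/off switch and the
# concrete values below are assumptions for illustration only; the exact
# syntax is defined by the project's Config and LineConfig classes.
#
#   ratings=../dataset/ratings.txt             # rating data ('social' is only used by Display)
#   recommender=SomeAlgorithm                  # algorithm name, reused in output file names
#   evaluation.setup=-testSet ../dataset/test.txt -cold 5
#   item.ranking=on -topN 10,20                # first variant: comma-separated list of N values
#                                              # second variant: a single N (e.g. -topN 10) or -threshold 3.5
#   output.setup=on -dir ../results/
#
# readConfiguration() wraps 'output.setup' and 'item.ranking' in LineConfig, so
# switches such as '-dir', '-topN' and '-threshold' are parsed from those lines,
# and isMainOn() decides whether ranking evaluation or rating prediction runs.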