def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    N = int(self.ranking['-topN'])
    if N > 100 or N < 0:
        print 'N cannot be larger than 100! It has been reset to 10'
        N = 10
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userCount = len(self.data.testSet)
    rawRes = {}
    for i, user in enumerate(self.data.testSet):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predict(user)
        recList[user] = predictedItems
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            # each predicted item is an (itemId, score) pair
            if self.data.testSet[user].has_key(item[0]):
                line += '*'
            line += '(' + item[0] + ',' + str(item[1]) + '),'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        elif self.ranking.contains('-threshold'):
            fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + str(threshold) + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    if self.ranking.contains('-topN'):
        self.measure = Measure.rankingMeasure(self.data.testSet, recList, rawRes, N)
    FileIO.writeFile(outDir, fileName, self.measure)
    print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def evalRatings(self):
    res = []  # used to contain the text of the result
    res.append('userId itemId original prediction\n')
    # predict
    for ind, entry in enumerate(self.data.testData):
        user, item, rating = entry
        # predict
        prediction = self.predict(user, item)
        # denormalize
        # prediction = denormalize(prediction, self.data.rScale[-1], self.data.rScale[0])
        pred = self.checkRatingBoundary(prediction)
        # add prediction in order to measure
        self.data.testData[ind].append(pred)
        res.append(user + ' ' + item + ' ' + str(rating) + ' ' + str(pred) + '\n')
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The result has been output to ', abspath(outDir), '.')
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.ratingMeasure(self.data.testData)
    FileIO.writeFile(outDir, fileName, self.measure)
    print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
def evalRatings(self):
    res = []  # used to contain the text of the result
    res.append('userId itemId original prediction\n')
    # predict
    for userId in self.dao.testSet_u:
        for ind, item in enumerate(self.dao.testSet_u[userId]):
            itemId = item[0]
            originRating = item[1]
            # predict
            prediction = self.predict(userId, itemId)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
            pred = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            self.dao.testSet_u[userId][ind].append(pred)
            res.append(userId + ' ' + itemId + ' ' + str(originRating) + ' ' + str(pred) + '\n')
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-rating-predictions' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    measure = Measure.ratingMeasure(self.dao.testSet_u)
    FileIO.writeFile(outDir, fileName, measure)
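# --- Illustrative sketch (not part of the framework) -------------------------
# The methods above rely on denormalize() and checkRatingBoundary(), whose
# definitions are not shown here. Assuming rScale is sorted ascending (so
# rScale[-1] is the maximum and rScale[0] the minimum) and that predictions are
# min-max normalized, minimal versions consistent with how they are called
# might look like this; the framework's own implementations may differ.
def denormalize_sketch(score, r_max, r_min):
    # map a normalized score in [0, 1] back onto the original rating scale
    return r_min + score * (r_max - r_min)

def check_rating_boundary_sketch(prediction, r_min, r_max):
    # clamp a raw prediction into the rating scale before it is recorded
    return max(r_min, min(r_max, prediction))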
def performance(self):
    # res = []  # used to contain the text of the result
    # res.append('userId itemId original prediction\n')
    # predict
    res = []
    for ind, entry in enumerate(self.dao.testData):
        user, item, rating = entry
        # predict
        prediction = self.predict(user, item)
        # denormalize
        prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
        pred = self.checkRatingBoundary(prediction)
        # add prediction in order to measure
        res.append([user, item, rating, pred])
        # res.append(user + ' ' + item + ' ' + str(rating) + ' ' + str(pred) + '\n')
    # file output of the predictions and measures is disabled in this variant;
    # only the evaluation measures are computed and returned
    self.measure = Measure.ratingMeasure(res)
    return self.measure
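# --- Illustrative sketch (not part of the framework) -------------------------
# Measure.ratingMeasure() consumes rows of [user, item, rating, pred]. As a
# rough illustration of what rating measures over such rows typically look
# like, MAE and RMSE can be computed as below; the framework's own
# implementation may differ in formatting and in the metrics it reports.
import math

def mae_rmse(rows):
    errors = [abs(float(r[2]) - float(r[3])) for r in rows]
    mae = sum(errors) / len(errors)
    rmse = math.sqrt(sum(e * e for e in errors) / len(errors))
    return mae, rmse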
def ranking_performance(self):
    N = 10
    recList = {}
    testSample = {}
    for user in self.data.testSet_u:
        if len(testSample) == 300:
            break
        testSample[user] = self.data.testSet_u[user]
    for user in testSample:
        itemSet = {}
        predictedItems = self.predictForRanking(user)
        for id, rating in enumerate(predictedItems):
            itemSet[self.data.id2item[id]] = rating
        ratedList, ratingList = self.data.userRated(user)
        for item in ratedList:
            del itemSet[item]
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = (l + r) / 2
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    if r < l:
                        ind = r
                        break
            # ind = bisect(recommendations, itemSet[item])
            # shift the lower-ranked items down before inserting the new score
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = zip(resNames, recommendations)
    measure = Measure.rankingMeasure(testSample, recList, [10])
    print '-' * 80
    print 'Ranking Performance ' + self.foldInfo + ' (Top-10 On 300 sampled users)'
    for m in measure[1:]:
        print m.strip()
    print '-' * 80
    # self.record.append(measure[3].strip()+' '+measure[4])
    return measure
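# --- Illustrative sketch (not part of the framework) -------------------------
# The manual "insert into the current top-N" loop above can be hard to follow.
# Assuming itemSet maps itemId -> predicted score, the same top-N list can be
# obtained with the standard-library heapq module; this is only a reference
# sketch for readability, not the code path the recommenders actually use.
import heapq

def top_n_items(itemSet, N=10):
    """Return the N (itemId, score) pairs with the highest scores."""
    return heapq.nlargest(N, itemSet.items(), key=lambda pair: pair[1])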
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = int(self.ranking['-topN'])
    if N > 100 or N < 0:
        N = 100
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is matched\n')
    # predict
    topNSet = {}
    userCount = len(self.dao.testSet_u)
    for i, user in enumerate(self.dao.testSet_u):
        itemSet = []
        line = user + ':'
        for item in self.dao.item:
            if not self.dao.rating(user, item):
                # predict
                prediction = self.predict(user, item)
                # denormalize
                prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
                prediction = round(prediction, 4)
                # pred = self.checkRatingBoundary(prediction)
                # add prediction in order to measure
                itemSet.append((item, prediction))
        itemSet.sort(key=lambda d: d[1], reverse=True)
        topNSet[user] = itemSet[0:N]
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in topNSet[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.dao.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
    FileIO.writeFile(outDir, fileName, self.measure)
def rating_performance(self):
    res = []
    for ind, entry in enumerate(self.data.testData):
        user, item, rating = entry
        # predict
        prediction = self.predict(user, item)
        pred = self.checkRatingBoundary(prediction)
        res.append([user, item, rating, pred])
    self.measure = Measure.ratingMeasure(res)
    return self.measure
def rating_performance(self):
    res = []
    for ind, entry in enumerate(self.data.testData):
        user, item, rating = entry
        # predict
        prediction = self.predict(user, item)
        # denormalize
        # prediction = denormalize(prediction, self.data.rScale[-1], self.data.rScale[0])
        pred = self.checkRatingBoundary(prediction)
        # add prediction in order to measure
        res.append([user, item, rating, pred])
    self.measure = Measure.ratingMeasure(res)
    return self.measure
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = int(self.ranking['-topN'])
    if N > 100 or N < 0:
        N = 100
    res.append('userId: recommendations in (itemId, ranking score) pairs\n')
    # predict
    topNSet = {}
    userCount = len(self.dao.testSet_u)
    for i, userId in enumerate(self.dao.testSet_u):
        itemSet = {}
        line = userId + ':'
        for itemId in self.dao.item:
            pred = self.predict(userId, itemId)
            # add prediction in order to measure
            itemSet[itemId] = pred
        topNSet[userId] = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)[0:N]
        if i % 100 == 0:
            print 'Progress:' + str(i) + '/' + str(userCount)
        for item in topNSet[userId]:
            line += '(' + item[0] + ',' + str(item[1]) + ') '
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    measure = Measure.rankingMeasure(self.dao.testSet_u, topNSet, N)
    FileIO.writeFile(outDir, fileName, measure)
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    bThres = False
    bTopN = False
    if self.ranking.contains('-topN'):
        bTopN = True
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            print 'N cannot be larger than 100! It has been reset to 100'
            N = 100
    elif self.ranking.contains('-threshold'):
        threshold = float(self.ranking['-threshold'])
        bThres = True
    else:
        print 'No correct evaluation metric is specified!'
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    userCount = len(self.dao.testSet_u)
    for i, user in enumerate(self.dao.testSet_u):
        itemSet = {}
        line = user + ':'
        for item in self.dao.item:
            # predict
            prediction = self.predict(user, item)
            # denormalize
            prediction = denormalize(prediction, self.dao.rScale[-1], self.dao.rScale[0])
            # prediction = self.checkRatingBoundary(prediction)
            # add prediction in order to measure
            if bThres:
                if prediction > threshold:
                    itemSet[item] = prediction
            else:
                itemSet[item] = prediction
        ratedList, ratingList = self.dao.userRated(user)
        for item in ratedList:
            del itemSet[self.dao.id2item[item]]
        itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
        if self.ranking.contains('-topN'):
            recList[user] = itemSet[0:N]
        elif self.ranking.contains('-threshold'):
            recList[user] = itemSet[:]
            userN[user] = len(itemSet)
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.dao.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        elif self.ranking.contains('-threshold'):
            fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + str(threshold) + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The Result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    if self.ranking.contains('-topN'):
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList, N)
    elif self.ranking.contains('-threshold'):
        origin = self.dao.testSet_u.copy()
        for user in origin:
            temp = {}
            for item in origin[user]:
                if origin[user][item] >= threshold:
                    temp[item] = threshold
            origin[user] = temp
        self.measure = Measure.rankingMeasure_threshold(origin, recList, userN)
    FileIO.writeFile(outDir, fileName, self.measure)
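# Example of the per-user lines written to the "-top-N items" file by the
# method above (the ids and scores here are made up for illustration); a
# trailing '*' marks an item that also appears in that user's test set:
#   u305: (i42,4.5172)* (i911,4.3308) (i87,4.1025)*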
def ranking_performance(self, iteration):
    # for quick evaluation, only the first 2000 items are ranked and
    # the results of 1000 sampled users are evaluated
    N = 10
    recList = {}
    testSample = {}
    for user in self.data.testSet_u:
        if len(testSample) == 1000:
            break
        if user not in self.data.trainSet_u:
            continue
        testSample[user] = self.data.testSet_u[user]
    for user in testSample:
        itemSet = {}
        predictedItems = self.predictForRanking(user)
        for id, rating in enumerate(predictedItems[:2000]):
            itemSet[self.data.id2item[id]] = rating
        ratedList, ratingList = self.data.userRated(user)
        for item in ratedList:
            if item in itemSet:
                del itemSet[item]
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = int((l + r) / 2)
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    if r < l:
                        ind = r
                        break
            # shift the lower-ranked items down before inserting the new score
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = list(zip(resNames, recommendations))
    measure = Measure.rankingMeasure(testSample, recList, [10])
    if len(self.bestPerformance) > 0:
        count = 0
        performance = {}
        for m in measure[1:]:
            k, v = m.strip().split(':')
            performance[k] = float(v)
        for k in self.bestPerformance[1]:
            if self.bestPerformance[1][k] > performance[k]:
                count += 1
            else:
                count -= 1
        # update the best record when the current iteration wins on most metrics
        if count < 0:
            self.bestPerformance[1] = performance
            self.bestPerformance[0] = iteration
            self.saveModel()
    else:
        self.bestPerformance.append(iteration)
        performance = {}
        for m in measure[1:]:
            k, v = m.strip().split(':')
            performance[k] = float(v)
        self.bestPerformance.append(performance)
        self.saveModel()
    print('-' * 120)
    print('Quick Ranking Performance ' + self.foldInfo + ' (Top-10 Item Recommendation On 1000 sampled users)')
    measure = [m.strip() for m in measure[1:]]
    print('*Current Performance*')
    print('iteration:', iteration, ' | '.join(measure))
    bp = ''
    bp += 'Precision' + ':' + str(self.bestPerformance[1]['Precision']) + ' | '
    bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + ' | '
    bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | '
    bp += 'MAP' + ':' + str(self.bestPerformance[1]['MAP']) + ' | '
    bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG'])
    print('*Best Performance* ')
    print('iteration:', self.bestPerformance[0], bp)
    print('-' * 120)
    return measure
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    top = self.ranking['-topN'].split(',')
    top = [int(num) for num in top]
    N = int(top[-1])
    if N > 100 or N < 0:
        print 'N cannot be larger than 100! It has been reset to 10'
        N = 10
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userCount = len(self.data.testSet)
    for i, user in enumerate(self.data.testSet):
        line = user + ':'
        if self.data.userRecord.has_key(user):
            predictedItems = self.predict(user)
        else:
            predictedItems = ['0'] * N
        predicted = {}
        for k, item in enumerate(predictedItems):
            predicted[item] = k
        # remove items the user has already interacted with
        if self.data.userRecord.has_key(user):
            for item in self.data.userRecord[user]:
                if predicted.has_key(item[self.recType]):
                    del predicted[item[self.recType]]
        predicted = sorted(predicted.iteritems(), key=lambda d: d[1])
        predictedItems = [item[0] for item in predicted]
        recList[user] = predictedItems[:N]
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            if self.data.testSet[user].has_key(item):
                line += '*'
            line += item + ','
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN'] \
                       + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.data.testSet, recList, top, self.data.getSize(self.recType))
    FileIO.writeFile(outDir, fileName, self.measure)
    print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    top = self.ranking['-topN'].split(',')
    top = [int(num) for num in top]
    N = max(top)
    if N > 100 or N < 0:
        print('N cannot be larger than 100! It has been reset to 10')
        N = 10
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userCount = len(self.data.testSet)
    for i, user in enumerate(self.data.testSet):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predict(user)
        for id, score in enumerate(predictedItems):
            itemSet[self.data.id2name[self.recType][id]] = score
        for item in self.data.userRecord[user]:
            try:
                del itemSet[item[self.recType]]
            except KeyError:
                pass
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # itemSet = sorted(itemSet.iteritems(), key=lambda d: d[1], reverse=True)
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = (l + r) // 2
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    else:
                        ind = mid
                        break
                    if r < l:
                        ind = r
                        break
            # ind = bisect(recommendations, itemSet[item])
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = resNames
        if i % 100 == 0:
            print(self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
        for item in recList[user]:
            line += item
            if item in self.data.testSet[user]:
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + self.ranking['-topN'] \
                       + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The result has been output to ', abspath(outDir), '.')
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.data.testSet, recList, top, self.data.getSize(self.recType))
    FileIO.writeFile(outDir, fileName, self.measure)
    print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
def ranking_performance(self):
    N = 10
    itemcount = 0
    recList = {}
    testSample = {}
    for user in self.data.testSet:
        itemcount += len(self.data.testSet[user])
        if len(testSample) == 300:
            break
        testSample[user] = self.data.testSet[user]
    for user in testSample:
        itemSet = {}
        predictedItems = self.predict(user)
        for id, rating in enumerate(predictedItems):
            itemSet[self.data.id2name['track'][id]] = rating
        ratedList = self.data.testSet[user].keys()
        ratingList = self.data.testSet[user].values()
        for item in ratedList:
            del itemSet[item]
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = int((l + r) / 2)
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    if r < l:
                        ind = r
                        break
            # ind = bisect(recommendations, itemSet[item])
            # shift the lower-ranked items down before inserting the new score
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = resNames
    measure = Measure.rankingMeasure(testSample, recList, [10], itemcount)
    print('-' * 80)
    print('Ranking Performance ' + self.foldInfo + ' (Top-10 On 300 sampled users)')
    for m in measure[1:]:
        print(m.strip())
    print('-' * 80)
    return measure
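# --- Illustrative sketch (not part of the framework) -------------------------
# The commented "# ind = bisect(...)" lines above hint at the standard bisect
# module. One way to keep a descending `recommendations` list and the aligned
# `resNames` list up to date with it, assuming the same inputs as above, is to
# bisect on the negated scores; this is only a readability sketch.
from bisect import bisect_right

def insert_into_topN(resNames, recommendations, item, score, N=10):
    keys = [-s for s in recommendations]      # ascending view of the scores
    ind = bisect_right(keys, -score)          # insertion point in descending order
    if ind < N:
        recommendations.insert(ind, score)
        resNames.insert(ind, item)
        del recommendations[N:], resNames[N:] # keep only the top N
    return resNames, recommendations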
def evalRanking(self):
    res = []  # used to contain the text of the result
    if self.ranking.contains('-topN'):
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = int(top[-1])
        if N > 100 or N < 0:
            print('N cannot be larger than 100! It has been reset to 10')
            N = 10
        if N > len(self.data.item):
            N = len(self.data.item)
    else:
        print('No correct evaluation metric is specified!')
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    userCount = len(self.data.testSet_u)
    # rawRes = {}
    for i, user in enumerate(self.data.testSet_u):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predictForRanking(user)
        # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
        for id, rating in enumerate(predictedItems):
            # if not self.data.rating(user, self.data.id2item[id]):
            # add prediction in order to measure
            itemSet[self.data.id2item[id]] = rating
        ratedList, ratingList = self.data.userRated(user)
        for item in ratedList:
            del itemSet[item]
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while r >= l:
                    mid = (r - l) / 2 + l
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    if r < l:
                        ind = r
                        break
            # move the items backwards before inserting the new score
            if ind < N - 2:
                recommendations[ind + 2:] = recommendations[ind + 1:-1]
                resNames[ind + 2:] = resNames[ind + 1:-1]
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = zip(resNames, recommendations)
        if i % 100 == 0:
            print(self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount))
        for item in recList[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.data.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print('The result has been output to ', abspath(outDir), '.')
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    self.measure = Measure.rankingMeasure(self.data.testSet_u, recList, top)
    FileIO.writeFile(outDir, fileName, self.measure)
    print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))
def evalRanking(self):
    res = []  # used to contain the text of the result
    N = 0
    threshold = 0
    bThres = False
    bTopN = False
    if self.ranking.contains('-topN'):
        bTopN = True
        N = int(self.ranking['-topN'])
        if N > 100 or N < 0:
            print 'N cannot be larger than 100! It has been reset to 100'
            N = 100
        if N > len(self.dao.item):
            N = len(self.dao.item)
    elif self.ranking.contains('-threshold'):
        threshold = float(self.ranking['-threshold'])
        bThres = True
    else:
        print 'No correct evaluation metric is specified!'
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    userCount = len(self.dao.testSet_u)
    for i, user in enumerate(self.dao.testSet_u):
        itemSet = {}
        line = user + ':'
        predictedItems = self.predictForRanking(user)
        # predictedItems = denormalize(predictedItems, self.dao.rScale[-1], self.dao.rScale[0])
        for id, rating in enumerate(predictedItems):
            # if not self.dao.rating(user, self.dao.id2item[id]):
            # add prediction in order to measure
            # (threshold filtering is disabled in this variant)
            itemSet[self.dao.id2item[id]] = rating
        ratedList, ratingList = self.dao.userRated(user)
        for item in ratedList:
            del itemSet[item]
        Nrecommendations = []
        for item in itemSet:
            if len(Nrecommendations) < N:
                Nrecommendations.append((item, itemSet[item]))
            else:
                break
        Nrecommendations.sort(key=lambda d: d[1], reverse=True)
        recommendations = [item[1] for item in Nrecommendations]
        resNames = [item[0] for item in Nrecommendations]
        # find the N biggest scores
        for item in itemSet:
            ind = N
            l = 0
            r = N - 1
            if recommendations[r] < itemSet[item]:
                while True:
                    mid = (l + r) / 2
                    if recommendations[mid] >= itemSet[item]:
                        l = mid + 1
                    elif recommendations[mid] < itemSet[item]:
                        r = mid - 1
                    else:
                        ind = mid
                        break
                    if r < l:
                        ind = r
                        break
            # ind = bisect(recommendations, itemSet[item])
            if ind < N - 1:
                recommendations[ind + 1] = itemSet[item]
                resNames[ind + 1] = item
        recList[user] = zip(resNames, recommendations)
        if i % 100 == 0:
            print self.algorName, self.foldInfo, 'progress:' + str(i) + '/' + str(userCount)
        for item in recList[user]:
            line += ' (' + item[0] + ',' + str(item[1]) + ')'
            if self.dao.testSet_u[user].has_key(item[0]):
                line += '*'
        line += '\n'
        res.append(line)
    currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    # output prediction result
    if self.isOutput:
        fileName = ''
        outDir = self.output['-dir']
        if self.ranking.contains('-topN'):
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
        elif self.ranking.contains('-threshold'):
            fileName = self.config['recommender'] + '@' + currentTime + '-threshold-' + str(threshold) + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, res)
        print 'The result has been output to ', abspath(outDir), '.'
    # output evaluation result
    outDir = self.output['-dir']
    fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
    if self.ranking.contains('-topN'):
        self.measure = Measure.rankingMeasure(self.dao.testSet_u, recList, N)
    # elif self.ranking.contains('-threshold'):
    #     (threshold-based evaluation via Measure.rankingMeasure_threshold is disabled in this variant)
    FileIO.writeFile(outDir, fileName, self.measure)
    print 'The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure))
def evalRanking(self, write_to_file=True, use_now_time=False):
    res = []  # used to contain the text of the result
    if self.ranking.contains('-topN'):
        top = self.ranking['-topN'].split(',')
        top = [int(num) for num in top]
        N = max(top)
        if N > 100 or N < 0:
            print('N cannot be larger than 100! It has been reset to 10')
            N = 10
        if N > len(self.data.item):
            N = len(self.data.item)
    else:
        print('No correct evaluation metric is specified!')
        exit(-1)
    res.append('userId: recommendations in (itemId, ranking score) pairs, * means the item matches.\n')
    # predict
    recList = {}
    userN = {}
    testSample = self.testSample
    # a multiprocessing variant (Pool + imap_unordered over get_recommendation)
    # was removed here; the single-process loop below produces the same output
    testSample_copy = testSample.copy()
    for i, user in tqdm(enumerate(testSample), total=len(testSample),
                        desc='Measuring [{}]'.format(self.algorName)):
        identified_user = self.map_from_true_to_identify.get(user, -1)
        if identified_user == -1:
            del testSample_copy[user]
            continue
        user, line, recList_user = self.get_recommendation((user, identified_user, testSample[user]), N)
        recList[user] = recList_user
        res.append(line)
    self.measure = Measure.rankingMeasure(testSample_copy, recList, top)
    # record hyper-parameters if the recommender defines them
    try:
        self.measure.append("C:{}\n".format(self.C))
    except:
        pass
    try:
        self.measure.append("L:{}\n".format(self.L))
    except:
        pass
    try:
        self.measure.append("K:{}\n".format(self.K))
    except:
        pass
    try:
        self.measure.append("N:{}\n".format(self.N))
    except:
        pass
    if use_now_time:
        currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
    else:
        currentTime = self.currentTime
    if write_to_file:
        # output prediction result (the `False and` guard keeps this branch disabled)
        if False and self.isOutput:
            outDir = self.output['-dir']
            fileName = self.config['recommender'] + '@' + currentTime + '-top-' + str(N) + 'items' + self.foldInfo + '.txt'
            FileIO.writeFile(outDir, fileName, res)
        # output evaluation result
        outDir = self.output['-dir']
        try:
            fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '_C{}'.format(self.C) + '.txt'
        except:
            fileName = self.config['recommender'] + '@' + currentTime + '-measure' + self.foldInfo + '.txt'
        FileIO.writeFile(outDir, fileName, self.measure)
        # FileIO.writeFile(outDir, fileName, "C:{}".format(self.C))
        print('The result has been output to ', abspath(outDir), '.')
        print('The result of %s %s:\n%s' % (self.algorName, self.foldInfo, ''.join(self.measure)))