def test_evaluate_RMSRecommenderEvaluator(self):
    """Split every user's preferences ~70/30 into training/testing sets,
    rebuild the recommender on the training model, and accumulate the RMS
    error of estimated vs. held-out preferences.

    Also checks that the training model contains exactly the training
    users and that every capped estimate lies within
    [minPreference, maxPreference].
    """
    evaluator = RMSRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity,
                                  self.neighbor, True)
    evaluationPercentage = 1.0   # evaluate every user
    trainingPercentage = 0.7     # ~70% of each user's prefs go to training
    trainingUsers = {}
    testUserPrefs = {}
    self.total = 0
    self.diffs = 0.0
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            evaluator.processOneUser(trainingPercentage, trainingUsers,
                                     testUserPrefs, userID, self.model)
    total_training = sum(len(prefs)
                         for user, prefs in trainingUsers.iteritems())
    total_testing = sum(len(prefs)
                        for user, prefs in testUserPrefs.iteritems())
    # NOTE(review): the split is random per preference, so the exact
    # proportions cannot be asserted deterministically:
    #   total_training / float(total_training + total_testing) ~ 0.7
    #   total_testing / float(total_training + total_testing) ~ 0.3
    trainingModel = DictDataModel(trainingUsers)
    self.assertEquals(sorted(trainingModel.UserIDs()),
                      sorted([user for user in trainingUsers]))
    recommender.model = trainingModel
    self.assertEquals(recommender.model, trainingModel)
    for userID, prefs in testUserPrefs.iteritems():
        for pref in prefs:
            # Reset per item: the original initialized this once per user,
            # so a failed estimate silently reused the previous item's
            # estimate against the current item's actual preference.
            estimatedPreference = None
            try:
                estimatedPreference = recommender.estimatePreference(
                    userID=userID, similarity=self.similarity, itemID=pref)
            except Exception:
                # Best effort: the item may be absent from the training
                # model; skip it rather than fail the whole evaluation.
                pass
            if estimatedPreference is not None:
                estimatedPreference = evaluator.capEstimatePreference(
                    estimatedPreference)
                self.assert_(evaluator.minPreference <= estimatedPreference
                             <= evaluator.maxPreference)
                diff = prefs[pref] - estimatedPreference
                self.diffs += (diff * diff)
                self.total += 1
    # Guard: if nothing could be estimated, 0/0 would raise.
    if self.total > 0:
        result = sqrt(self.diffs / float(self.total))
        self.assert_(result >= 0.0)
def test_evaluate_RMSRecommenderEvaluator(self):
    """Split every user's preferences ~70/30 into training/testing sets,
    rebuild the recommender on the training model, and accumulate the RMS
    error of estimated vs. held-out preferences.

    Also checks that the training model contains exactly the training
    users and that every capped estimate lies within
    [minPreference, maxPreference].
    """
    evaluator = RMSRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity,
                                  self.neighbor, True)
    evaluationPercentage = 1.0   # evaluate every user
    trainingPercentage = 0.7     # ~70% of each user's prefs go to training
    trainingUsers = {}
    testUserPrefs = {}
    self.total = 0
    self.diffs = 0.0
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            evaluator.processOneUser(trainingPercentage, trainingUsers,
                                     testUserPrefs, userID, self.model)
    total_training = sum(len(prefs)
                         for user, prefs in trainingUsers.iteritems())
    total_testing = sum(len(prefs)
                        for user, prefs in testUserPrefs.iteritems())
    # NOTE(review): the split is random per preference, so the exact
    # proportions cannot be asserted deterministically:
    #   total_training / float(total_training + total_testing) ~ 0.7
    #   total_testing / float(total_training + total_testing) ~ 0.3
    trainingModel = DictDataModel(trainingUsers)
    self.assertEquals(sorted(trainingModel.UserIDs()),
                      sorted([user for user in trainingUsers]))
    recommender.model = trainingModel
    self.assertEquals(recommender.model, trainingModel)
    for userID, prefs in testUserPrefs.iteritems():
        for pref in prefs:
            # Reset per item: the original initialized this once per user,
            # so a failed estimate silently reused the previous item's
            # estimate against the current item's actual preference.
            estimatedPreference = None
            try:
                estimatedPreference = recommender.estimatePreference(
                    userID=userID, similarity=self.similarity, itemID=pref)
            except Exception:
                # Best effort: the item may be absent from the training
                # model; skip it rather than fail the whole evaluation.
                pass
            if estimatedPreference is not None:
                estimatedPreference = evaluator.capEstimatePreference(
                    estimatedPreference)
                self.assert_(evaluator.minPreference <= estimatedPreference
                             <= evaluator.maxPreference)
                diff = prefs[pref] - estimatedPreference
                self.diffs += (diff * diff)
                self.total += 1
    # Guard: if nothing could be estimated, 0/0 would raise.
    if self.total > 0:
        result = sqrt(self.diffs / float(self.total))
        self.assert_(result >= 0.0)
def test_evaluate_IRStatsRecommenderEvaluator(self):
    """For every user with enough preferences, hold out that user's most
    relevant items, retrain on the remaining data, recommend `at` items,
    and accumulate precision / recall / fall-out / nDCG; finally average
    the counters and derive the F1 score.
    """
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity,
                                  self.neighbor, True)
    evaluationPercentage = 1.0
    relevanceThreshold = None
    at = 2
    irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0,
               'nDCG': 0.0}
    irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}
    nItems = self.model.NumItems()
    self.assertEquals(nItems, 6)
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user.
                self.assert_(userID in ['Leopoldo Pires', 'Penny Frewman',
                                        'Maria Gabriela'])
                continue
            # List some most-preferred items that would count as the most
            # relevant results.
            relevantItemIDs = []
            # NOTE(review): once computed, the threshold is reused for
            # every later user instead of being recomputed per user —
            # confirm this is intended.
            relevanceThreshold = relevanceThreshold if relevanceThreshold \
                else evaluator.computeThreshold(prefs)
            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)
            self.assertEquals(max([pref[1] for pref in prefs]),
                              prefs[0][1])
            for index, pref in enumerate(prefs):
                if index < at:
                    if pref[1] >= relevanceThreshold:
                        relevantItemIDs.append(pref[0])
            self.assertEquals(relevantItemIDs, [
                p[0] for p in sorted(
                    [pref for pref in prefs
                     if pref[1] >= relevanceThreshold],
                    key=lambda x: x[1], reverse=True)[:at]])
            if len(relevantItemIDs) == 0:
                continue
            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs,
                                           trainingUsers, otherUserID,
                                           self.model)
            trainingModel = DictDataModel(trainingUsers)
            recommender.model = trainingModel
            try:
                prefs = trainingModel.PreferencesFromUser(userID)
                if not prefs:
                    continue
            except Exception:
                # Excluded all prefs for the user. Move on.
                continue
            recommendedItems = recommender.recommend(userID, at)
            self.assert_(len(recommendedItems) <= 2)
            intersectionSize = len([item for item in recommendedItems
                                    if item in relevantItemIDs])
            # Precision
            if len(recommendedItems) > 0:
                irStats['precision'] += (intersectionSize /
                                         float(len(recommendedItems)))
                irFreqs['precision'] += 1
            # Recall
            irStats['recall'] += (intersectionSize /
                                  float(len(relevantItemIDs)))
            irFreqs['recall'] += 1
            # Fall-out
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (
                    (len(recommendedItems) - intersectionSize) /
                    float(nItems - len(relevantItemIDs)))
                irFreqs['fallOut'] += 1
            # nDCG: assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 \
                    else 1.0 / evaluator.log2(index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise discount is multiplied by relevance 0, a no-op.
                # Ideally results would be ordered with all relevant ones
                # first, so the theoretical ideal list starts with as many
                # relevant items as there are in total.
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            # Guard: with no recommendations, idealizedGain stays 0.0 and
            # the original raised ZeroDivisionError here.
            if idealizedGain > 0:
                irStats['nDCG'] += float(cumulativeGain) / idealizedGain
                irFreqs['nDCG'] += 1
    for key in irFreqs:
        # Guard: a metric that never fired would divide by zero.
        if irFreqs[key] > 0:
            irStats[key] = irStats[key] / float(irFreqs[key])
    sum_score = (irStats['precision'] + irStats['recall']
                 if irStats['precision'] is not None
                 and irStats['recall'] is not None else None)
    irStats['f1Score'] = None if not sum_score else \
        (2.0) * irStats['precision'] * irStats['recall'] / sum_score
def test_evaluate_IRStatsRecommenderEvaluator(self):
    """For every user with enough preferences, hold out that user's most
    relevant items, retrain on the remaining data, recommend `at` items,
    and accumulate precision / recall / fall-out / nDCG; finally average
    the counters and derive the F1 score.
    """
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity,
                                  self.neighbor, True)
    evaluationPercentage = 1.0
    relevanceThreshold = None
    at = 2
    irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0,
               'nDCG': 0.0}
    irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}
    nItems = self.model.NumItems()
    self.assertEquals(nItems, 6)
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user.
                self.assert_(userID in ['Leopoldo Pires', 'Penny Frewman',
                                        'Maria Gabriela'])
                continue
            # List some most-preferred items that would count as the most
            # relevant results.
            relevantItemIDs = []
            # NOTE(review): once computed, the threshold is reused for
            # every later user instead of being recomputed per user —
            # confirm this is intended.
            relevanceThreshold = relevanceThreshold if relevanceThreshold \
                else evaluator.computeThreshold(prefs)
            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)
            self.assertEquals(max([pref[1] for pref in prefs]),
                              prefs[0][1])
            for index, pref in enumerate(prefs):
                if index < at:
                    if pref[1] >= relevanceThreshold:
                        relevantItemIDs.append(pref[0])
            self.assertEquals(relevantItemIDs, [
                p[0] for p in sorted(
                    [pref for pref in prefs
                     if pref[1] >= relevanceThreshold],
                    key=lambda x: x[1], reverse=True)[:at]])
            if len(relevantItemIDs) == 0:
                continue
            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs,
                                           trainingUsers, otherUserID,
                                           self.model)
            trainingModel = DictDataModel(trainingUsers)
            recommender.model = trainingModel
            try:
                prefs = trainingModel.PreferencesFromUser(userID)
                if not prefs:
                    continue
            except Exception:
                # Excluded all prefs for the user. Move on.
                continue
            recommendedItems = recommender.recommend(userID, at)
            self.assert_(len(recommendedItems) <= 2)
            intersectionSize = len([item for item in recommendedItems
                                    if item in relevantItemIDs])
            # Precision
            if len(recommendedItems) > 0:
                irStats['precision'] += (intersectionSize /
                                         float(len(recommendedItems)))
                irFreqs['precision'] += 1
            # Recall
            irStats['recall'] += (intersectionSize /
                                  float(len(relevantItemIDs)))
            irFreqs['recall'] += 1
            # Fall-out
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (
                    (len(recommendedItems) - intersectionSize) /
                    float(nItems - len(relevantItemIDs)))
                irFreqs['fallOut'] += 1
            # nDCG: assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 \
                    else 1.0 / evaluator.log2(index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise discount is multiplied by relevance 0, a no-op.
                # Ideally results would be ordered with all relevant ones
                # first, so the theoretical ideal list starts with as many
                # relevant items as there are in total.
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            # Guard: with no recommendations, idealizedGain stays 0.0 and
            # the original raised ZeroDivisionError here.
            if idealizedGain > 0:
                irStats['nDCG'] += float(cumulativeGain) / idealizedGain
                irFreqs['nDCG'] += 1
    for key in irFreqs:
        # Guard: a metric that never fired would divide by zero.
        if irFreqs[key] > 0:
            irStats[key] = irStats[key] / float(irFreqs[key])
    sum_score = (irStats['precision'] + irStats['recall']
                 if irStats['precision'] is not None
                 and irStats['recall'] is not None else None)
    irStats['f1Score'] = None if not sum_score else \
        (2.0) * irStats['precision'] * irStats['recall'] / sum_score