def test_evaluate_IRStatsRecommenderEvaluator(self):
    """End-to-end check of IRStatsRecommenderEvaluator with a UserRecommender.

    For each user, holds out the top-`at` most-relevant items, retrains on the
    remaining preferences, and accumulates precision / recall / fall-out / nDCG
    at rank `at`; finally averages each metric and derives an F1 score.
    """
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    evaluationPercentage = 1.0  # evaluate every user
    relevanceThreshold = None   # computed lazily from the first evaluated user's prefs
    at = 2                      # rank cutoff: precision@2, recall@2, ...
    irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0, 'nDCG': 0.0}
    irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}
    nItems = self.model.NumItems()
    self.assertEqual(nItems, 6)
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user.
                self.assertTrue(userID in ['Leopoldo Pires', 'Penny Frewman', 'Maria Gabriela'])
                continue
            # List some most-preferred items that would count as most relevant results.
            relevantItemIDs = []
            relevanceThreshold = relevanceThreshold if relevanceThreshold else evaluator.computeThreshold(prefs)
            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)
            self.assertEqual(max([pref[1] for pref in prefs]), prefs[0][1])
            for index, pref in enumerate(prefs):
                if index < at and pref[1] >= relevanceThreshold:
                    relevantItemIDs.append(pref[0])
            self.assertEqual(relevantItemIDs, [
                p[0] for p in sorted(
                    [pref for pref in prefs if pref[1] >= relevanceThreshold],
                    key=lambda x: x[1], reverse=True)[:at]])
            if len(relevantItemIDs) == 0:
                continue
            # Build a training model with the relevant items withheld for this user.
            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs, trainingUsers,
                                           otherUserID, self.model)
            trainingModel = DictDataModel(trainingUsers)
            recommender.model = trainingModel
            try:
                prefs = trainingModel.PreferencesFromUser(userID)
                if not prefs:
                    continue
            except Exception:
                # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
                # KeyboardInterrupt. Excluded all prefs for the user; move on.
                continue
            recommendedItems = recommender.recommend(userID, at)
            self.assertTrue(len(recommendedItems) <= 2)
            intersectionSize = len([item for item in recommendedItems
                                    if item in relevantItemIDs])
            # Precision: fraction of recommended items that are relevant.
            if len(recommendedItems) > 0:
                irStats['precision'] += intersectionSize / float(len(recommendedItems))
                irFreqs['precision'] += 1
            # Recall: fraction of relevant items that were recommended.
            irStats['recall'] += intersectionSize / float(len(relevantItemIDs))
            irFreqs['recall'] += 1
            # Fall-out: irrelevant recommendations over all irrelevant items.
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (len(recommendedItems) - intersectionSize) / float(
                    nItems - len(relevantItemIDs))
                irFreqs['fallOut'] += 1
            # nDCG. In computing, assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 else 1.0 / evaluator.log2(index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise we are multiplying discount by relevance 0, so it does nothing.
                # Ideally results would be ordered with all relevant ones first, so this
                # theoretical ideal list starts with all the relevant items.
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            # BUGFIX: guard the division — an empty recommendation list left
            # idealizedGain at 0.0 and raised ZeroDivisionError here.
            if idealizedGain > 0.0:
                irStats['nDCG'] += float(cumulativeGain) / idealizedGain
                irFreqs['nDCG'] += 1
    # Average each accumulated metric over the number of contributing users.
    # BUGFIX: skip metrics that never accumulated, avoiding division by zero.
    for key in irFreqs:
        if irFreqs[key]:
            irStats[key] = irStats[key] / float(irFreqs[key])
    sum_score = (irStats['precision'] + irStats['recall']
                 if irStats['precision'] is not None and irStats['recall'] is not None
                 else None)
    # F1 is the harmonic mean of precision and recall; None when undefined.
    irStats['f1Score'] = (None if not sum_score
                          else 2.0 * irStats['precision'] * irStats['recall'] / sum_score)
def test_full_recommend(self):
    """A user with no overlapping neighbors yields an empty recommendation list."""
    # NOTE(review): a later duplicate definition of test_full_recommend exists in
    # this file and shadows this one — consider removing one copy.
    target = "Maria Gabriela"
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, False)
    result = recommender.recommend(target, 4)
    self.assertEquals([], result)
def test_semi_recommend(self):
    """Asking for a single item returns exactly the top recommendation."""
    # NOTE(review): a later duplicate definition of test_semi_recommend exists in
    # this file and shadows this one — consider removing one copy.
    target = "Leopoldo Pires"
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, False)
    top_one = recommender.recommend(target, 1)
    self.assertEquals(["Just My Luck"], top_one)
def test_recommend(self):
    """Requesting more items than available returns all recommendable items, ranked."""
    # NOTE(review): a later duplicate definition of test_recommend exists in this
    # file and shadows this one — consider removing one copy.
    target = "Leopoldo Pires"
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, False)
    ranked = recommender.recommend(target, 4)
    self.assertEquals(["Just My Luck", "You, Me and Dupree"], ranked)
def test_empty_recommend(self):
    """A user who already rated everything gets nothing recommended."""
    # NOTE(review): a later duplicate definition of test_empty_recommend exists in
    # this file and shadows this one — consider removing one copy.
    target = "Marcel Caraciolo"
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, False)
    result = recommender.recommend(target, 4)
    self.assertEquals([], result)
def test_semi_recommend(self):
    """Asking for one item returns only the single top-ranked recommendation."""
    # NOTE(review): this redefines test_semi_recommend declared earlier in the
    # file; only this later definition is collected — remove one copy.
    uid = 'Leopoldo Pires'
    rec_sys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    self.assertEquals(['Just My Luck'], rec_sys.recommend(uid, 1))
def test_full_recommend(self):
    """No recommendations are produced for a user with no usable neighborhood."""
    # NOTE(review): this redefines test_full_recommend declared earlier in the
    # file; only this later definition is collected — remove one copy.
    uid = 'Maria Gabriela'
    rec_sys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    self.assertEquals([], rec_sys.recommend(uid, 4))
def test_empty_recommend(self):
    """Nothing is recommended to a user who has already rated every item."""
    # NOTE(review): this redefines test_empty_recommend declared earlier in the
    # file; only this later definition is collected — remove one copy.
    uid = 'Marcel Caraciolo'
    rec_sys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    self.assertEquals([], rec_sys.recommend(uid, 4))
def test_recommend(self):
    """All recommendable items come back in ranked order when the cap exceeds them."""
    # NOTE(review): this redefines test_recommend declared earlier in the file;
    # only this later definition is collected — remove one copy.
    uid = 'Leopoldo Pires'
    rec_sys = UserRecommender(self.model, self.similarity, self.neighbor, False)
    self.assertEquals(['Just My Luck', 'You, Me and Dupree'], rec_sys.recommend(uid, 4))
def test_evaluate_IRStatsRecommenderEvaluator(self):
    """End-to-end IR-statistics evaluation of a UserRecommender.

    Per user: withhold the top-`at` relevant items, retrain on what remains,
    then accumulate precision / recall / fall-out / nDCG at rank `at`;
    averages each metric and derives an F1 score at the end.
    """
    # NOTE(review): this redefines test_evaluate_IRStatsRecommenderEvaluator
    # declared earlier in the file; only this later definition is collected —
    # remove one copy.
    evaluator = IRStatsRecommenderEvaluator()
    recommender = UserRecommender(self.model, self.similarity, self.neighbor, True)
    evaluationPercentage = 1.0  # evaluate every user
    relevanceThreshold = None   # derived lazily from the first evaluated user's prefs
    at = 2                      # rank cutoff: precision@2, recall@2, ...
    irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0, 'nDCG': 0.0}
    irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}
    nItems = self.model.NumItems()
    self.assertEqual(nItems, 6)
    for userID in self.model.UserIDs():
        if random() < evaluationPercentage:
            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user.
                self.assertTrue(
                    userID in ['Leopoldo Pires', 'Penny Frewman', 'Maria Gabriela'])
                continue
            # List some most-preferred items that would count as most relevant results.
            relevantItemIDs = []
            relevanceThreshold = relevanceThreshold if relevanceThreshold \
                else evaluator.computeThreshold(prefs)
            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)
            self.assertEqual(max([pref[1] for pref in prefs]), prefs[0][1])
            for index, pref in enumerate(prefs):
                if index < at and pref[1] >= relevanceThreshold:
                    relevantItemIDs.append(pref[0])
            self.assertEqual(relevantItemIDs, [
                p[0] for p in sorted(
                    [pref for pref in prefs if pref[1] >= relevanceThreshold],
                    key=lambda x: x[1], reverse=True)[:at]])
            if len(relevantItemIDs) == 0:
                continue
            # Build a training model with this user's relevant items withheld.
            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs, trainingUsers,
                                           otherUserID, self.model)
            trainingModel = DictDataModel(trainingUsers)
            recommender.model = trainingModel
            try:
                prefs = trainingModel.PreferencesFromUser(userID)
                if not prefs:
                    continue
            except Exception:
                # BUGFIX: was a bare `except:` (also caught SystemExit and
                # KeyboardInterrupt). Excluded all prefs for the user; move on.
                continue
            recommendedItems = recommender.recommend(userID, at)
            self.assertTrue(len(recommendedItems) <= 2)
            intersectionSize = len([item for item in recommendedItems
                                    if item in relevantItemIDs])
            # Precision: share of recommended items that are relevant.
            if len(recommendedItems) > 0:
                irStats['precision'] += intersectionSize / float(len(recommendedItems))
                irFreqs['precision'] += 1
            # Recall: share of relevant items that got recommended.
            irStats['recall'] += intersectionSize / float(len(relevantItemIDs))
            irFreqs['recall'] += 1
            # Fall-out: irrelevant recommendations over all irrelevant items.
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (len(recommendedItems) - intersectionSize) / float(
                    nItems - len(relevantItemIDs))
                irFreqs['fallOut'] += 1
            # nDCG. In computing, assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 else 1.0 / evaluator.log2(index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise discount is multiplied by relevance 0, so it does nothing.
                # The ideal ordering puts all relevant items first, so the ideal
                # list starts with the full set of relevant items.
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            # BUGFIX: guard the division — with no recommended items,
            # idealizedGain stayed 0.0 and this raised ZeroDivisionError.
            if idealizedGain > 0.0:
                irStats['nDCG'] += float(cumulativeGain) / idealizedGain
                irFreqs['nDCG'] += 1
    # Average each metric over the users that contributed to it.
    # BUGFIX: skip metrics with zero contributors to avoid dividing by zero.
    for key in irFreqs:
        if irFreqs[key]:
            irStats[key] = irStats[key] / float(irFreqs[key])
    sum_score = (irStats['precision'] + irStats['recall']
                 if irStats['precision'] is not None and irStats['recall'] is not None
                 else None)
    # F1 = harmonic mean of precision and recall; None when undefined.
    irStats['f1Score'] = (None if not sum_score
                          else 2.0 * irStats['precision'] * irStats['recall'] / sum_score)