예제 #1
0
 def test_mostSimilarUserIDs(self):
     """Check the four most similar users returned for Marcel Caraciolo.

     The result of ``mostSimilarUserIDs(userID, 4)`` must equal the expected
     list, including its order.
     """
     userID = 'Marcel Caraciolo'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         ['Leopoldo Pires', 'Steve Gates', 'Lorena Abreu', 'Penny Frewman'],
         recSys.mostSimilarUserIDs(userID, 4))
예제 #2
0
 def test_user_no_preference_mostSimilarUserIDs(self):
     """Check the four most similar users returned for Maria Gabriela.

     The result of ``mostSimilarUserIDs(userID, 4)`` must equal the expected
     list, including its order.
     """
     userID = "Maria Gabriela"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         ["Leopoldo Pires", "Lorena Abreu", "Luciana Nunes", "Marcel Caraciolo"],
         recSys.mostSimilarUserIDs(userID, 4),
     )
예제 #3
0
 def test_local_estimatePreference(self):
     """Check the estimated preference of Marcel Caraciolo for Superman Returns.

     ``estimatePreference`` is expected to return a value almost equal to 3.5.
     """
     userID = "Marcel Caraciolo"
     itemID = "Superman Returns"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
     self.assertAlmostEqual(
         3.5, recSys.estimatePreference(userID=userID, similarity=self.similarity, itemID=itemID)
     )
예제 #4
0
 def test_local_not_existing_capper_False_estimatePreference(self):
     """Check the estimate for Leopoldo Pires / You, Me and Dupree.

     The recommender is built with ``False`` as its fourth argument
     (presumably the capper flag, going by the test name) and the estimate is
     expected to be almost equal to 2.065394689.
     """
     userID = "Leopoldo Pires"
     itemID = "You, Me and Dupree"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
     self.assertAlmostEqual(
         2.065394689, recSys.estimatePreference(userID=userID, similarity=self.similarity, itemID=itemID)
     )
예제 #5
0
 def test_semi_watched_allOtherItems(self):
     """Check ``allOtherItems`` for Leopoldo Pires and his neighborhood.

     The neighborhood comes from ``self.neighbor.userNeighborhood`` and the
     expected result is exactly two items, in the listed order.
     """
     userID = 'Leopoldo Pires'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     nearestN = self.neighbor.userNeighborhood(userID)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(['Just My Luck', 'You, Me and Dupree'],
                      recSys.allOtherItems(userID, nearestN))
예제 #6
0
    def test_evaluate_RMSRecommenderEvaluator(self):
        """Replay the RMS evaluation steps by hand and check each stage.

        Every user is split into training and test preferences through
        ``evaluator.processOneUser``. A ``DictDataModel`` built from the
        training part is plugged into the recommender, then each held-out
        preference is estimated, capped and compared with the stored value.
        Squared errors accumulate in ``self.diffs`` and the number of scored
        preferences in ``self.total``.
        """
        evaluator = RMSRecommenderEvaluator()

        recommender = UserRecommender(self.model, self.similarity,
                                      self.neighbor, True)
        evaluationPercentage = 1.0
        trainingPercentage = 0.7

        trainingUsers = {}
        testUserPrefs = {}
        self.total = 0
        self.diffs = 0.0

        for userID in self.model.UserIDs():
            if random() < evaluationPercentage:
                evaluator.processOneUser(trainingPercentage, trainingUsers,
                                         testUserPrefs, userID, self.model)

        # NOTE(review): the original carried disabled assertions on the
        # 0.7 / 0.3 training/testing ratio; presumably they were switched off
        # because the split is random -- confirm before re-enabling.

        trainingModel = DictDataModel(trainingUsers)

        self.assertEqual(sorted(trainingModel.UserIDs()),
                         sorted(trainingUsers))

        recommender.model = trainingModel

        self.assertEqual(recommender.model, trainingModel)

        # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for userID, prefs in testUserPrefs.items():
            for pref in prefs:
                # Reset for every item so that a failed estimate cannot reuse
                # the value computed for the previous item.
                estimatedPreference = None
                try:
                    estimatedPreference = recommender.estimatePreference(
                        userID=userID, similarity=self.similarity, itemID=pref)
                except Exception:
                    # Best effort: items that cannot be estimated are skipped.
                    pass
                if estimatedPreference is None:
                    continue

                estimatedPreference = evaluator.capEstimatePreference(
                    estimatedPreference)
                self.assertTrue(
                    evaluator.minPreference <= estimatedPreference
                    <= evaluator.maxPreference)
                diff = prefs[pref] - estimatedPreference
                self.diffs += (diff * diff)
                self.total += 1

        # Without any scored preference there is nothing to average, and the
        # division below would raise ZeroDivisionError.
        if self.total:
            result = sqrt(self.diffs / float(self.total))
            self.assertTrue(result >= 0.0)
예제 #7
0
 def test_user_no_preference_mostSimilarUserIDs(self):
     """Check the four most similar users returned for Maria Gabriela.

     The result of ``mostSimilarUserIDs(userID, 4)`` must equal the expected
     list, including its order.
     """
     userID = 'Maria Gabriela'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([
         'Leopoldo Pires', 'Lorena Abreu', 'Luciana Nunes',
         'Marcel Caraciolo'
     ], recSys.mostSimilarUserIDs(userID, 4))
예제 #8
0
 def test_non_watched_allOtherItems(self):
     """Check ``allOtherItems`` for Maria Gabriela and her neighborhood.

     The neighborhood comes from ``self.neighbor.userNeighborhood`` and the
     expected result is the six listed items, in that order.
     """
     userID = 'Maria Gabriela'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     nearestN = self.neighbor.userNeighborhood(userID)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([
         'Lady in the Water', 'Snakes on a Plane', 'Just My Luck',
         'Superman Returns', 'You, Me and Dupree', 'The Night Listener'
     ], recSys.allOtherItems(userID, nearestN))
예제 #9
0
 def test_local_not_existing_rescorer_estimatePreference(self):
     """Check the rescored estimate for Leopoldo Pires / You, Me and Dupree.

     A ``TanHScorer`` instance is passed as ``rescorer`` and the estimate is
     expected to be almost equal to 2.5761016605.
     """
     userID = "Leopoldo Pires"
     itemID = "You, Me and Dupree"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
     scorer = TanHScorer()
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
     self.assertAlmostEqual(
         2.5761016605,
         recSys.estimatePreference(userID=userID, similarity=self.similarity, itemID=itemID, rescorer=scorer),
     )
예제 #10
0
 def test_local_not_existing_capper_False_estimatePreference(self):
     """Check the estimate for Leopoldo Pires / You, Me and Dupree.

     The recommender is built with ``False`` as its fourth argument
     (presumably the capper flag, going by the test name) and the estimate is
     expected to be almost equal to 2.065394689.
     """
     userID = 'Leopoldo Pires'
     itemID = 'You, Me and Dupree'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              False)
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
     self.assertAlmostEqual(
         2.065394689,
         recSys.estimatePreference(userID=userID,
                                   similarity=self.similarity,
                                   itemID=itemID))
예제 #11
0
 def test_local_estimatePreference(self):
     """Check the estimated preference of Marcel Caraciolo for Superman Returns.

     ``estimatePreference`` is expected to return a value almost equal to 3.5.
     """
     userID = 'Marcel Caraciolo'
     itemID = 'Superman Returns'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
     self.assertAlmostEqual(
         3.5,
         recSys.estimatePreference(userID=userID,
                                   similarity=self.similarity,
                                   itemID=itemID))
예제 #12
0
 def test_local_not_existing_rescorer_estimatePreference(self):
     """Check the rescored estimate for Leopoldo Pires / You, Me and Dupree.

     A ``TanHScorer`` instance is passed as ``rescorer`` and the estimate is
     expected to be almost equal to 2.5761016605.
     """
     userID = 'Leopoldo Pires'
     itemID = 'You, Me and Dupree'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              False)
     scorer = TanHScorer()
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
     self.assertAlmostEqual(
         2.5761016605,
         recSys.estimatePreference(userID=userID,
                                   similarity=self.similarity,
                                   itemID=itemID,
                                   rescorer=scorer))
예제 #13
0
 def test_non_watched_allOtherItems(self):
     """Check ``allOtherItems`` for Maria Gabriela and her neighborhood.

     The neighborhood comes from ``self.neighbor.userNeighborhood`` and the
     expected result is the six listed items, in that order.
     """
     userID = "Maria Gabriela"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     nearestN = self.neighbor.userNeighborhood(userID)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         [
             "Lady in the Water",
             "Snakes on a Plane",
             "Just My Luck",
             "Superman Returns",
             "You, Me and Dupree",
             "The Night Listener",
         ],
         recSys.allOtherItems(userID, nearestN),
     )
예제 #14
0
파일: test_evaluator.py 프로젝트: ANB2/crab
    def test_evaluate_RMSRecommenderEvaluator(self):
        """Replay the RMS evaluation steps by hand and check each stage.

        Every user is split into training and test preferences through
        ``evaluator.processOneUser``. A ``DictDataModel`` built from the
        training part is plugged into the recommender, then each held-out
        preference is estimated, capped and compared with the stored value.
        Squared errors accumulate in ``self.diffs`` and the number of scored
        preferences in ``self.total``.
        """
        evaluator = RMSRecommenderEvaluator()

        recommender = UserRecommender(self.model, self.similarity,
                                      self.neighbor, True)
        evaluationPercentage = 1.0
        trainingPercentage = 0.7

        trainingUsers = {}
        testUserPrefs = {}
        self.total = 0
        self.diffs = 0.0

        for userID in self.model.UserIDs():
            if random() < evaluationPercentage:
                evaluator.processOneUser(trainingPercentage, trainingUsers,
                                         testUserPrefs, userID, self.model)

        # NOTE(review): the original carried disabled assertions on the
        # 0.7 / 0.3 training/testing ratio; presumably they were switched off
        # because the split is random -- confirm before re-enabling.

        trainingModel = DictDataModel(trainingUsers)

        self.assertEqual(sorted(trainingModel.UserIDs()),
                         sorted(trainingUsers))

        recommender.model = trainingModel

        self.assertEqual(recommender.model, trainingModel)

        # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        for userID, prefs in testUserPrefs.items():
            for pref in prefs:
                # Reset for every item so that a failed estimate cannot reuse
                # the value computed for the previous item.
                estimatedPreference = None
                try:
                    estimatedPreference = recommender.estimatePreference(
                        userID=userID, similarity=self.similarity, itemID=pref)
                except Exception:
                    # Best effort: items that cannot be estimated are skipped.
                    pass
                if estimatedPreference is None:
                    continue

                estimatedPreference = evaluator.capEstimatePreference(
                    estimatedPreference)
                self.assertTrue(
                    evaluator.minPreference <= estimatedPreference
                    <= evaluator.maxPreference)
                diff = prefs[pref] - estimatedPreference
                self.diffs += (diff * diff)
                self.total += 1

        # Without any scored preference there is nothing to average, and the
        # division below would raise ZeroDivisionError.
        if self.total:
            result = sqrt(self.diffs / float(self.total))
            self.assertTrue(result >= 0.0)
예제 #15
0
 def test_create_UserBasedRecommender(self):
     """Check that the constructor stores its four arguments.

     The arguments must be readable back as ``similarity``, ``capper``,
     ``neighborhood`` and ``model``.
     """
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(recSys.similarity, self.similarity)
     self.assertEqual(recSys.capper, True)
     self.assertEqual(recSys.neighborhood, self.neighbor)
     self.assertEqual(recSys.model, self.model)
예제 #16
0
 def test_evaluate_at_not_enough_IRStatsRecommenderEvaluator(self):
     """Check that ``evaluate`` returns None for every statistic here.

     The call passes 4 as its third argument (presumably the ``at`` cut-off,
     going by the test name) and 1.0 as its fourth.
     """
     evaluator = IRStatsRecommenderEvaluator()
     recommender = UserRecommender(self.model, self.similarity,
                                   self.neighbor, True)
     result = evaluator.evaluate(recommender, self.model, 4, 1.0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         result, {
             'nDCG': None,
             'recall': None,
             'f1Score': None,
             'precision': None,
             'fallOut': None
         })
예제 #17
0
 def test_mostSimilarUserIDs(self):
     """Check the four most similar users returned for Marcel Caraciolo.

     The result of ``mostSimilarUserIDs(userID, 4)`` must equal the expected
     list, including its order.
     """
     userID = "Marcel Caraciolo"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         ["Leopoldo Pires", "Steve Gates", "Lorena Abreu", "Penny Frewman"], recSys.mostSimilarUserIDs(userID, 4)
     )
예제 #18
0
	def test_mostSimilarUserIDs(self):
		"""Check the four most similar users returned for Marcel Caraciolo.

		The result of ``mostSimilarUserIDs(userID, 4)`` must equal the expected
		list, including its order.
		"""
		userID = 'Marcel Caraciolo'
		recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
		# assertEqual replaces the deprecated assertEquals alias.
		self.assertEqual(['Leopoldo Pires', 'Steve Gates', 'Lorena Abreu', 'Penny Frewman'],
			recSys.mostSimilarUserIDs(userID, 4))
예제 #19
0
 def test_semi_watched_allOtherItems(self):
     """Check ``allOtherItems`` for Leopoldo Pires and his neighborhood.

     The neighborhood comes from ``self.neighbor.userNeighborhood`` and the
     expected result is exactly two items, in the listed order.
     """
     userID = "Leopoldo Pires"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     nearestN = self.neighbor.userNeighborhood(userID)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(["Just My Luck", "You, Me and Dupree"], recSys.allOtherItems(userID, nearestN))
예제 #20
0
 def test_User_IRStatsRecommenderEvaluator(self):
     """Smoke-run ``IRStatsRecommenderEvaluator.evaluate`` on a user recommender.

     The returned value is not asserted; the test only fails if the call
     raises.
     """
     ir_evaluator = IRStatsRecommenderEvaluator()
     user_recommender = UserRecommender(
         self.model, self.similarity, self.neighbor, True)
     outcome = ir_evaluator.evaluate(user_recommender, self.model, 2, 1.0)
예제 #21
0
 def test_empty_mostSimilarUserIDs(self):
     """Check that asking for zero similar users returns an empty list."""
     userID = 'Maria Gabriela'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.mostSimilarUserIDs(userID, 0))
예제 #22
0
 def test_all_watched_allOtherItems(self):
     """Check that ``allOtherItems`` is empty for Luciana Nunes.

     The neighborhood passed in comes from ``self.neighbor.userNeighborhood``.
     """
     userID = "Luciana Nunes"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     nearestN = self.neighbor.userNeighborhood(userID)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.allOtherItems(userID, nearestN))
예제 #23
0
    def test_evaluate_IRStatsRecommenderEvaluator(self):
        """Replay the IR-statistics evaluation by hand, asserting along the way.

        For each user with at least ``2 * at`` preferences, the top ``at``
        preferences at or above the relevance threshold are treated as
        relevant. A training model is built via ``evaluator.processOtherUser``
        and the recommender's top ``at`` items are scored for precision,
        recall, fall-out and nDCG. Per-statistic averages are written back
        into ``irStats`` (``None`` when a statistic received no data),
        together with the derived ``f1Score``.
        """
        evaluator = IRStatsRecommenderEvaluator()

        recommender = UserRecommender(self.model, self.similarity,
                                      self.neighbor, True)
        evaluationPercentage = 1.0
        relevanceThreshold = None
        at = 2

        irStats = {
            'precision': 0.0,
            'recall': 0.0,
            'fallOut': 0.0,
            'nDCG': 0.0
        }
        irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}

        nItems = self.model.NumItems()
        self.assertEqual(nItems, 6)

        for userID in self.model.UserIDs():
            if random() >= evaluationPercentage:
                continue

            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user
                self.assertTrue(
                    userID in
                    ['Leopoldo Pires', 'Penny Frewman', 'Maria Gabriela'])
                continue

            # List some most-preferred items that would count as most relevant
            # results. The threshold is kept in a per-user local so that a
            # value computed for one user does not leak into the next one.
            if relevanceThreshold is None:
                threshold = evaluator.computeThreshold(prefs)
            else:
                threshold = relevanceThreshold

            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)

            self.assertEqual(max([pref[1] for pref in prefs]), prefs[0][1])

            relevantItemIDs = [
                pref[0] for pref in prefs[:at] if pref[1] >= threshold
            ]

            self.assertEqual(relevantItemIDs, [
                p[0] for p in sorted(
                    [pref for pref in prefs if pref[1] >= threshold],
                    key=lambda x: x[1],
                    reverse=True)[:at]
            ])

            if len(relevantItemIDs) == 0:
                continue

            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs,
                                           trainingUsers, otherUserID,
                                           self.model)

            trainingModel = DictDataModel(trainingUsers)

            recommender.model = trainingModel

            try:
                prefs = trainingModel.PreferencesFromUser(userID)
            except Exception:
                # Excluded all prefs for the user. move on.
                continue
            if not prefs:
                continue

            recommendedItems = recommender.recommend(userID, at)

            self.assertTrue(len(recommendedItems) <= at)

            intersectionSize = len([
                recommendedItem for recommendedItem in recommendedItems
                if recommendedItem in relevantItemIDs
            ])

            # Precision
            if len(recommendedItems) > 0:
                irStats['precision'] += (intersectionSize /
                                         float(len(recommendedItems)))
                irFreqs['precision'] += 1

            # Recall
            irStats['recall'] += (intersectionSize /
                                  float(len(relevantItemIDs)))
            irFreqs['recall'] += 1

            # Fall-Out
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (
                    len(recommendedItems) - intersectionSize) / float(
                        nItems - len(relevantItemIDs))
                irFreqs['fallOut'] += 1

            # nDCG
            # In computing, assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 else 1.0 / evaluator.log2(
                    index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise we are multiplying discount by relevance 0 so it
                # does nothing. Ideally results would be ordered with all
                # relevant ones first, so this theoretical ideal list starts
                # with number of relevant items equal to the total number of
                # relevant items.
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            # An empty recommendation list leaves idealizedGain at 0.0; skip
            # the datum instead of dividing by zero.
            if idealizedGain > 0.0:
                irStats['nDCG'] += cumulativeGain / idealizedGain
                irFreqs['nDCG'] += 1

        for key in irFreqs:
            # A statistic that never received a datum has no average.
            if irFreqs[key] > 0:
                irStats[key] = irStats[key] / float(irFreqs[key])
            else:
                irStats[key] = None

        precision = irStats['precision']
        recall = irStats['recall']
        if precision is not None and recall is not None:
            sum_score = precision + recall
        else:
            sum_score = None
        irStats['f1Score'] = None if not sum_score else (
            2.0) * precision * recall / sum_score
예제 #24
0
	def test_user_no_preference_mostSimilarUserIDs(self):
		"""Check the four most similar users returned for Maria Gabriela.

		The result of ``mostSimilarUserIDs(userID, 4)`` must equal the expected
		list, including its order.
		"""
		userID = 'Maria Gabriela'
		recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
		# assertEqual replaces the deprecated assertEquals alias.
		self.assertEqual(['Leopoldo Pires', 'Lorena Abreu', 'Luciana Nunes', 'Marcel Caraciolo'],
			recSys.mostSimilarUserIDs(userID, 4))
예제 #25
0
 def test_empty_mostSimilarUserIDs(self):
     """Check that asking for zero similar users returns an empty list."""
     userID = "Maria Gabriela"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.mostSimilarUserIDs(userID, 0))
예제 #26
0
 def test_recommend(self):
     """Check that ``recommend(userID, 4)`` yields two items for Leopoldo Pires.

     The two items must come back in the listed order.
     """
     userID = "Leopoldo Pires"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(["Just My Luck", "You, Me and Dupree"], recSys.recommend(userID, 4))
예제 #27
0
 def test_full_recommend(self):
     """Check that ``recommend(userID, 4)`` is empty for Maria Gabriela."""
     userID = "Maria Gabriela"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.recommend(userID, 4))
예제 #28
0
 def test_User_AvgDistanceRecSys(self):
     """Smoke-run ``AverageAbsoluteDifferenceRecommenderEvaluator.evaluate``.

     The returned value is not asserted; the test only fails if the call
     raises.
     """
     avg_evaluator = AverageAbsoluteDifferenceRecommenderEvaluator()
     user_recommender = UserRecommender(
         self.model, self.similarity, self.neighbor, True)
     outcome = avg_evaluator.evaluate(user_recommender, self.model, 0.7, 1.0)
예제 #29
0
 def test_full_recommend(self):
     """Check that ``recommend(userID, 4)`` is empty for Maria Gabriela."""
     userID = 'Maria Gabriela'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.recommend(userID, 4))
예제 #30
0
 def test_semi_recommend(self):
     """Check that ``recommend(userID, 1)`` yields one item for Leopoldo Pires."""
     userID = 'Leopoldo Pires'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(['Just My Luck'], recSys.recommend(userID, 1))
예제 #31
0
 def test_all_watched_allOtherItems(self):
     """Check that ``allOtherItems`` is empty for Luciana Nunes.

     The neighborhood passed in comes from ``self.neighbor.userNeighborhood``.
     """
     userID = 'Luciana Nunes'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              True)
     nearestN = self.neighbor.userNeighborhood(userID)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.allOtherItems(userID, nearestN))
예제 #32
0
 def test_empty_recommend(self):
     """Check that ``recommend(userID, 4)`` is empty for Marcel Caraciolo."""
     userID = "Marcel Caraciolo"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.recommend(userID, 4))
예제 #33
0
 def test_recommend(self):
     """Check that ``recommend(userID, 4)`` yields two items for Leopoldo Pires.

     The two items must come back in the listed order.
     """
     userID = 'Leopoldo Pires'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(['Just My Luck', 'You, Me and Dupree'],
                      recSys.recommend(userID, 4))
예제 #34
0
 def test_semi_recommend(self):
     """Check that ``recommend(userID, 1)`` yields one item for Leopoldo Pires."""
     userID = "Leopoldo Pires"
     recSys = UserRecommender(self.model, self.similarity, self.neighbor, False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(["Just My Luck"], recSys.recommend(userID, 1))
예제 #35
0
 def test_empty_recommend(self):
     """Check that ``recommend(userID, 4)`` is empty for Marcel Caraciolo."""
     userID = 'Marcel Caraciolo'
     recSys = UserRecommender(self.model, self.similarity, self.neighbor,
                              False)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([], recSys.recommend(userID, 4))
예제 #36
0
파일: test_evaluator.py 프로젝트: ANB2/crab
    def test_evaluate_IRStatsRecommenderEvaluator(self):
        """Replay the IR-statistics evaluation by hand, asserting along the way.

        For each user with at least ``2 * at`` preferences, the top ``at``
        preferences at or above the relevance threshold are treated as
        relevant. A training model is built via ``evaluator.processOtherUser``
        and the recommender's top ``at`` items are scored for precision,
        recall, fall-out and nDCG. Per-statistic averages are written back
        into ``irStats`` (``None`` when a statistic received no data),
        together with the derived ``f1Score``.
        """
        evaluator = IRStatsRecommenderEvaluator()

        recommender = UserRecommender(self.model, self.similarity,
                                      self.neighbor, True)
        evaluationPercentage = 1.0
        relevanceThreshold = None
        at = 2

        irStats = {'precision': 0.0, 'recall': 0.0, 'fallOut': 0.0, 'nDCG': 0.0}
        irFreqs = {'precision': 0, 'recall': 0, 'fallOut': 0, 'nDCG': 0}

        nItems = self.model.NumItems()
        self.assertEqual(nItems, 6)

        for userID in self.model.UserIDs():
            if random() >= evaluationPercentage:
                continue

            prefs = self.model.PreferencesFromUser(userID)
            if len(prefs) < 2 * at:
                # Really not enough prefs to meaningfully evaluate the user
                self.assertTrue(
                    userID in
                    ['Leopoldo Pires', 'Penny Frewman', 'Maria Gabriela'])
                continue

            # List some most-preferred items that would count as most relevant
            # results. The threshold is kept in a per-user local so that a
            # value computed for one user does not leak into the next one.
            if relevanceThreshold is None:
                threshold = evaluator.computeThreshold(prefs)
            else:
                threshold = relevanceThreshold

            prefs = sorted(prefs, key=lambda x: x[1], reverse=True)

            self.assertEqual(max([pref[1] for pref in prefs]), prefs[0][1])

            relevantItemIDs = [
                pref[0] for pref in prefs[:at] if pref[1] >= threshold
            ]

            self.assertEqual(relevantItemIDs, [
                p[0] for p in sorted(
                    [pref for pref in prefs if pref[1] >= threshold],
                    key=lambda x: x[1],
                    reverse=True)[:at]
            ])

            if len(relevantItemIDs) == 0:
                continue

            trainingUsers = {}
            for otherUserID in self.model.UserIDs():
                evaluator.processOtherUser(userID, relevantItemIDs,
                                           trainingUsers, otherUserID,
                                           self.model)

            trainingModel = DictDataModel(trainingUsers)

            recommender.model = trainingModel

            try:
                prefs = trainingModel.PreferencesFromUser(userID)
            except Exception:
                # Excluded all prefs for the user. move on.
                continue
            if not prefs:
                continue

            recommendedItems = recommender.recommend(userID, at)

            self.assertTrue(len(recommendedItems) <= at)

            intersectionSize = len([
                recommendedItem for recommendedItem in recommendedItems
                if recommendedItem in relevantItemIDs
            ])

            # Precision
            if len(recommendedItems) > 0:
                irStats['precision'] += (intersectionSize /
                                         float(len(recommendedItems)))
                irFreqs['precision'] += 1

            # Recall
            irStats['recall'] += (intersectionSize /
                                  float(len(relevantItemIDs)))
            irFreqs['recall'] += 1

            # Fall-Out
            if len(relevantItemIDs) < len(prefs):
                irStats['fallOut'] += (
                    len(recommendedItems) - intersectionSize) / float(
                        nItems - len(relevantItemIDs))
                irFreqs['fallOut'] += 1

            # nDCG
            # In computing, assume relevant IDs have relevance 1 and others 0.
            cumulativeGain = 0.0
            idealizedGain = 0.0
            for index, recommendedItem in enumerate(recommendedItems):
                discount = 1.0 if index == 0 else 1.0 / evaluator.log2(
                    index + 1)
                if recommendedItem in relevantItemIDs:
                    cumulativeGain += discount
                # Otherwise we are multiplying discount by relevance 0 so it
                # does nothing. Ideally results would be ordered with all
                # relevant ones first, so this theoretical ideal list starts
                # with number of relevant items equal to the total number of
                # relevant items.
                if index < len(relevantItemIDs):
                    idealizedGain += discount
            # An empty recommendation list leaves idealizedGain at 0.0; skip
            # the datum instead of dividing by zero.
            if idealizedGain > 0.0:
                irStats['nDCG'] += cumulativeGain / idealizedGain
                irFreqs['nDCG'] += 1

        for key in irFreqs:
            # A statistic that never received a datum has no average.
            if irFreqs[key] > 0:
                irStats[key] = irStats[key] / float(irFreqs[key])
            else:
                irStats[key] = None

        precision = irStats['precision']
        recall = irStats['recall']
        if precision is not None and recall is not None:
            sum_score = precision + recall
        else:
            sum_score = None
        irStats['f1Score'] = None if not sum_score else (
            2.0) * precision * recall / sum_score
예제 #37
0
	def test_non_watched_allOtherItems(self):
		"""Check ``allOtherItems`` for Maria Gabriela and her neighborhood.

		The neighborhood comes from ``self.neighbor.userNeighborhood`` and the
		expected result is the six listed items, in that order.
		"""
		userID = 'Maria Gabriela'
		recSys = UserRecommender(self.model, self.similarity, self.neighbor, True)
		nearestN = self.neighbor.userNeighborhood(userID)
		# assertEqual replaces the deprecated assertEquals alias.
		self.assertEqual(['Lady in the Water', 'Snakes on a Plane', 'Just My Luck', 'Superman Returns',
							'You, Me and Dupree', 'The Night Listener'], recSys.allOtherItems(userID, nearestN))