Beispiel #1
0
 def test_profile_evolution(self):
     """Test that a profile evolves with a normal context.

     A first update feeds both the 'Einstein' query and the documents that
     belong to the 'study' categories. A second update feeds only the
     documents. Afterwards no 'study' category score may be lower than
     before, and every 'Einstein' category score must be strictly lower.
     """
     categories = list(DmozCategory.get_for_query(query="study"))
     other_cats = list(DmozCategory.get_for_query(query="Einstein"))
     docs = list(DocumentSurrogate.objects.filter(category__in=categories).values_list('pk', flat=True))

     def scores_for(preferences, wanted):
         # Map category pk -> score for the preferences whose category is in `wanted`.
         return dict((e.category.pk, e.score) for e in preferences if e.category in wanted)

     # first pass: both are added
     update_profile.apply(args=[self.profile, 'Einstein', docs], kwargs={'lang': 'en', 'terms': True})
     # materialise the preferences now, so the snapshot predates the second pass
     first_pass = list(self.profile.preferences.all())
     before_boost = scores_for(first_pass, categories)
     before_decay = scores_for(first_pass, other_cats)

     # second pass: only the docs
     update_profile.apply(args=[self.profile, [], docs], kwargs={'lang': 'en', 'terms': True})
     second_pass = list(self.profile.preferences.all())
     after_boost = scores_for(second_pass, categories)
     after_decay = scores_for(second_pass, other_cats)

     # The docs must have been boosted, the others decayed.
     # REMEMBER: if a preference gets to 1, it can't go any higher (100% is the
     # maximum score), hence "not lower" rather than "strictly higher".
     dropped_boosts = [k for k in after_boost if after_boost[k] < before_boost[k]]
     undecayed = [k for k in after_decay if after_decay[k] >= before_decay[k]]
     self.assertFalse(dropped_boosts + undecayed)
Beispiel #2
0
 def test_as_dict(self):
     """Test the dictionary option.

     The keys of the ``as_dict=True`` result must be exactly the primary
     keys of the categories returned by the plain call for the same query.
     """
     results = DmozCategory.get_for_query(self.query)
     dict_results = DmozCategory.get_for_query(self.query, as_dict=True)

     # list.sort() sorts in place and returns None, which would make this
     # test compare None with None; sorted() returns the ordered list.
     result = sorted(dict_results.keys())
     expected = sorted(e.pk for e in results)
     self.assertEqual(result, expected)
Beispiel #3
0
 def test_profile_creation_query(self):
     """Test that a profile is created with a direct query and documents.

     The profile is updated with the 'Einstein' query plus the documents of
     the 'study' categories, then checked against both category sets.
     """
     categories = DmozCategory.get_for_query(query="study")
     other_cats = DmozCategory.get_for_query(query="Einstein")
     docs = list(DocumentSurrogate.objects.filter(category__in=categories).values_list('pk', flat=True))

     update_profile.apply(args=[self.profile, 'Einstein', docs], kwargs={'lang': 'en', 'terms': True})

     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(self._check_expected_profile(self.profile, itertools.chain(categories, other_cats)))
Beispiel #4
0
 def test_profile_expansion(self):
     """Test that a profile behaves properly with new preferences.

     The profile is first updated from the 'Einstein' query alone, then from
     the documents of the 'study' categories alone. The number of stored
     preferences must grow between the two passes.
     """
     categories = DmozCategory.get_for_query(query="study")
     docs = list(DocumentSurrogate.objects.filter(category__in=categories).values_list('pk', flat=True))

     # first pass: only the query. update_profile requires `docs` as its
     # third positional argument, so an empty list is passed explicitly.
     update_profile.apply(args=[self.profile, 'Einstein', []], kwargs={'lang': 'en', 'terms': True})
     before = self.profile.preferences.count()

     # second pass: only the docs
     update_profile.apply(args=[self.profile, [], docs], kwargs={'lang': 'en', 'terms': True})
     after = self.profile.preferences.count()

     self.assertTrue(after > before)
Beispiel #5
0
 def test_query(self):
     """Test that a query returns a non-empty iterable of categories"""
     results = DmozCategory.get_for_query(self.query)

     # One assertion per property, so a failure names the broken one.
     self.assertTrue(len(results))
     self.assertTrue(hasattr(results, '__iter__'))
     self.assertTrue(isinstance(results[0], DmozCategory))
Beispiel #6
0
 def test_category_list(self):
     """Test that an ambiguous query makes the classifier return every candidate category"""
     found_pks = [category.pk for category in DmozCategory.get_for_query(self.query)]
     found_pks.sort()

     # primary keys of the categories whose title is one of the expected candidates
     candidates = DmozCategory.objects.filter(title__in=self.query_candidates)
     expected_pks = list(candidates.order_by('pk').values_list('pk', flat=True))

     self.assertEqual(found_pks, expected_pks)
Beispiel #7
0
 def test_classify_terms_spanish(self):
     """Test that a profile is created with a direct query in spanish"""
     categories = DmozCategory.get_for_query(query="Einstein", lang='es')

     # terms=True: the context is passed as index terms, with no documents
     update_profile.apply(args=[self.profile, 'Einstein', []], kwargs={'lang': 'es', 'terms': True})

     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(self._check_expected_profile(self.profile, categories))
Beispiel #8
0
 def test_build_query_english(self):
     """Test that a profile is created given an english context"""
     categories = DmozCategory.get_for_query(query="Einstein", lang='en')
     ctx = """Einstein himself is well known for rejecting some of the claims of quantum mechanics."""

     # terms=False: the context is full text, not a list of index terms
     update_profile.apply(args=[self.profile, ctx, []], kwargs={'lang': 'en', 'terms': False})

     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(self._check_expected_profile(self.profile, categories))
Beispiel #9
0
 def test_classify_only_docs(self):
     """Test that a profile is created with only documents"""
     categories = DmozCategory.get_for_query(query="study")
     docs = list(DocumentSurrogate.objects.filter(category__in=categories).values_list('pk', flat=True))

     # empty context: only the documents feed the profile
     update_profile.apply(args=[self.profile, [], docs])

     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(self._check_expected_profile(self.profile, categories))
Beispiel #10
0
 def test_build_query_spanish(self):
     """Test that a profile is created given a spanish context"""
     categories = DmozCategory.get_for_query(query="Einstein", lang='es')
     ctx = """El mismo Einstein es conocido por haber rechazado algunas de las demandas de la mecánica cuántica"""

     # terms=False: the context is full text, not a list of index terms
     update_profile.apply(args=[self.profile, ctx, []], kwargs={'lang': 'es', 'terms': False})

     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(self._check_expected_profile(self.profile, categories))
Beispiel #11
0
 def test_spanish_query(self):
     """Test that the exact spanish query yields exactly one category: the expected candidate"""
     spanish_query = self.exact_queries['es']
     matches = list(DmozCategory.get_for_query(spanish_query, 'es'))

     if len(matches) != 1:
         self.fail('Only expecting one category!')

     wanted = DmozCategory.objects.get(title=self.exact_candidate)
     self.assertEqual(matches[0], wanted)
Beispiel #12
0
 def test_english_query(self):
     """Test that the exact english query yields exactly one category.

        The single result must be the category titled ``self.exact_candidate``.
     """
     english_query = self.exact_queries['en']
     matches = list(DmozCategory.get_for_query(english_query, 'en'))

     if len(matches) != 1:
         self.fail('Only expecting one category!')

     wanted = DmozCategory.objects.get(title=self.exact_candidate)
     self.assertEqual(matches[0], wanted)
Beispiel #13
0
 def test_empty_query(self):
     """Test that no categories are returned for a non-existent word"""
     found = DmozCategory.get_for_query('lorem ipsum')
     self.assertFalse(found)
Beispiel #14
0
 def test_empty_query_as_dict(self):
     """Test that the dict option yields an empty (falsy) result when the query matches nothing"""
     found = DmozCategory.get_for_query('lorem ipsum', as_dict=True)
     self.assertFalse(found)
Beispiel #15
0
 def test_max_results(self):
     """Check that the max results constraint is respected"""
     limited = DmozCategory.get_for_query(self.query, max_results=1)
     self.assertEqual(len(limited), 1)
Beispiel #16
0
 def test_score(self):
     """Test that an important candidate receives a high score (at least 0.98)"""
     results = DmozCategory.get_for_query(self.query)
     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(results[0].relative_weight >= 0.98)
Beispiel #17
0
 def test_score_range(self):
     """Test that every relative weight lies in [0, 1]"""
     out_of_range = []
     for category in DmozCategory.get_for_query(self.query):
         if category.relative_weight > 1 or category.relative_weight < 0:
             out_of_range.append(category)
     self.assertFalse(out_of_range)
Beispiel #18
0
 def test_classify_terms_incorrect_lang(self):
     """Test that a profile is created with the default language if a given one is not supported"""
     # the expected categories are the english ones, although 'fr' is requested below
     categories = DmozCategory.get_for_query(query="Einstein", lang='en')

     update_profile.apply(args=[self.profile, 'Einstein', []], kwargs={'lang': 'fr', 'terms': True})

     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(self._check_expected_profile(self.profile, categories))
Beispiel #19
0
 def test_invalid_lang_query(self):
     """Test that, given an invalid language code, the method defaults to the base lang (english)"""
     # list.sort() sorts in place and returns None, which would make this
     # test compare None with None; sorted() returns the ordered list.
     results = sorted(e.pk for e in DmozCategory.get_for_query(self.query, lang='fr'))
     expected = sorted(e.pk for e in DmozCategory.get_for_query(self.query))
     self.assertEqual(results, expected)
Beispiel #20
0
 def test_weighting(self):
     """Test that the categories receive a score (a ``relative_weight`` attribute)"""
     results = DmozCategory.get_for_query(self.query)
     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(hasattr(results[0], 'relative_weight'))
Beispiel #21
0
def update_profile(profile, context, docs, lang='en', terms=True, **kwargs):
    """Update a profile with a spreading activation algorithm.

    Determines the concepts (categories) in which the user might be interested
    during this session, spreads an attenuated activation to the ancestors of
    those concepts, and then evolves the stored preferences of the profile.

    Args:
        profile: the client user
        context: the last context terms or text; either a single string or
            an iterable of query strings
        docs: the ids of the documents of our database the user found interesting
        lang: the language of the user; falls back to 'en' when the code is
            not listed in settings.LANGUAGES
        terms: whether the context is already a string of index terms (True)
            or a full text that must first go through build_query (False)
        **kwargs: accepted for call compatibility; not used here

    Returns:
        True, always.
    """
    if lang not in [e[0] for e in settings.LANGUAGES]:
        lang = 'en'

    # STEP 0: build the concepts set and set their activation values.
    CON = {}
    if not terms and context:
        context = build_query(context, language=lang)
    # A lone query must be wrapped in a list. Strings are checked explicitly
    # because on Python 3 they expose __iter__ and would otherwise be
    # iterated character by character.
    if isinstance(context, (str, bytes)) or not hasattr(context, '__iter__'):
        context = [context]

    # Populate the concepts with a dictionary of the form {concept: similarity}
    for query in context:
        CON.update(DmozCategory.get_for_query(query, lang, as_dict=True))
    doc_categories = DocumentSurrogate.objects.filter(pk__in=docs).values_list('category', flat=True)
    for category_pk in doc_categories.iterator():
        # TODO: should I compute the document's summary similarity to its alleged category?
        CON[category_pk] = 1.0

    # STEP 1: spreading. Add to the interest list the attenuated weight of
    # each concept's ancestors. Iterate over a snapshot of the keys because
    # ancestors are inserted into CON while walking.
    for concept in list(CON):
        curr_concept = concept
        # one query per hierarchy step fetches both the parent and the weight
        parent, ch_weight = DmozCategory.objects.filter(
            pk=curr_concept).values_list('parent', 'weight')[0]
        while parent:
            # multiple children of a parent might be in CON; keep the maximum
            # score by selecting the maximum each time
            CON[parent] = max(CON.get(parent, 0.0), CON[curr_concept] * ch_weight)
            curr_concept = parent
            parent, ch_weight = DmozCategory.objects.filter(
                pk=curr_concept).values_list('parent', 'weight')[0]

    # STEP 2: evolve the profile, using a linear combination to update.
    existing_preferences = set()
    for preference in profile.preferences.iterator():
        # category_id reads the stored key without loading the related category
        ctg = preference.category_id
        if ctg not in CON:
            # the preference is not in this session: decay
            preference.score = DECAY * preference.score
        else:
            # it is: augment
            preference.score = DECAY * preference.score + (1 - DECAY) * CON[ctg]
        preference.save()
        existing_preferences.add(ctg)

    # Add the session concepts the profile did not have yet.
    for newcat in set(CON) - existing_preferences:
        # DO NOT store zero weighted preferences
        if CON[newcat]:
            ClientPreference(category_id=newcat, score=CON[newcat], user=profile).save()
    return True