Example #1
0
 def tfidf_profile(self,items_repository,size,content_filter):
     """
     Build a tag profile from the tf-idf weighting of the user's packages.

     The packages listed in self.pkg_profile are looked up in
     items_repository, their terms are weighted by data.tfidf_weighting
     (restricted by content_filter), and the resulting terms are passed,
     together with size, to self._eliminate_duplicated, whose result is
     returned unchanged.
     """
     pkg_docs = data.axi_search_pkgs(items_repository, self.pkg_profile)
     # NOTE: data.tfidf_plus(items_repository, docs, content_filter) was an
     # alternative weighting left disabled at this point.
     weighted_entries = data.tfidf_weighting(items_repository, pkg_docs,
                                             content_filter)
     # Only the first field of each weighted entry (the term) is kept.
     terms = []
     for entry in weighted_entries:
         terms.append(entry[0])
     # Collapse duplicated stemmed terms.
     return self._eliminate_duplicated(terms, size)
Example #2
0
 def eset_profile(self,items_repository,size,content_filter):
     """
     Build a tag profile from the Xapian expand set of the user's packages.

     Every document found for self.pkg_profile is placed in a relevance
     set; the expand set computed from it (asking for size*2 terms,
     filtered by content_filter) supplies the candidate terms, which are
     passed with size to self._eliminate_duplicated. That result is
     returned unchanged.
     """
     expander = xapian.Enquire(items_repository)
     pkg_docs = data.axi_search_pkgs(items_repository, self.pkg_profile)

     # Mark each package document as relevant.
     relevant = xapian.RSet()
     for pkg_doc in pkg_docs:
         relevant.add_document(pkg_doc.docid)

     # Request twice as many expansion terms as the profile size before
     # duplicates are collapsed below.
     wanted = size * 2
     expansion = expander.get_eset(wanted, relevant,
                                   xapian.Enquire.INCLUDE_QUERY_TERMS,
                                   1, content_filter)

     candidate_terms = []
     for item in expansion:
         candidate_terms.append(item.term)

     # Collapse duplicated stemmed terms.
     return self._eliminate_duplicated(candidate_terms, size)
def generate_all_terms_tfidf():
    """
    Recompute the module-level user_tfidf_weights mapping.

    The package names printed by `apt-mark showmanual` (skipping names
    that start with 'lib') are searched in the Xapian index under
    ~/.app-recommender/axi_desktopapps/. Their terms are weighted twice,
    once with FilterTag(0) and once with FilterDescription(), and the two
    results are concatenated and stored as a dict in the global
    user_tfidf_weights. Nothing is returned.
    """
    global user_tfidf_weights

    index_dir = os.path.expanduser("~/.app-recommender/axi_desktopapps/")
    index = xapian.Database(index_dir)

    # One package name per output line; names starting with 'lib' are
    # left out of the profile.
    manual_pkgs = []
    for name in commands.getoutput('apt-mark showmanual').splitlines():
        if name.startswith('lib'):
            continue
        manual_pkgs.append(name)

    pkg_docs = data.axi_search_pkgs(index, manual_pkgs)

    tag_filter = FilterTag(0)
    tag_entries = data.tfidf_weighting(index, pkg_docs, tag_filter,
                                       time_context=0)
    description_filter = FilterDescription()
    description_entries = data.tfidf_weighting(index, pkg_docs,
                                               description_filter,
                                               time_context=0)

    # dict() keeps the last value for a repeated key, so a description
    # entry overrides a tag entry with the same key.
    combined_entries = tag_entries + description_entries
    user_tfidf_weights = dict(combined_entries)