def tfidf_profile(self, items_repository, size, content_filter,
                  time_context=0):
        """
        Build the user profile out of the tags of the user's packages,
        ranked by their sublinear tfidf weight.

        The documents for self.pkg_profile are looked up with
        data.axi_search_pkgs, weighted by data.tfidf_weighting (which also
        receives content_filter and time_context), and the first element of
        every weight entry is passed to self._eliminate_duplicated together
        with size. The result of that helper is returned unchanged.
        """
        pkg_docs = data.axi_search_pkgs(items_repository, self.pkg_profile)
        # NOTE: data.tfidf_plus(items_repository, docs, content_filter) is
        # the alternative weighting call that was left disabled here.
        weighted_terms = data.tfidf_weighting(
            items_repository, pkg_docs, content_filter,
            time_context=time_context)
        # Only index 0 of each entry (the term) is needed from here on;
        # duplicated stemmed terms are removed by the helper.
        terms = [entry[0] for entry in weighted_terms]
        return self._eliminate_duplicated(terms, size)
 def eset_profile(self, items_repository, size, content_filter):
     """
     Build a tag profile from the Xapian expand set of the user's packages.

     Every document returned by data.axi_search_pkgs for self.pkg_profile
     is marked as relevant in a xapian.RSet. Up to size * 2 expansion terms
     are then requested from Xapian, with content_filter handed to
     get_eset as its last argument, and the resulting terms go through
     self._eliminate_duplicated(terms, size), whose result is returned.
     """
     enquire = xapian.Enquire(items_repository)
     # Relevance set holding the documents of the user's packages
     relevant_docs = xapian.RSet()
     for pkg_doc in data.axi_search_pkgs(items_repository,
                                         self.pkg_profile):
         relevant_docs.add_document(pkg_doc.docid)
     # Ask for twice as many terms as needed: some of them are discarded
     # below as duplicated stemmed terms.
     max_terms = size * 2
     expand_flags = xapian.Enquire.INCLUDE_QUERY_TERMS
     expanded = enquire.get_eset(max_terms, relevant_docs, expand_flags,
                                 1, content_filter)
     terms = [item.term for item in expanded]
     return self._eliminate_duplicated(terms, size)
Exemple #3
0
 def eset_profile(self, items_repository, size, content_filter):
     """
     Return the profile terms obtained by query expansion over the user's
     packages.

     The package documents found by data.axi_search_pkgs for
     self.pkg_profile are added to a xapian.RSet, Xapian is asked for
     size * 2 expansion terms (content_filter is forwarded to get_eset),
     and the terms are handed to self._eliminate_duplicated with size.
     """
     expander = xapian.Enquire(items_repository)
     user_docs = data.axi_search_pkgs(items_repository, self.pkg_profile)

     # Mark each package document as relevant
     rset = xapian.RSet()
     for doc in user_docs:
         rset.add_document(doc.docid)

     # Expanded terms are the statistically good differentiators of the
     # relevant set; query terms are allowed in the result.
     eset = expander.get_eset(
         size * 2,
         rset,
         xapian.Enquire.INCLUDE_QUERY_TERMS,
         1,
         content_filter)

     # Duplicated stemmed terms are removed by the helper
     candidate_terms = [hit.term for hit in eset]
     return self._eliminate_duplicated(candidate_terms, size)
Exemple #4
0
    def tfidf_profile(self, items_repository, size, content_filter,
                      time_context=0):
        """
        Return the most relevant tags for the user's packages, based on the
        sublinear tfidf weight of the packages' tags.

        Documents for self.pkg_profile come from data.axi_search_pkgs and
        are weighted by data.tfidf_weighting, which also receives
        content_filter and time_context. The first element of each weight
        entry is collected and passed, with size, to
        self._eliminate_duplicated; that helper's result is returned.
        """
        user_docs = data.axi_search_pkgs(items_repository, self.pkg_profile)
        # NOTE: data.tfidf_plus(items_repository, docs, content_filter) is
        # the alternative weighting call that was left disabled here.
        ranking = data.tfidf_weighting(items_repository, user_docs,
                                       content_filter,
                                       time_context=time_context)
        # Keep only the term (index 0) of every weighted entry
        candidate_tags = []
        for weighted in ranking:
            candidate_tags.append(weighted[0])
        # Duplicated stemmed terms are removed by the helper
        return self._eliminate_duplicated(candidate_tags, size)