Example #1
0
 def __iter__(self):
     """Yield one weighted document per entry of ``self.mapped_interests``.

     Every entry is unpacked as a pair whose second element is an article
     id. The (up to 2000 requested) similarity ranks of that article are
     fetched, and each neighbour present in ``self.dictionary.token2id``
     contributes a ``(token_id, weight)`` pair. Neighbours missing from
     the dictionary are dropped, so a yielded document may be empty.
     """
     for _, source_article in self.mapped_interests:
         neighbours = utils.get_article_similarity_ranks(source_article, 2000)
         yield [
             (
                 self.dictionary.token2id[neighbour_id],
                 # Weight is 1 / log2(rank + 5): it shrinks slowly as the
                 # rank value grows.
                 1.0 / (math.log(rank + 5) / math.log(2)),
             )
             for neighbour_id, rank in neighbours.items()
             if neighbour_id in self.dictionary.token2id
         ]
Example #2
0
def make_doc(interest, dictionary):
    """Build the weighted document for a single interest.

    The interest is resolved to an article id via
    ``utils.get_article_id_for_interest``. When that lookup yields a falsy
    value the function returns ``None``. Otherwise it returns a list of
    ``(token_id, weight)`` pairs, one per similar article that is present
    in ``dictionary.token2id``; the list may be empty.
    """
    source_article = utils.get_article_id_for_interest(interest)
    if not source_article:
        return None

    neighbours = utils.get_article_similarity_ranks(source_article, 2000)
    return [
        (
            dictionary.token2id[neighbour_id],
            # Weight is 1 / log2(rank + 5): it shrinks slowly as the rank
            # value grows.
            1.0 / (math.log(rank + 5) / math.log(2)),
        )
        for neighbour_id, rank in neighbours.items()
        if neighbour_id in dictionary.token2id
    ]
Example #3
0
    def build_dict(self):
        """Fill ``self.dictionary`` from similarity neighbours and save it.

        For each pair in ``self.mapped_interests`` the ids returned by
        ``utils.get_article_similarity_ranks(article_id, 2000)`` are passed
        to ``self.dictionary.doc2bow`` with ``True`` as second argument.
        Afterwards ``filter_extremes()`` is called and the dictionary is
        written to ``svd/dictionary.txt``.
        """
        # Disabled snippet kept for reference: force interest articles into
        # the result set.
        #   article_doc = self.mapped_interests.values()
        #   for i in range(5):
        #       self.dictionary.doc2bow(article_doc, True)

        for _, source_article in self.mapped_interests:
            neighbours = utils.get_article_similarity_ranks(source_article, 2000)
            neighbour_ids = list(neighbours.keys())
            # NOTE(review): the positional True is presumably gensim's
            # ``allow_update`` flag -- confirm against the dictionary class.
            self.dictionary.doc2bow(neighbour_ids, True)

        self.dictionary.filter_extremes()
        self.dictionary.save_as_text('svd/dictionary.txt')
Example #4
0
def build_article_adjacencies(interests):
    """Map each interest's article index to its similar articles.

    Every interest is resolved to an article id with
    ``utils.get_article_id_for_interest``; interests that resolve to a
    falsy value are skipped. For the remaining ones, the (up to 2000
    requested) similarity ranks are ordered by ascending rank value and
    the neighbouring article ids are appended, in that order, to the list
    stored under ``id_to_index(article_id)``.

    Returns:
        A ``collections.defaultdict(list)`` keyed by article index. An
        index only appears once at least one neighbour was appended.
    """
    article_sims = collections.defaultdict(list)
    for interest in interests:
        article_id = utils.get_article_id_for_interest(interest)
        if not article_id:
            continue
        index1 = id_to_index(article_id)
        ranks = utils.get_article_similarity_ranks(article_id, 2000)
        # sorted() instead of ``.items().sort()``: on Python 3 ``items()``
        # returns a view without a ``sort`` method. sorted() is stable, so
        # ties keep the same relative order as the in-place sort did.
        ordered = sorted(ranks.items(), key=lambda pair: pair[1])
        for article_id2, _rank in ordered:
            article_sims[index1].append(article_id2)

    return article_sims
def build_article_adjacencies(interests):
    """Map each interest's article index to its similar articles.

    Every interest is resolved to an article id with
    ``utils.get_article_id_for_interest``; interests that resolve to a
    falsy value are skipped. For the remaining ones, the (up to 2000
    requested) similarity ranks are ordered by ascending rank value and
    the neighbouring article ids are appended, in that order, to the list
    stored under ``id_to_index(article_id)``.

    Returns:
        A ``collections.defaultdict(list)`` keyed by article index. An
        index only appears once at least one neighbour was appended.
    """
    article_sims = collections.defaultdict(list)
    for interest in interests:
        article_id = utils.get_article_id_for_interest(interest)
        if not article_id:
            continue
        index1 = id_to_index(article_id)
        ranks = utils.get_article_similarity_ranks(article_id, 2000)
        # sorted() instead of ``.items().sort()``: on Python 3 ``items()``
        # returns a view without a ``sort`` method. sorted() is stable, so
        # ties keep the same relative order as the in-place sort did.
        ordered = sorted(ranks.items(), key=lambda pair: pair[1])
        for article_id2, _rank in ordered:
            article_sims[index1].append(article_id2)

    return article_sims