def run(self):
    """
    Fill out the note statistics on ``self.post`` and collect outgoing URLs.

    Sets ``num_likes``, ``num_comments``, ``num_reblogs`` and ``num_notes``
    on ``self.post``, then appends every link list stored in
    ``self.reblog_map`` to ``self.all_outgoing_urls``.

    Returns:
        ``self.all_outgoing_urls`` -- the list of URLs for the spider to
        process (the same list object that is extended in place).
    """
    likes, comments = self.parseNotes(self.post.url)
    self.post.num_likes = likes
    self.post.num_comments = comments

    # The reblog total is the sum of all values in the histogram built
    # from the reblog map.
    reblog_hist = DictTools.convertToHistogram(self.reblog_map)
    self.post.num_reblogs = sum(reblog_hist.values())

    self.post.num_notes = (
        self.post.num_likes
        + self.post.num_comments
        + self.post.num_reblogs
    )

    for link_list in self.reblog_map.values():
        self.all_outgoing_urls.extend(link_list)

    # Single-argument parenthesised form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("found %s outgoing links\n" % len(self.all_outgoing_urls))

    # The docstring contract promises the URL list; previously nothing
    # was returned and callers received None.
    return self.all_outgoing_urls
def topPosters(self, N=10):
    """
    Return the top N 'posters' (people reblogged the most) for this post.

    Builds a histogram from ``self.reblog_map``, orders it with
    ``DictTools.sortDict(..., key=1, reverse=True)`` and keeps the first
    ``N`` entries of that result.
    """
    histogram = DictTools.convertToHistogram(self.reblog_map)
    # presumably key=1 sorts by the count and reverse=True puts the
    # largest first -- confirm against DictTools.sortDict
    ranked = DictTools.sortDict(histogram, key=1, reverse=True)
    top_entries = ranked[:N]
    return top_entries