コード例 #1
0
    def pregenerate(self):
        """Collect one word counter per sending user before generation starts.

        Overridden because the global TFIDF data has to be calculated over
        every user, not only the one currently being generated for.

        Side effects: prints a status line and replaces ``self.counters``
        with a list holding ``corpus.counter()`` for each key returned by
        ``self.threads.messages_by_from_user()``, in iteration order.
        """
        # Parenthesised single-argument form: prints the same text on
        # Python 2 and is also valid syntax on Python 3.
        print("Generating global TFIDF data")
        pbar = ProgressBar()
        messages = self.threads.messages_by_from_user()
        users = messages.keys()
        self.counters = []

        # pbar(...) wraps the user sequence so progress is shown while the
        # per-user corpora are processed.
        for user in pbar(users):
            corpus = Corpus.from_messages(messages[user], self.cache)
            corpus.process()
            self.counters.append(corpus.counter())
コード例 #2
0
    def pregenerate(self):
        """Precompute the per-user word counters used for global TFIDF data.

        This overrides the default hook because the calculation needs data
        for all users rather than for a single one.

        Side effects: prints a status line and resets ``self.counters`` to a
        fresh list, then appends ``corpus.counter()`` for every user key in
        ``self.threads.messages_by_from_user()``.
        """
        # Written as a call so the line parses on Python 3 too; with a single
        # argument the output on Python 2 is unchanged.
        print("Generating global TFIDF data")
        pbar = ProgressBar()
        messages = self.threads.messages_by_from_user()
        users = messages.keys()
        self.counters = []

        # Iterate through the progress bar wrapper so each processed user
        # advances the display.
        for user in pbar(users):
            corpus = Corpus.from_messages(messages[user], self.cache)
            corpus.process()
            self.counters.append(corpus.counter())
コード例 #3
0
    def generate_for_user(self, user):
        """Render the word-cloud PNG for ``user`` unless it is already current.

        A corpus is built from ``self.threads.messages_including_from_user(user)``
        and its frequencies (limited to ``self.WORD_LIMIT``) are saved to
        ``<PLOTS_DIR>/wordcloud_<slug>.png``. An MD5 digest of the pickled
        frequencies is kept in ``self.cache`` under a key derived from the
        file name; the image is only redrawn when the file is missing or the
        stored digest differs from the current one.
        """
        corpus = Corpus.from_messages(
            self.threads.messages_including_from_user(user), self.cache)
        corpus.process()

        frequencies = corpus.frequencies(limit=self.WORD_LIMIT)

        filename = "%s/wordcloud_%s.png" % (self.PLOTS_DIR, self.slug(user))
        freqs_hash = hashlib.md5(pickle.dumps(frequencies)).hexdigest()
        cache_key = "wordcloudgenerator_%s" % filename

        # Rendering is slow, so it is skipped when the file already exists
        # and the digest of the frequencies has not changed.
        up_to_date = (os.path.exists(filename)
                      and self.cache.get(cache_key) == freqs_hash)
        if not up_to_date:
            # Save first and record the digest afterwards: if the save
            # raises, the cache must not claim that an older image left on
            # disk matches the new frequencies.
            WordCloudWrapper.save(frequencies, filename)
            self.cache.set(cache_key, freqs_hash)
コード例 #4
0
    def generate_for_user(self, user):
        """Write the word-cloud image for ``user``, reusing it when unchanged.

        Frequencies come from a corpus built over
        ``self.threads.messages_including_from_user(user)``, limited to
        ``self.WORD_LIMIT``. The target file is
        ``<PLOTS_DIR>/wordcloud_<slug>.png``. ``self.cache`` stores an MD5
        digest of the pickled frequencies per file name, and the image is
        regenerated only if the file is absent or that digest has changed.
        """
        corpus = Corpus.from_messages(
            self.threads.messages_including_from_user(user), self.cache)
        corpus.process()

        frequencies = corpus.frequencies(limit=self.WORD_LIMIT)

        filename = "%s/wordcloud_%s.png" % (self.PLOTS_DIR, self.slug(user))
        freqs_hash = hashlib.md5(pickle.dumps(frequencies)).hexdigest()
        cache_key = "wordcloudgenerator_%s" % filename

        # This step is slow, so don't redo it if the file already exists and
        # the hash of the frequencies hasn't changed.
        if (os.path.exists(filename)
                and self.cache.get(cache_key) == freqs_hash):
            return

        # The digest is stored only after a successful save; storing it
        # beforehand would let a failed save leave a stale image on disk
        # that later runs treat as current.
        WordCloudWrapper.save(frequencies, filename)
        self.cache.set(cache_key, freqs_hash)