コード例 #1
0
    def work(unit):
        """Handle one unit of work from the pool.

        A ``Thread`` unit is downloaded and decoded, and every match of
        ``word_pattern`` found in the text fields of its posts is gathered
        into a set, which is returned.

        Any other unit is expanded via ``unit.process()``; each resulting
        item is pushed back onto ``pool`` to be handled by ``work`` again,
        and ``None`` is returned.
        """
        logger.info('working %r', unit)

        if not isinstance(unit, Thread):
            # Non-thread units only fan out: queue every child for later.
            for child in unit.process():
                pool.push(work, child)
            return None

        # Post fields that are scanned for words.
        text_fields = ('name', 'email', 'sub', 'com', 'filename')

        decoded = unit.download_and_decode()
        found = set()

        for post in decoded['posts']:
            for key in text_fields:
                # A missing field is treated as an empty string.
                encoded = sanitize(post.get(key, '')).encode('utf8')
                found.update(word_pattern.findall(encoded))

        return found
コード例 #2
0
    def work(unit):
        """Handle one unit of work from the pool.

        A ``Thread`` unit is downloaded and decoded; the ``com`` field of
        each post is sanitized, tokenized with ``token_pattern``, lowercased
        and fed through ``generate_ngrams``. The n-grams are tallied in a
        ``collections.Counter``, which is returned.

        Any other unit is expanded via ``unit.process()``; each resulting
        item is pushed back onto ``pool`` to be handled by ``work`` again,
        and ``None`` is returned.
        """
        logger.info("working %r", unit)

        if not isinstance(unit, Thread):
            # Non-thread units only fan out: queue every child for later.
            for child in unit.process():
                pool.push(work, child)
            return None

        decoded = unit.download_and_decode()
        counts = collections.Counter()

        for post in decoded["posts"]:
            # A post without a "com" field contributes an empty string.
            encoded = sanitize(post.get("com", "")).encode("utf8")
            lowered = [piece.lower() for piece in token_pattern.findall(encoded)]
            counts.update(generate_ngrams(lowered))

        return counts