コード例 #1
0
ファイル: popular_url.py プロジェクト: pombredanne/atango
 def run(self, hour_range=HOUR_RANGE):
     """Retweet linked tweets and yield tweets that list other counted URLs.

     This is a generator. It searches for posts containing 'http' using
     the date filter built from ``{'hours': hour_range}`` and walks the
     URLs returned by ``self._count_url(posts).most_common()``.

     * URLs starting with 'https://twitter.com/' from which a tweet id
       can be extracted are retweeted (skipped when ``self.debug`` is
       set) and are not listed.
     * Every other URL is formatted with TWEET_FORMAT and accumulated
       until the next entry would not fit; the accumulated text is then
       yielded with HASH_TAG appended and a new batch is started.

     Args:
         hour_range: value passed as 'hours' to the date-range filter.

     Yields:
         Tweet text with CR/LF characters removed and HASH_TAG appended.
     """
     date_range = kuzuha.build_date_filter_by_range({'hours': hour_range})
     posts = kuzuha.search('http', _filter=date_range, sort=[])
     tweet = ''
     for (url, count) in self._count_url(posts).most_common():
         if url.startswith('https://twitter.com/'):
             tweet_id = self.extract_tweet_id(url)
             if tweet_id:
                 logger.info('RT: id=%s (%s)', tweet_id, url)
                 if not self.debug:
                     try:
                         self.twitter.api.statuses.retweet(id=tweet_id)
                     except TwitterHTTPError as e:
                         # Best effort: a failed retweet must not abort
                         # the rest of the run, so only log it.
                         # logger.warning replaces the deprecated
                         # logger.warn alias.
                         logger.warning('%s %s', type(e), str(e))
                 continue
             # No tweet id could be extracted: fall through and list the
             # URL like any other link.
         title = self._get_title(url)
         new_url_info = TWEET_FORMAT % (title, url, count)
         expected_length = self.calc_tweet_length(tweet, title, count)
         if expected_length < (MAX_TWEET_LENGTH - len(HASH_TAG)):
             tweet += new_url_info
         else:
             # The batch is full: drop the trailing delimiter, close the
             # batch with the hash tag and emit it.
             tweet = tweet[:-len(DELIMITER)] + HASH_TAG
             # An empty batch collapses to just HASH_TAG; never emit that.
             if tweet != HASH_TAG:
                 tweet = tweet.replace('\n', '').replace('\r', '')
                 yield tweet
             tweet = new_url_info
     # Flush whatever is left after the last URL.
     if tweet:
         if tweet.endswith(DELIMITER):
             tweet = tweet[:-len(DELIMITER)]
         tweet = tweet.replace('\n', '').replace('\r', '')
         yield tweet + HASH_TAG
コード例 #2
0
ファイル: popular_url.py プロジェクト: kuhaku/atango
 def run(self, hour_range=HOUR_RANGE):
     """Generate tweets listing counted URLs, retweeting linked tweets.

     Posts containing 'http' are searched with the date filter built from
     ``{'hours': hour_range}``. The URLs are then processed in the order
     given by ``self._count_url(posts).most_common()``:

     * a URL starting with 'https://twitter.com/' that yields a tweet id
       is retweeted (unless ``self.debug`` is truthy) and skipped;
     * any other URL is rendered with TWEET_FORMAT and appended to the
       current batch while ``self.calc_tweet_length`` stays below
       ``MAX_TWEET_LENGTH - len(HASH_TAG)``; otherwise the current batch
       is yielded and the entry starts a new batch.

     Args:
         hour_range: value passed as 'hours' to the date-range filter.

     Yields:
         One string per batch, without CR/LF characters, ending with
         HASH_TAG.
     """
     date_range = kuzuha.build_date_filter_by_range({'hours': hour_range})
     posts = kuzuha.search('http', _filter=date_range, sort=[])
     tweet = ''
     for (url, count) in self._count_url(posts).most_common():
         if url.startswith('https://twitter.com/'):
             tweet_id = self.extract_tweet_id(url)
             if tweet_id:
                 logger.info('RT: id=%s (%s)', tweet_id, url)
                 if not self.debug:
                     try:
                         self.twitter.api.statuses.retweet(id=tweet_id)
                     except TwitterHTTPError as e:
                         # Retweet failures are logged and ignored so the
                         # remaining URLs are still processed. Uses
                         # logger.warning; logger.warn is deprecated.
                         logger.warning('%s %s', type(e), str(e))
                 continue
         title = self._get_title(url)
         new_url_info = TWEET_FORMAT % (title, url, count)
         expected_length = self.calc_tweet_length(tweet, title, count)
         if expected_length < (MAX_TWEET_LENGTH - len(HASH_TAG)):
             tweet += new_url_info
         else:
             # Close the current batch: strip the trailing delimiter and
             # append the hash tag.
             tweet = tweet[:-len(DELIMITER)] + HASH_TAG
             # Skip emission when the batch was empty (only the hash tag
             # would remain).
             if tweet != HASH_TAG:
                 tweet = tweet.replace('\n', '').replace('\r', '')
                 yield tweet
             tweet = new_url_info
     # Emit the final, partially filled batch.
     if tweet:
         if tweet.endswith(DELIMITER):
             tweet = tweet[:-len(DELIMITER)]
         tweet = tweet.replace('\n', '').replace('\r', '')
         yield tweet + HASH_TAG
コード例 #3
0
ファイル: ome.py プロジェクト: pombredanne/atango
    def run(self, interval=20):
        """Yield a message for each parent post whose responses match each other.

        Posts are searched with the date filter built from
        ``{'minutes': interval}`` and grouped by ``self.get_post_res_pairs``.
        For every parent with at least two responses, each pair of responses
        is checked with ``self.is_ome``; when more than one distinct response
        matched, a message is yielded consisting of PREFIX, the shortened
        parent, every shortened matching response in sorted order, and
        HASH_TAG.
        """
        recent = kuzuha.build_date_filter_by_range({'minutes': interval})
        posts = kuzuha.search('', _filter=recent)

        for (parent, responses) in self.get_post_res_pairs(posts).items():
            if len(responses) < 2:
                continue
            logger.info('MENTIONED POST: %s' % parent)
            matched = set()
            for (lhs, rhs) in itertools.combinations(responses, 2):
                logger.info('Compare "%s" with "%s"' % (lhs, rhs))
                if not (lhs and rhs):
                    continue
                if self.is_ome(lhs, rhs):
                    logger.info('"%s" and "%s" are OME' % (lhs, rhs))
                    matched.update((lhs, rhs))
            if len(matched) <= 1:
                continue

            # Matching responses plus the parent share the available length;
            # two characters per post are reserved (presumably for the
            # surrounding brackets -- confirm against body_length).
            num_posts = len(matched) + 1
            max_length = (body_length - num_posts * 2) // num_posts
            pieces = ['%s『%s』' % (PREFIX, self.shorten(parent, max_length))]
            pieces.extend('「%s」' % self.shorten(response, max_length)
                          for response in sorted(matched))
            pieces.append(HASH_TAG)
            yield ''.join(pieces)
コード例 #4
0
ファイル: clause_extractor.py プロジェクト: kuhaku/atango
 def run(self, hour_range=24):
     """Collect ``self.find`` results for a fixed list of word/pattern pairs.

     The pairs are searched with the date filter built from
     ``{'hours': hour_range}`` and their results are concatenated in order.

     Returns:
         The concatenated result when its length is below
         MAX_TWEET_LENGTH, otherwise None.
     """
     date_range = kuzuha.build_date_filter_by_range({'hours': hour_range})
     result = self.find('しこる', 'でしこ', date_range)
     remaining_queries = (
         ('抜く', 'で抜'),
         ('オナニュ', 'でオナニュす'),
         ('オナニュ', 'でオナニュし'),
     )
     for (word, pattern) in remaining_queries:
         result += self.find(word, pattern, date_range)
     if len(result) < MAX_TWEET_LENGTH:
         return result
     return None
コード例 #5
0
 def run(self, hour_range=24):
     """Return the combined ``self.find`` results if they are short enough.

     Four word/pattern pairs are looked up with the date filter built from
     ``{'hours': hour_range}``; the results are accumulated in call order.
     The combined value is returned only when its length is below
     MAX_TWEET_LENGTH; otherwise the method returns None.
     """
     date_range = kuzuha.build_date_filter_by_range({'hours': hour_range})
     combined = self.find('しこる', 'でしこ', date_range)
     for (word, pattern) in (('抜く', 'で抜'),
                             ('オナニュ', 'でオナニュす'),
                             ('オナニュ', 'でオナニュし')):
         combined += self.find(word, pattern, date_range)
     if not len(combined) < MAX_TWEET_LENGTH:
         return None
     return combined
コード例 #6
0
ファイル: markov.py プロジェクト: pombredanne/atango
    def run(self, interval=60, min_length=40):
        """Build per-post token lists and pass them to the Markov generator.

        Posts are searched with the date filter built from
        ``{'minutes': interval}``. For each post that has a 'text' field, the
        text is stripped with ``regex.re_a_tag``, normalized, and skipped if
        it contains 'アニメ時報'. Every line of the remaining text contributes
        BOS, one 'surface,features' entry per parsed node (the first five
        comma-separated feature fields joined together), and EOS.

        Returns:
            Whatever ``MarkovGenerater.generate(words, min_length)`` returns.
        """
        generator = markov.MarkovGenerater()
        tagger = mecab.MeCabWrapper()
        date_filter = kuzuha.build_date_filter_by_range({'minutes': interval})
        posts = kuzuha.search('', _filter=date_filter, sort=[])

        words = []
        for post in posts:
            if 'text' not in post:
                continue
            text = normalize.normalize(regex.re_a_tag.sub('', post['text']))
            if 'アニメ時報' in text:
                continue
            # One token list per post; lines are delimited by BOS/EOS markers.
            tokens = []
            words.append(tokens)
            for line in text.splitlines():
                tokens.append(BOS)
                tokens.extend(
                    '%s,%s' % (node.surface,
                               ''.join(node.feature.split(',')[:5]))
                    for node in tagger.parse_to_node(line))
                tokens.append(EOS)
        return generator.generate(words, min_length)
コード例 #7
0
ファイル: ome.py プロジェクト: kuhaku/atango
    def run(self, interval=20):
        """Generate messages for parent posts that received matching responses.

        Searches posts using the date filter built from
        ``{'minutes': interval}``, groups them via
        ``self.get_post_res_pairs``, and compares every pair of responses to
        the same parent with ``self.is_ome``. When more than one distinct
        response matched, yields PREFIX + the shortened parent + each
        shortened matching response (sorted) + HASH_TAG.
        """
        date_filter = kuzuha.build_date_filter_by_range({'minutes': interval})
        pairs = self.get_post_res_pairs(kuzuha.search('', _filter=date_filter))

        for (parent, responses) in pairs.items():
            if not len(responses) >= 2:
                continue
            similar = set()
            logger.info('MENTIONED POST: %s' % parent)
            for (lhs, rhs) in itertools.combinations(responses, 2):
                logger.info('Compare "%s" with "%s"' % (lhs, rhs))
                # Empty responses are never compared.
                if lhs and rhs and self.is_ome(lhs, rhs):
                    logger.info('"%s" and "%s" are OME' % (lhs, rhs))
                    similar.add(lhs)
                    similar.add(rhs)
            if not len(similar) > 1:
                continue

            num_posts = 1 + len(similar)  # parent + matching children
            max_length = (body_length - num_posts*2) // num_posts
            message = '%s『%s』' % (PREFIX, self.shorten(parent, max_length))
            for response in sorted(similar):
                message += '「%s」' % self.shorten(response, max_length)
            yield message + HASH_TAG
コード例 #8
0
ファイル: clause_extractor.py プロジェクト: kuhaku/atango
 def run(self, hour_range=24):
     """Collect ``self.find`` results for the two word/pattern pairs.

     Both lookups use the date filter built from ``{'hours': hour_range}``
     and their results are concatenated in order.

     Returns:
         The concatenated result when its length is below
         MAX_TWEET_LENGTH, otherwise None.
     """
     date_range = kuzuha.build_date_filter_by_range({'hours': hour_range})
     result = self.find('飲む', 'を飲', date_range)
     for (word, pattern) in (('食べる', 'を食'),):
         result += self.find(word, pattern, date_range)
     if len(result) < MAX_TWEET_LENGTH:
         return result
     return None
コード例 #9
0
 def run(self, hour_range=24):
     """Return the combined ``self.find`` results if they are short enough.

     Two word/pattern pairs are looked up with the date filter built from
     ``{'hours': hour_range}``. The combined value is returned only when its
     length is below MAX_TWEET_LENGTH; otherwise the method returns None.
     """
     date_range = kuzuha.build_date_filter_by_range({'hours': hour_range})
     combined = self.find('飲む', 'を飲', date_range)
     combined += self.find('食べる', 'を食', date_range)
     if not len(combined) < MAX_TWEET_LENGTH:
         return None
     return combined