コード例 #1 (スコア: 0)
ファイル: tweets.py — プロジェクト: ooda/cloudly
    def metadata_processor(self):
        """Compute counts and other metadata about this stream.

        Send results to the user provided processor function. The decorator
        makes sure we don't queue too often by waiting for the given amount of
        time between successive calls.
        """
        # hgetall returns string values, so coerce each to int. Use items()
        # instead of the Python-2-only iteritems(): redis-py returns a plain
        # dict, which has no iteritems() on Python 3, and items() behaves the
        # same on Python 2 for this comprehension.
        counts = {key: int(value) for key, value in
                  self.redis.hgetall(self.metadata_cache_key).items()}

        # Number of tweets currently buffered locally, awaiting processing.
        counts['cached'] = len(self.tweet_cache)

        metadata = {'counts': counts}
        log.debug(metadata)

        # Mirror the queuing behavior used elsewhere in this class: hand off
        # to a background worker when queuing, otherwise process inline.
        if self.is_queuing:
            rqworker.enqueue(self.metadata_processor_fct, metadata)
        else:
            self.metadata_processor_fct(metadata)
コード例 #2 (スコア: 0)
ファイル: tweets.py — プロジェクト: ooda/cloudly
    def run(self, generator, stop_condition_fct=None):
        """Gather tweets and either enqueue them for processing later by a
        worker process or process them immediately. This behavior depends on
        the `is_queuing` parameter.

        Args:
            generator: iterable of streaming messages; each item must be
                convertible via dict() (e.g. a TwitterResponseWrapper).
            stop_condition_fct: optional zero-argument callable; when it
                returns truthy, collection stops and this method returns.
        """
        log.debug("Starting collection.")
        for data in generator:
            # Let the caller halt collection cleanly between messages.
            if stop_condition_fct and stop_condition_fct():
                log.debug("Stopped.")
                return
            # For some reason, sometime we can't jsonify TwitterResponseWrapper
            data = dict(data)

            # A 'limit' message is a notice of undelivered tweets rather than
            # an actual tweet, so it only updates the firehose counter.
            if 'limit' in data:
                if self.metadata_processor_fct:
                    # The argument firehose_count is the total number of
                    # undelivered tweets since the connection was opened. Since
                    # we want to count irrespective of connection
                    # opening/closing we compute a delta since last count and
                    # add that to the firehose count. This allows us to keep a
                    # correct count in-between connections.
                    firehose_count = data['limit']['track']
                    # getset atomically stores the new count and returns the
                    # previous one (None on first use, hence the `or 0`).
                    firehose_delta = firehose_count - int(
                        (self.redis.getset(self.firehose_count_key,
                                           firehose_count) or 0))

                    self.redis.hincrby(self.metadata_cache_key, 'firehose',
                                       firehose_delta)
            else:
                self.tweet_cache.append(data)
                # Increment the total number of tweets in the stream.
                self.redis.hincrby(self.metadata_cache_key, "stream", 1)
                # Increment the total number of tweets in the firehose.
                # Remember, the firehose count provided by Twitter (track)
                # is the number of undelivered tweets:
                # Total = undelivered + delivered
                self.redis.hincrby(self.metadata_cache_key, 'firehose', 1)

                # Flush the local cache once it reaches the batch size.
                if len(self.tweet_cache) >= self.cache_length:
                    if self.is_queuing:
                        rqworker.enqueue(self.tweet_processor,
                                         self.tweet_cache)
                        # Log the elapsed time between batch enqueues; skipped
                        # on the first batch when no previous time exists.
                        now = datetime.now()
                        if self.previous_queue_time:
                            delta_time = now - self.previous_queue_time
                            log.debug(
                                "Queued {}. Elapsed {:2.2f} secs.".format(
                                    len(self.tweet_cache),
                                    delta_time.total_seconds()))
                        self.previous_queue_time = now
                    else:
                        self.tweet_processor(self.tweet_cache)
                    # Empty cache for next batch.
                    self.tweet_cache = []

            # We might not receive limit message, be sure to call
            # metadata_processor. Don't have to worry calling it too often,
            # it's throttled.
            if self.metadata_processor_fct:
                self.metadata_processor()
        log.debug("Terminating.")