コード例 #1
0
ファイル: stream.py プロジェクト: ncvc/TwitterScraper
class DBThread(Thread):
    """Worker thread that drains a queue of stream messages into the database.

    Each item taken from ``queue`` is either ``None`` (a sentinel asking the
    thread to stop) or a container that is probed with ``in`` for well-known
    keys -- presumably a decoded Twitter streaming API message; confirm
    against the producer.  Control messages are only logged, items carrying
    both ``"id"`` and ``"text"`` are stored through ``self.db.add_tweet``,
    and anything else is dropped without logging.

    Every item taken from the queue is acknowledged with ``task_done()``,
    including the ``None`` sentinel and a ``"disconnect"`` message.
    """

    def __init__(self, queue):
        """Create the worker.

        Args:
            queue: Source of messages; must provide ``get()`` and
                ``task_done()`` (e.g. a ``Queue``).
        """
        Thread.__init__(self, name="Thread-DB")
        self.queue = queue
        self.db = DB()
        self.logit = logging.getLogger("logit")
        # TODO: Get multi-queries working, so we don't need a super
        # low-latency connection to the database.  The idea is to buffer
        # tweets on ``to_commit`` and flush them in one call (something like
        # ``self.db.add_tweets(self.to_commit, self.logit)``) once
        # ``commit_size`` of them are pending.  Both attributes are unused
        # by the current one-insert-per-tweet implementation.
        self.to_commit = Queue()
        self.commit_size = 1

    def run(self):
        """Consume the queue until the ``None`` sentinel or a disconnect.

        The database connection is opened first and, once open, is always
        closed on the way out -- even if handling a message raises
        unexpectedly (the exception still propagates).
        """
        self.db.connect()
        try:
            while True:
                tweet = self.queue.get()
                try:
                    if tweet is None:
                        self.logit.info("Terminal sentinel encountered")
                        break
                    if not self._process(tweet):
                        break
                finally:
                    # Acknowledge every dequeued item, whichever way the
                    # iteration ends; otherwise the queue's unfinished-task
                    # count never reaches zero and ``queue.join()`` hangs.
                    self.queue.task_done()
        finally:
            self.logit.info("Closing db connection")
            self.db.close()

    def _process(self, tweet):
        """Log or store one non-sentinel message.

        The keys are checked in a fixed order and only the first match is
        acted upon.

        Returns:
            ``False`` when the message is a ``"disconnect"`` notice and the
            thread should stop, ``True`` otherwise.
        """
        if "delete" in tweet:
            self.logit.info("delete: %s" % str(tweet))
        elif "scrub_geo" in tweet:
            self.logit.info("scrub_geo: %s" % str(tweet))
        elif "limit" in tweet:
            self.logit.warning("limit: %s" % str(tweet))
        elif "status_withheld" in tweet:
            self.logit.info("status_withheld: %s" % str(tweet))
        elif "user_withheld" in tweet:
            self.logit.info("user_withheld: %s" % str(tweet))
        elif "disconnect" in tweet:
            self.logit.warning("disconnect: %s" % str(tweet))
            return False
        elif "warning" in tweet:
            self.logit.warning("warning: %s" % str(tweet))
        elif "id" in tweet and "text" in tweet:
            try:
                self.db.add_tweet(tweet)
            except Exception:
                # Best effort: one failed insert must not stop the stream,
                # so record the traceback and carry on with the next item.
                self.logit.exception("db add_tweet exception %s" % str(tweet))
        return True