Example #1
0
class Scheduler(object):
    """Drive the crawl loop: take URLs from a queue collection, crawl, store results.

    The URL queue and the crawl results both live in collections accessed
    through ``self.db``; the actual page fetching/parsing is delegated to
    ``self.crawler``.
    """

    def __init__(self):
        """Create the crawler and the database wrapper used by :meth:`main`."""
        self.crawler = Crawler()
        self.db = Mongo(MONGO_DB)

    def main(self):
        """Run the main program logic until the URL collection is empty.

        Seeds the URL collection with ``START_URL``, then repeatedly removes
        one URL, crawls it and handles the ``(userinfo, new_urls)`` pair the
        crawler returns:

        * both empty -> a placeholder record for ``url`` is stored in the
          userinfo collection;
        * otherwise  -> ``userinfo`` is stored when it is non-empty and every
          entry of ``new_urls`` (if any) is queued for a later iteration.

        :return: None
        """
        self.db.add(MONGO_COLLECTION_URL, {'url': START_URL})

        # NOTE(review): no de-duplication of URLs happens here; presumably
        # ``self.db.add`` or the crawler takes care of it -- confirm, otherwise
        # pages that link to each other keep the queue from ever draining.
        while self.db.count(MONGO_COLLECTION_URL) > 0:
            url = self.db.remove_one(MONGO_COLLECTION_URL)['url']
            userinfo, new_urls = self.crawler.main(url)

            if not userinfo and not new_urls:
                # Nothing could be extracted for this URL: record a placeholder
                # so the URL is still accounted for in the results.
                self.db.add(MONGO_COLLECTION_USERINFO, {
                    'user_url': url,
                    'declare': '该账户可能已经注销'
                })
                continue

            # Either value may be empty/None on its own, so each one is
            # guarded separately instead of relying on the combined check.
            if userinfo:
                self.db.add(MONGO_COLLECTION_USERINFO, userinfo)
            for new_url in new_urls or ():
                self.db.add(MONGO_COLLECTION_URL, {'url': new_url})