コード例 #1
0
ファイル: spider.py プロジェクト: 0xa-cc/know_quiz
class Spider(object):
    """Crawler front-end that queues one job per URL on a thread pool.

    The constructor only records configuration. The pool itself is built
    by :meth:`run`, so ``thread_pool`` stays ``None`` until then.
    """

    def __init__(self, thread_num, logfile, debug_level,
                 dbfile, keyword, degree):
        """Store the crawl configuration.

        Args:
            thread_num: Passed to ``ThreadPool`` as its second argument.
            logfile: Stored as-is; not used by this class itself.
            debug_level: Stored as-is; not used by this class itself.
            dbfile: Passed to ``ThreadPool`` as its first argument.
            keyword: Forwarded to every queued job.
            degree: Forwarded to every queued job.
        """
        self.urls = []
        self.logfile = logfile
        self.debug_level = debug_level
        self.dbfile = dbfile
        self.thread_num = thread_num
        self.keyword = keyword
        self.degree = degree
        # Created by run(); initialised here so quit() is safe to call
        # even when run() has never been invoked.
        self.thread_pool = None

    def run(self, urls):
        """Queue a ``get_my_blog`` job for each URL and wait for the pool.

        Args:
            urls: Iterable of URLs; each becomes the first element of the
                ``(url, keyword, degree)`` argument tuple of one job.
        """
        logger.info("开始运行爬虫程序")
        self.thread_pool = ThreadPool(self.dbfile, self.thread_num)
        # import spider template (kept function-local, hoisted out of the
        # loop because it does not depend on the URL)
        from knowquiz.modules import get_my_blog
        for url in urls:
            logger.info("添加任务 %s 到队列中" % get_my_blog.__name__)
            args = (url, self.keyword, self.degree)
            self.thread_pool.add_job(get_my_blog, args)
        # Poll every 5 seconds while check_job() reports a positive count.
        while self.thread_pool.check_job() > 0:
            try:
                logger.info("当前可用任务 %d" % self.thread_pool.check_job())
                time.sleep(5)
            except KeyboardInterrupt:
                # Ctrl-C while logging/sleeping asks the pool to stop its
                # jobs; the loop then keeps polling until check_job()
                # no longer reports work. An interrupt raised while the
                # loop condition itself is evaluated is not caught here.
                self.thread_pool.stop_job()

    def quit(self):
        """Log the shutdown and stop the pool if one was created."""
        # NOTE(review): assumes `logger` is a stdlib logging.Logger, where
        # warn() is a deprecated alias of warning() -- confirm.
        logger.warning("退出程序")
        if self.thread_pool is not None:
            self.thread_pool.stop_all()
コード例 #2
0
ファイル: spider.py プロジェクト: 0xa-cc/know_quiz
 def run(self, urls):
     """Queue one ``get_my_blog`` job per URL, then poll until the pool is idle.

     Each job receives the tuple ``(url, self.keyword, self.degree)``.
     The pool is polled every 5 seconds while ``check_job()`` is positive;
     a ``KeyboardInterrupt`` raised during the log/sleep step triggers
     ``stop_job()`` and polling continues.
     """
     logger.info("开始运行爬虫程序")
     self.thread_pool = ThreadPool(self.dbfile, self.thread_num)

     for target in urls:
         # Spider template, imported lazily on first use.
         from knowquiz.modules import get_my_blog
         announcement = "添加任务 %s 到队列中" % get_my_blog.__name__
         logger.info(announcement)
         self.thread_pool.add_job(
             get_my_blog, (target, self.keyword, self.degree)
         )

     while True:
         # The exit check stays outside the try block, so an interrupt
         # raised here propagates to the caller.
         if not self.thread_pool.check_job() > 0:
             break
         try:
             pending = self.thread_pool.check_job()
             logger.info("当前可用任务 %d" % pending)
             time.sleep(5)
         except KeyboardInterrupt:
             self.thread_pool.stop_job()