Example #1
0
 def set_max(self, proxy):
     """
     Assign the maximum score to a proxy that was found to be valid.

     :param proxy: sorted-set member to score
     :return: whatever ``self.db.zadd`` returns for the write
     """
     app_logger.info(f'{proxy} is valid, set to {PROXY_SCORE_MAX}')
     # zadd takes a {member: score} mapping; build it inline.
     return self.db.zadd(REDIS_KEY, {proxy: PROXY_SCORE_MAX})
Example #2
0
 def run_getter(self, cycle=CYCLE_GETTER):
     """
     Schedule the crawler to run periodically so the pool gets refilled.

     :param cycle: interval between two crawler runs, in seconds
     :return: None
     """
     if ENABLE_GETTER:
         crawler = Getproxies()
         sch.add_job(crawler.run, 'interval', seconds=cycle, id='run_getter')
         sch.start()
     else:
         app_logger.info('getter not enabled, exit')
Example #3
0
 def run_tester(self, cycle=CYCLE_TESTER):
     """
     Schedule the proxy tester to run periodically.

     :param cycle: interval between two tester runs, in seconds
     :return: None
     """
     if ENABLE_TESTER:
         checker = Tester()
         sch.add_job(checker.run, 'interval', seconds=cycle, id='run_tester')
         sch.start()
     else:
         app_logger.info('tester not enabled, exit')
Example #4
0
 def crawl(self):
     """
     Fetch every URL in ``self.urls`` and yield the proxies parsed from it.

     Each page is downloaded by running ``self._get_page`` to completion on
     ``self.LOOP``; the result is handed to ``self.parse`` and every item it
     produces is yielded to the caller.
     """
     for target in self.urls:
         app_logger.info(f'fetching {target}')
         page = self.LOOP.run_until_complete(self._get_page(target))
         yield from self.parse(page)
Example #5
0
    def decrease(self, proxy) -> int:
        """
        Lower a proxy's score by one, or remove the proxy from the pool.

        The current score is read from the sorted set stored under REDIS_KEY.
        While it is strictly above PROXY_SCORE_MIN it is decremented by 1;
        otherwise (no score, or score already at/below the minimum) the proxy
        is removed from the set.

        :param proxy: sorted-set member whose score should be lowered
        :return: the result of ``zincrby`` (the new score) when the proxy was
            decremented; ``None`` when the proxy was removed.
            NOTE(review): the ``-> int`` annotation does not cover the
            ``None`` case - callers must be prepared for it.
        """
        score = self.db.zscore(REDIS_KEY, proxy)
        # Compare with None explicitly: a stored score of 0 is falsy and must
        # not be mistaken for "no score" (presumably zscore yields None for an
        # unknown member - confirm against the client in use).
        if score is not None and score > PROXY_SCORE_MIN:
            app_logger.error(f'{proxy} current score {score}, decrease 1')
            return self.db.zincrby(REDIS_KEY, -1, proxy)

        app_logger.info(f'{proxy} current score {score},  remove it')
        self.db.zrem(REDIS_KEY, proxy)
        return None
Example #6
0
 def run(self):
     """
     Test every stored proxy, one batch at a time.

     Reads the number of proxies from ``self.redis``, walks the index range
     in steps of TEST_BATCH and, for each slice, runs ``self.test`` for every
     proxy on ``self.loop``, blocking until the whole batch has finished.

     :return: None
     """
     app_logger.info('stating tester...')
     count = self.redis.count()
     app_logger.debug(f'{count} proxies to test')
     for start in range(0, count, TEST_BATCH):
         # start and end offsets of the current batch
         end = min(start + TEST_BATCH, count)
         app_logger.debug(f'testing proxies from {start} to {end} indices')
         proxies = self.redis.batch(start, end)
         # Bind each awaitable to our loop as a future: handing bare
         # coroutines to asyncio.wait is deprecated since Python 3.8 and
         # rejected from 3.11 on.
         tasks = [
             asyncio.ensure_future(self.test(proxy), loop=self.loop)
             for proxy in proxies
         ]
         if not tasks:
             # The batch can come back empty (e.g. proxies were removed
             # after ``count`` was read); asyncio.wait raises ValueError on
             # an empty collection, so skip it.
             continue
         # run the batch to completion on the event loop
         self.loop.run_until_complete(asyncio.wait(tasks))
Example #7
0
    def run(self):
        """
        Start the enabled worker processes and wait for them to finish.

        Depending on ENABLE_TESTER / ENABLE_GETTER / ENABLE_SERVER, a separate
        process is spawned for ``run_tester``, ``run_getter`` and
        ``run_server``. The call blocks until every started process has
        exited. On KeyboardInterrupt the started processes are terminated.
        In all cases the final state of each started process is logged.

        The process objects are also published through the module-level
        ``tester_process`` / ``getter_process`` / ``server_process`` globals.

        :return: None
        """
        global tester_process, getter_process, server_process
        # (label, process) pairs for processes that were actually started.
        # join/terminate/is_alive are only valid on those, so disabled
        # components must never be touched.
        started = []
        try:
            app_logger.info('starting proxypool...')
            if ENABLE_TESTER:
                tester_process = multiprocessing.Process(
                    target=self.run_tester)
                tester_process.start()
                started.append(('tester', tester_process))
                # pid is only assigned once the process has been started
                app_logger.info(
                    f'starting tester, pid {tester_process.pid}...')

            if ENABLE_GETTER:
                getter_process = multiprocessing.Process(
                    target=self.run_getter)
                getter_process.start()
                started.append(('getter', getter_process))
                app_logger.info(f'starting getter, pid{getter_process.pid}...')

            if ENABLE_SERVER:
                server_process = multiprocessing.Process(
                    target=self.run_server)
                server_process.start()
                started.append(('server', server_process))
                app_logger.info(f'starting server, pid{server_process.pid}...')

            for _, process in started:
                process.join()
        except KeyboardInterrupt:
            app_logger.info('received keyboard interrupt signal')
            for _, process in started:
                process.terminate()
        finally:
            # join before is_alive so the reported state is the final one
            for _, process in started:
                process.join()
            for label, process in started:
                app_logger.info(
                    f'{label} is {"alive" if process.is_alive() else "dead"}'
                )
            app_logger.info('proxy terminated')
Example #8
0
 def run_server(self):
     """Start the web API server, unless it is disabled by ENABLE_SERVER."""
     if ENABLE_SERVER:
         app.run(host=API_HOST, port=API_PORT, threaded=API_THREADED)
     else:
         app_logger.info('server not enabled, exit')