Example #1
0
    def run(self):
        """
        Load proxies from the static file and from every crawler into redis.

        Nothing is done when ``self.is_full()`` is true. Otherwise each
        non-blank, non-comment line of ``staticproxy.txt`` is searched for
        ``[username:password@]ip:port`` and every match is passed to
        ``self.redis.add``; afterwards every proxy produced by each crawler
        in ``self.crawlers`` is added the same way.

        Errors raised while opening or reading the file are not caught here.
        :return:
        """
        if self.is_full():
            return
        proxyfile = "staticproxy.txt"
        with open(proxyfile, 'r') as fh:
            proxylines = fh.readlines()
        logger.info(f'read {proxyfile}')
        # Compiled once instead of once per line; the credentials group is
        # optional, ip and port are always captured on a match.
        pattern = re.compile(
            r'((?P<username>\S*?)\:(?P<password>\S*?)@)?(?P<ip>[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3})\:(?P<port>\d*)'
        )
        for line in proxylines:
            entry = line.strip()
            # Check for '#' on the stripped text so that indented comment
            # lines are skipped as well instead of being parsed as proxies.
            if not entry or entry.startswith("#"):
                continue
            match = pattern.search(entry)
            if not match:
                continue
            proxy = Proxy(host=match.group('ip'),
                          port=match.group('port'),
                          username=match.group('username'),
                          password=match.group('password'))
            logger.info("getproxy " + proxy.string())
            self.redis.add(proxy)

        for crawler in self.crawlers:
            logger.info(f'crawler {crawler} to get proxy')
            for proxy in crawler.crawl():
                print(proxy.string())
                self.redis.add(proxy)
Example #2
0
 def max(self, proxy: Proxy) -> int:
     """
     Mark a proxy as valid by setting its score to PROXY_SCORE_MAX.

     :param proxy: proxy whose ``string()`` form is the sorted-set member
     :return: whatever ``self.db.zadd`` returns
     """
     member = proxy.string()
     logger.info(f'{member} is valid, set to {PROXY_SCORE_MAX}')
     # zadd is called with positional score/member when IS_REDIS_VERSION_2
     # is set, and with a {member: score} mapping otherwise.
     if not IS_REDIS_VERSION_2:
         return self.db.zadd(REDIS_KEY, {member: PROXY_SCORE_MAX})
     return self.db.zadd(REDIS_KEY, PROXY_SCORE_MAX, member)
Example #3
0
 def max(self, proxy: Proxy) -> int:
     """
     Set the proxy's score to PROXY_SCORE_MAX.

     :param proxy: proxy
     :return: result of the zadd call
     """
     logger.info(f'{proxy.string()} is valid, set to {PROXY_SCORE_MAX}')
     # Only the argument layout differs between the two zadd call forms.
     if IS_REDIS_VERSION_2:
         zadd_args = (PROXY_SCORE_MAX, proxy.string())
     else:
         zadd_args = ({proxy.string(): PROXY_SCORE_MAX},)
     return self.db.zadd(REDIS_KEY, *zadd_args)
Example #4
0
 def max(self, proxy: Proxy) -> int:
     '''
     Raise the proxy's score to the maximum (PROXY_SCORE_MAX).

     :param proxy: proxy
     :return: value returned by zadd
     '''
     key = proxy.string()
     logger.info(f'{key} is valid, set to {PROXY_SCORE_MAX}')
     result = (
         self.db.zadd(REDIS_KEY, PROXY_SCORE_MAX, key)
         if IS_REDIS_VERSION_2
         else self.db.zadd(REDIS_KEY, {key: PROXY_SCORE_MAX})
     )
     return result
Example #5
0
 def decrease(self, proxy: Proxy) -> int:
     """
     Decrease the score of a proxy by one and delete the proxy when the
     new score is not larger than PROXY_SCORE_MIN.

     :param proxy: proxy
     :return: score read back after the decrement
     """
     member = proxy.string()
     # zincrby takes (name, value, amount) in the IS_REDIS_VERSION_2 case
     # and (name, amount, value) otherwise.
     if IS_REDIS_VERSION_2:
         self.db.zincrby(REDIS_KEY, member, -1)
     else:
         self.db.zincrby(REDIS_KEY, -1, member)
     score = self.db.zscore(REDIS_KEY, member)
     # Remove only once the threshold is reached; previously the proxy was
     # removed unconditionally, which made the decrement pointless.
     if score is None or score <= PROXY_SCORE_MIN:
         logger.info(f'{member} curent score {score}, remove')
         self.db.zrem(REDIS_KEY, member)
     return score
Example #6
0
 def add(self, proxy: Proxy, score=PROXY_SCORE_INIT) -> int:
     """
     Store a proxy with the given score unless it is invalid or already
     stored.

     :param proxy: proxy, ip:port, like 8.8.8.8:88
     :param score: int score, PROXY_SCORE_INIT by default
     :return: result of zadd when the proxy is written; None when the
         proxy fails validation or already exists
     """
     target = f'{proxy.host}:{proxy.port}'
     if not is_valid_proxy(target):
         logger.info(f'invalid proxy {proxy}, throw it')
         return None
     if self.exists(proxy):
         # already stored: its current score is left untouched
         return None
     member = proxy.string()
     if IS_REDIS_VERSION_2:
         return self.db.zadd(REDIS_KEY, score, member)
     return self.db.zadd(REDIS_KEY, {member: score})
Example #7
0
 def add(self, proxy: Proxy, score=PROXY_SCORE_INIT):
     """
     Add a proxy with the given score (PROXY_SCORE_INIT by default).

     :param proxy: proxy
     :param score: score
     :return: result of the add; None when the proxy is invalid or is
         already present
     """
     if not is_valid_proxy(f'{proxy.host}:{proxy.port}'):
         logger.info(f'invalid proxy {proxy}, throw it')
         return None
     if self.exists(proxy):
         return None
     # Pick the argument layout matching the zadd call form in use.
     zadd_args = (
         (score, proxy.string())
         if IS_REDIS_VERSION_2
         else ({proxy.string(): score},)
     )
     return self.db.zadd(REDIS_KEY, *zadd_args)
Example #8
0
 def exists(self, proxy: Proxy) -> bool:
     """
     Tell whether the proxy is stored under REDIS_KEY.

     :param proxy: proxy
     :return: True when zscore returns a value for the proxy, else False
     """
     stored_score = self.db.zscore(REDIS_KEY, proxy.string())
     return stored_score is not None
Example #9
0
 def exists(self, proxy: Proxy) -> bool:
     """
     Check whether the proxy exists.

     :param proxy: proxy
     :return: whether it exists (its zscore lookup is not None)
     """
     return self.db.zscore(REDIS_KEY, proxy.string()) is not None
Example #10
0
 def add(self, proxy: Proxy, score=PROXY_SCORE_INIT) -> int:
     '''
     Add the proxy to redis and set its initial score.

     No existence check is made first: zadd is issued for every proxy
     that passes validation.
     :param proxy: proxy, format ip:port, e.g. 8.8.8.8:888
     :param score: initial score of the proxy
     :type score: int
     :return: result of zadd; None when the proxy is invalid
     '''
     host_port = f'{proxy.host}:{proxy.port}'
     if not is_valid_proxy(host_port):
         logger.info(f'invalid proxy {proxy}, throw it')
         return
     # add the proxy to the sorted set
     member = proxy.string()
     if not IS_REDIS_VERSION_2:
         return self.db.zadd(REDIS_KEY, {member: score})
     return self.db.zadd(REDIS_KEY, score, member)
Example #11
0
 def decrease(self, proxy: Proxy) -> int:
     """
     Decrease the score of a proxy by one, or delete the proxy when its
     score is not larger than PROXY_SCORE_MIN (or it has no score).

     :param proxy: proxy
     :return: result of zincrby when the score is decreased, otherwise
         the result of zrem
     """
     member = proxy.string()
     score = self.db.zscore(REDIS_KEY, member)
     # current score is larger than PROXY_SCORE_MIN
     if score and score > PROXY_SCORE_MIN:
         logger.info(f'{member} current score {score}, decrease 1')
         if IS_REDIS_VERSION_2:
             return self.db.zincrby(REDIS_KEY, member, -1)
         return self.db.zincrby(REDIS_KEY, -1, member)
     # otherwise delete proxy; zrem must receive the same string member
     # that zscore/zincrby use, not the Proxy object itself
     logger.info(f'{member} current score {score}, remove')
     return self.db.zrem(REDIS_KEY, member)
Example #12
0
 def decrease(self, proxy: Proxy) -> int:
     """
     Take one point off the proxy's score; delete the proxy when its score
     is not above the minimum.

     :param proxy: proxy
     :return: result of zincrby when decremented, otherwise result of zrem
     """
     current = self.db.zscore(REDIS_KEY, proxy.string())
     # a missing/zero score, or one not above PROXY_SCORE_MIN: delete proxy
     if not (current and current > PROXY_SCORE_MIN):
         logger.info(f'{proxy.string()} current score {current}, remove')
         return self.db.zrem(REDIS_KEY, proxy.string())
     # current score is larger than PROXY_SCORE_MIN
     logger.info(f'{proxy.string()} current score {current}, decrease 1')
     if not IS_REDIS_VERSION_2:
         return self.db.zincrby(REDIS_KEY, -1, proxy.string())
     return self.db.zincrby(REDIS_KEY, proxy.string(), -1)
Example #13
0
 def decrease(self, proxy: Proxy) -> int:
     '''
     Lower the proxy's score; delete the proxy when the score is not
     above the minimum.

     :param proxy: proxy
     :return: result of zincrby when decremented, otherwise result of zrem
     '''
     member = proxy.string()
     score = self.db.zscore(REDIS_KEY, member)
     above_minimum = bool(score) and score > PROXY_SCORE_MIN
     if above_minimum:
         # current score is larger than the minimum
         logger.info(f'{member} current score {score}, decrease 1')
         incr_args = (member, -1) if IS_REDIS_VERSION_2 else (-1, member)
         return self.db.zincrby(REDIS_KEY, *incr_args)
     # current score is not larger than the minimum
     logger.info(f'{member} current score {score}, remove')
     return self.db.zrem(REDIS_KEY, member)