Example #1
0
 async def push_to_checked_pool(cls, ips) -> int:
     """Add one or more IPs to the checked-pool Redis set.

     ``ips`` may be a list or a single value; a single value is wrapped
     in a one-element list.

     Returns the number of IPs submitted (the size of the input, not the
     number of members that were new to the set).
     """
     if not isinstance(ips, list):
         ips = [ips]
     # SADD needs at least one member; an empty batch would make the
     # Redis call fail, so treat it as a no-op.
     if not ips:
         return 0
     with await Redis.share() as redis:
         await redis.sadd(Config.REDIS_KEY_CHECKED_POOL, *ips)
         Logger.info('[check] send %d ip to checked pools' % len(ips))
     return len(ips)
Example #2
0
 async def show_result(self, session, site, result, resp: SiteResponse):
     """Log the fetched URL and each parsed IP, following page requests.

     Every ``SiteRequestData`` item in ``result`` is crawled via
     ``crawl_single_page``; every ``SiteResponseData`` item is logged.
     Items of any other type are ignored.
     """
     Logger.info('[get] Url: %s' % resp.url)
     for entry in result:
         if isinstance(entry, SiteRequestData):
             # A follow-up page request: crawl it before moving on.
             await self.crawl_single_page(session, site, entry)
         if isinstance(entry, SiteResponseData):
             Logger.info('[get] Get ip: %s' % entry.to_str())
Example #3
0
 async def remove_legacy_ip(self):
     """Purge old entries from the legacy-IP sorted set.

     Removes every member whose score lies between 0 and
     ``time_int() - Config.DEFAULT_LEGACY_IP_RETAINED_TIME`` and returns
     the count reported by Redis.
     """
     with await Redis.share() as redis:
         # Upper score bound: anything at or below this is dropped.
         cutoff = time_int() - Config.DEFAULT_LEGACY_IP_RETAINED_TIME
         removed = await redis.zremrangebyscore(
             Config.REDIS_KEY_IP_LEGACY_POOL, 0, cutoff)
         if removed:
             Logger.info('[check] remove legacy ip count %d' % removed)
         return removed
Example #4
0
async def load_file(f_path):
    """Read ``host:port`` entries from a text file and push them to the pool.

    Each line is stripped of surrounding whitespace. A line is kept only
    if it contains a ``:`` that is not its first character and does not
    contain ``#`` (so comment lines and blank lines are skipped).
    Nothing is pushed when no usable entry is found.
    """
    with open(f_path) as f:
        # Strip before testing so that blank / whitespace-only lines are
        # dropped instead of being pushed as empty strings.
        stripped = (line.strip() for line in f)
        # str.find returns -1 (truthy) when the separator is missing, so
        # it must be compared explicitly rather than used as a boolean.
        ips = [ip for ip in stripped if ip.find(':') > 0 and '#' not in ip]
    # The file is closed before the (potentially slow) pool push.
    if ips:
        Logger.info('Find ip count %d' % len(ips))
        await IPGet.push_to_pool(ips)
Example #5
0
 async def get_random_ip(cls,
                         https: bool = False,
                         rule: str = None) -> IPData:
     """Pick one IP at random from those returned by ``get_ips``.

     ``https`` and ``rule`` are forwarded unchanged to ``cls.get_ips``.
     Returns ``None`` when no IP is available, otherwise the chosen
     ``IPData`` instance.
     """
     candidates = await cls.get_ips(https=https, rule=rule)
     if candidates:
         chosen = random.choice(candidates)
         assert isinstance(chosen, IPData), 'Error format'
         Logger.info('[factory] get ip %s', chosen.to_str())
         return chosen
     return None
Example #6
0
 async def remove_low_score_ip(self):
     """Remove every IP in the pool whose score is between -100 and 0.

     The matching members are read from the IP-pool sorted set, decoded
     to ``str`` and handed to ``IPSaver.remove_ip``. Does nothing when
     no member matches.
     """
     saver = IPSaver()
     expired = []
     with await Redis.share() as redis:
         raw_members = await redis.zrangebyscore(
             Config.REDIS_KEY_IP_POOL, -100, 0)
         if len(raw_members) > 0:
             expired = [member.decode() for member in raw_members]
     # The Redis connection is released before the removal is performed.
     if not expired:
         return
     await saver.remove_ip(expired)
     Logger.info('[check] remove ip %s', ','.join(expired))
Example #7
0
 async def save_parse_result(self, session, site: SiteData, result):
     """Collect parsed IPs from ``result`` and push them to the pool.

     ``SiteRequestData`` items are crawled via ``crawl_single_page``;
     ``SiteResponseData`` items are converted with ``to_str`` and
     collected. Nothing is pushed when no IP was collected.
     """
     collected = []
     for entry in result:
         if isinstance(entry, SiteRequestData):
             # A follow-up page request: crawl it before moving on.
             await self.crawl_single_page(session, site, entry)
         if isinstance(entry, SiteResponseData):
             collected.append(entry.to_str())
     if not collected:
         return
     Logger.info('[get] Get %d new ip' % len(collected))
     await self.push_to_pool(collected)
Example #8
0
async def load_from_url(url: str):
    """Download ``url`` and push every ``ip:port`` found in its body.

    The response text is scanned for dotted-quad addresses followed by
    ``:port``. All matches (duplicates included) are pushed to the pool
    through ``IPGet.push_to_pool``. Nothing is pushed when there is no
    match.
    """
    import re
    headers = {'User-Agent': get_user_agent()}
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            text = await resp.text()
    # Every match already has the shape "a.b.c.d:port": it always
    # contains ':' and can never contain '#' or surrounding whitespace,
    # so no further per-item filtering or stripping is needed.
    ips = re.findall(r'(?:\d{1,3}\.){3}\d{1,3}:\d+', text)
    # The HTTP response and session are closed before the pool push.
    if ips:
        Logger.info('Find ip count %d' % len(ips))
        await IPGet.push_to_pool(ips)
Example #9
0
 async def dump_to_file(self):
     """Write the IPs currently in the pool to a timestamped text file.

     Members of the IP-pool sorted set with a score between
     ``Config.DEFAULT_MINI_SCORE`` and
     ``Config.DEFAULT_MAX_SCORE + Config.DEFAULT_INC_SCORE`` are written,
     one per line, to ``ip_pool_<timestamp>.ip.txt`` inside
     ``Config.DUMPED_DIR``. No file is created when there are no
     matching members.

     Always returns ``True``.
     """
     from datetime import datetime
     from os import makedirs, path
     # exist_ok avoids the check-then-create race and also creates any
     # missing parent directories.
     makedirs(Config.DUMPED_DIR, exist_ok=True)
     with await Redis.share() as redis:
         members = await redis.zrangebyscore(
             Config.REDIS_KEY_IP_POOL, Config.DEFAULT_MINI_SCORE,
             Config.DEFAULT_MAX_SCORE + Config.DEFAULT_INC_SCORE)
     # The Redis connection is released before touching the file system.
     if members:
         members = [m.decode() for m in members]
         file_name = 'ip_pool_%s.ip.txt' % datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         # path.join keeps the file inside DUMPED_DIR even when the
         # configured directory has no trailing separator.
         with open(path.join(Config.DUMPED_DIR, file_name), 'w') as f:
             f.write('\n'.join(members))
         Logger.info('Dump %d ip to file %s' % (len(members), file_name))
     return True
Example #10
0
 async def start_check(self):
     """Pop one IP from the check queue, run all checks and save it.

     The IP string is taken from the ``Config.REDIS_KEY_CHECK_POOL``
     list with ``blpop``, turned into an ``IPData`` and passed through
     the HTTP, HTTPS and rule checks in that order, all sharing one
     aiohttp session. The outcome is logged and stored via ``IPSaver``.
     """
     with await Redis.share() as redis:
         popped = await redis.blpop(Config.REDIS_KEY_CHECK_POOL)
     # Index 1 of the reply holds the popped value.
     raw_ip = popped[1].decode()
     Logger.info('[check] got ip %s' % raw_ip)
     Prometheus.IP_CHECK_TOTAL.inc(1)
     ip = IPData.with_str(raw_ip)
     timeout = aiohttp.ClientTimeout(Config.DEFAULT_REQUEST_CHECK_TIME_OUT)
     async with aiohttp.ClientSession(timeout=timeout) as session:
         # Each check receives the result of the previous one.
         ip = await self.http_check(ip, session)
         ip = await self.https_check(ip, session)
         ip = await self.rules_check(ip, session)
         Logger.info(
             '[check] Check result %s http %s https %s %s',
             ip.to_str(),
             ip.http,
             ip.https,
             " ".join("%s %s" % pair for pair in ip.rules.items()))
     await IPSaver().save_ip(ip)
0
 async def push_to_pool(cls, ips):
     """Add previously unseen IPs to the pool and queue them for checking.

     ``ips`` may be a list or a single value. An IP is skipped when it
     already has a score in the IP pool or in the legacy pool; otherwise
     it is added to the IP pool with ``Config.DEFAULT_SCORE``. All newly
     added IPs are then forwarded to ``IPChecker.push_to_pool``.

     Returns the number of IPs submitted (the size of the input, not the
     number actually added).
     """
     from src.app.ip_checker import IPChecker
     if not isinstance(ips, list):
         ips = [ips]
     with await Redis.share() as redis:
         fresh = []
         for candidate in ips:
             pool_score = await redis.zscore(
                 Config.REDIS_KEY_IP_POOL, candidate)
             if pool_score is None:
                 # Only consult the legacy pool when the live pool
                 # does not know the IP.
                 legacy_score = await redis.zscore(
                     Config.REDIS_KEY_IP_LEGACY_POOL, candidate)
                 if legacy_score is None:
                     await redis.zadd(Config.REDIS_KEY_IP_POOL,
                                      Config.DEFAULT_SCORE, candidate)
                     fresh.append(candidate)
         if fresh:
             await IPChecker.push_to_pool(fresh)
         Logger.info('[get] send %d ip to ip pools' % len(fresh))
     return len(ips)