ip = random.choice(ips) assert isinstance(ip, IPData), 'Error format' Logger.info('[factory] get ip %s', ip.to_str()) return ip @classmethod async def get_ips(cls, http: bool = True, https: bool = False, delay: int = None, rule: str = None): keys = [] if http: keys.append(Config.REDIS_KEY_ABLE_HTTP) if https: keys.append(Config.REDIS_KEY_ABLE_HTTPS) if delay: keys.append(Config.REDIS_KEY_NET_DELAY % delay) if rule: keys.append(Config.REDIS_KEY_ABLE_RULES % rule) with await Redis.share() as redis: ips = await redis.sinter(*keys) ips = [IPData.with_str(ip.decode()) for ip in ips] return ips if __name__ == '__main__': from src.lib.func import run_until_complete run_until_complete(IPFactory.get_random_ip())
# remove checked pool
# await redis.delete(Config.REDIS_KEY_CHECKED_POOL)


@classmethod
async def push_to_pool(cls, ips) -> int:
    """Append proxies to the Redis list at ``Config.REDIS_KEY_CHECK_POOL``.

    Args:
        ips: A list of values to push, or a single value (wrapped in a
            list when it is not already one).

    Returns:
        The number of values submitted (``len(ips)``), not the Redis reply.
        ``0`` when ``ips`` is an empty list.
    """
    if not isinstance(ips, list):
        ips = [ips]
    if not ips:
        # RPUSH with no values is rejected; nothing to send.
        return 0
    with await Redis.share() as redis:
        await redis.rpush(Config.REDIS_KEY_CHECK_POOL, *ips)
    Logger.info('[check] send %d ip to check pools' % len(ips))
    return len(ips)


@classmethod
async def push_to_checked_pool(cls, ips) -> int:
    """Add proxies to the Redis set at ``Config.REDIS_KEY_CHECKED_POOL``.

    Args:
        ips: A list of values to add, or a single value (wrapped in a
            list when it is not already one).

    Returns:
        The number of values submitted (``len(ips)``), not the Redis reply.
        ``0`` when ``ips`` is an empty list.
    """
    if not isinstance(ips, list):
        ips = [ips]
    if not ips:
        # SADD with no members is rejected; nothing to send.
        return 0
    with await Redis.share() as redis:
        await redis.sadd(Config.REDIS_KEY_CHECKED_POOL, *ips)
    Logger.info('[check] send %d ip to checked pools' % len(ips))
    return len(ips)


async def handle_task_exception(self, e):
    """Log the exception text as an error, then pause for five seconds."""
    Logger.error('[error] ' + str(e))
    await asyncio.sleep(5)


if __name__ == '__main__':
    from src.lib.func import run_until_complete

    run_until_complete(IPChecker().run())
@IPGet.config(key)
def config():
    """Build the site description for Spys.me: its name and the page to fetch."""
    spys = SiteData()
    spys.name = 'Spys.me'
    spys.pages = ['http://spys.me/proxy.txt']
    return spys


@IPGet.parse(key)
def parse(resp: SiteResponse):
    """Yield one ``SiteResponseData`` per ``ip:port`` pair found in ``resp.text``.

    Pairs that cannot be converted are skipped instead of aborting the scan.
    """
    import re

    for match in re.finditer(r'(?:\d{1,3}\.){3}\d{1,3}:\d+', resp.text):
        try:
            parts = match.group(0).split(':')
            entry = SiteResponseData()
            entry.ip, entry.port = parts[0], parts[1]
            yield entry
        except Exception:
            continue


if __name__ == '__main__':
    from src.lib.func import run_until_complete

    run_until_complete(IPGet.test_crawl(key))
return f return decorator @classmethod def parse(cls, name): self = cls.share() def decorator(f): self._parsers[name] = f return f return decorator def get_user_agent(self) -> str: import random return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%d.0.3770.80 Safari/537.36' % random.randint( 70, 76) async def handle_task_exception(self, e): Logger.error('[error] ' + str(e)) await asyncio.sleep(5) # if __name__ == '__main__': from src.lib.func import run_until_complete from src.sites import * from src.app.ip_get import IPGet, SiteResponse run_until_complete(IPGet.share().run())