async def parse_page(self, session, url):
    """Scrape (host, port) pairs from an xici-style `ip_list` table page.

    Fetches *url*, parses it into a document, and reads host/port from
    the 2nd and 3rd cell of every data row (header row skipped).
    """
    doc = (await self.get(session, url)) >> to_doc
    rows = doc >> extra_xpath('//table[@id="ip_list"]//tr[position()>1]')
    return [
        (row >> extra_xpath('.//td[position()=2]/text()') >> extra_head,
         row >> extra_xpath('.//td[position()=3]/text()') >> extra_head)
        for row in rows
    ]
async def parse_page(self, session, url):
    """Scrape (host, port) pairs from the first/second column of any
    table on the page (header row skipped).
    """
    page = await self.get(session, url)
    rows = page >> to_doc >> extra_xpath("//table//tr[position()>1]")
    pairs = []
    for row in rows:
        ip = row >> extra_xpath("./td[position()=1]/text()") >> extra_head
        port = row >> extra_xpath("./td[position()=2]/text()") >> extra_head
        pairs.append((ip, port))
    return pairs
async def parse_page(self, session, url):
    """Scrape (host, port) pairs from ip.ihuan.me.

    The site requires a two-step dance: fetch a token from mouse.do
    (extracted with self.pattern), then POST it to tqdl.html to get a
    plain-text proxy list rendered inside div.panel-body.

    Returns a list of (host, port) string tuples; returns [] when the
    token cannot be found (e.g. layout change or anti-bot page).
    """
    text = await self.get(session, url)
    self.headers["Referer"] = 'https://ip.ihuan.me/ti.html'
    text = await self.get(session, 'https://ip.ihuan.me/mouse.do')
    # FIX: the original called .group(0) on the search result unchecked;
    # a page without the token raised AttributeError on None.
    match = self.pattern.search(text)
    if match is None:
        return []
    val = match.group(0).replace('val("', '')
    data = {
        'num': 500,
        'port': '',
        'kill_port': '',
        'address': '',
        'kill_address': '',
        'anonymity': '',
        'type': '',
        'post': '',
        'sort': '',
        'key': val
    }
    self.headers['Origin'] = 'https://ip.ihuan.me'
    self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
    text = await self.post(session, 'https://ip.ihuan.me/tqdl.html', data=data)
    proxys = text >> to_doc >> extra_xpath(
        '//div[@class="panel-body"]/text()')
    rst = []
    for proxy in proxys:
        # Best-effort per line: nodes that are not "host:port" are skipped
        # rather than aborting the whole page.
        try:
            host, port = (proxy >> extra_head).split(':')
        except Exception:
            continue
        rst.append((host, port))
    return rst
async def prepare(self, session):
    """Collect the daily article links from the xsdaili front page and
    register each absolute URL against self.parse_page in funcmap.
    """
    front = (await self.get(session, "http://www.xsdaili.com/")) >> to_doc
    hrefs = front >> extra_xpath(
        '//div[@class="panel-body"]//div[@class="title"]/a/@href')
    targets = ['http://www.xsdaili.com' + href for href in hrefs]
    self.funcmap = {self.parse_page: targets}
async def parse_page(self, session, url):
    """Scrape (host, port) pairs from an xsdaili article page.

    The proxy list is plain text nodes; the [7:-1] slice trims the page
    chrome around the list (empirically determined for this layout —
    verify if the site changes).
    """
    proxies = await self.get(session, url) >> to_doc >> extra_xpath("//text()")
    rst = []
    for proxy in proxies[7:-1]:
        # FIX: a single node that is not exactly "host:port" used to raise
        # ValueError and abort the whole page; skip malformed nodes instead.
        try:
            host, port = proxy.strip('\n\t\r ').split(':')
        except ValueError:
            continue
        rst.append((host, port))
    return rst
async def parse_page(self, session, url):
    """Scrape (host, port) pairs from a page whose body is a plain-text
    proxy list.

    The [:-10] slice drops trailing non-proxy text nodes (page footer /
    script residue — empirically determined for this layout).
    """
    text = await self.get(session, url)
    proxys = text >> to_doc >> extra_xpath("//body//text()")
    rst = []
    for proxy in proxys[:-10]:
        proxy = proxy.strip('" \t\r\n')
        # FIX: nodes that are not exactly "host:port" used to raise
        # ValueError and abort the whole page; skip them instead.
        try:
            host, port = proxy.split(':')
        except ValueError:
            continue
        rst.append((host, port))
    return rst
async def parse_page(self, session, url):
    """Scrape (host, port) pairs from div.cont text nodes.

    Each node looks like "host:port@..." — everything after '@' is
    discarded before splitting.
    """
    proxys = await self.get(
        session, url) >> to_doc >> extra_xpath('//div[@class="cont"]/text()')
    rst = []
    for proxy in proxys:
        parts = proxy.strip().split('@')[0].split(":")
        # FIX: the original appended tuple(...) of whatever length the
        # split produced, so blank/malformed nodes yielded ('',) instead
        # of a (host, port) pair; only keep well-formed 2-part entries.
        if len(parts) != 2:
            continue
        rst.append(tuple(parts))
    return rst