Example #1
async def parse_page(self, session, url):
    # Rows of the proxy table, skipping the header row.
    proxys = await self.get(session, url) >> to_doc >> extra_xpath(
        '//table[@id="ip_list"]//tr[position()>1]')
    rst = []
    for proxy in proxys:
        # The second and third cells of each row hold the host and port.
        host = proxy >> extra_xpath(
            './/td[position()=2]/text()') >> extra_head
        port = proxy >> extra_xpath(
            './/td[position()=3]/text()') >> extra_head
        rst.append((host, port))
    return rst
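
Every example pipes values through a project-specific `>>` operator with helpers named `to_doc`, `extra_xpath` and `extra_head`. Their real implementation is not shown here; the following is only a minimal sketch of how such pipeable helpers could be built on top of lxml. The `Pipe` wrapper and the helper bodies are assumptions, not the project's actual code.

# Sketch only: assumed implementations of the ">>" pipeline helpers.
from lxml import etree


class Pipe:
    """Wrap a function so that `value >> pipe` applies it to the value."""

    def __init__(self, func):
        self.func = func

    def __rrshift__(self, value):
        # Called for `value >> pipe`, since plain values do not define >>.
        return self.func(value)

    def __call__(self, *args, **kwargs):
        # Allows parameterized pipes such as extra_xpath('//tr').
        return Pipe(lambda value: self.func(value, *args, **kwargs))


def _head(value):
    # First item of a list of matches, or the value itself if already text.
    if isinstance(value, (list, tuple)):
        value = value[0] if value else ''
    return str(value).strip()


to_doc = Pipe(etree.HTML)                                # HTML text -> element tree
extra_xpath = Pipe(lambda node, path: node.xpath(path))  # node -> list of matches
extra_head = Pipe(_head)                                 # matches -> first item as text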
Example #2
async def parse_page(self, session, url):
    text = await self.get(session, url)
    # All table rows except the header row.
    proxys = text >> to_doc >> extra_xpath("//table//tr[position()>1]")
    rst = []
    for proxy in proxys:
        # The first and second cells of each row hold the host and port.
        host = proxy >> extra_xpath(
            "./td[position()=1]/text()") >> extra_head
        port = proxy >> extra_xpath(
            "./td[position()=2]/text()") >> extra_head
        rst.append((host, port))
    return rst
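
The `self.get(session, url)` coroutine awaited in every example is expected to return the response body as text so it can feed the `>>` pipeline. A plausible aiohttp-based version is sketched below; the class name, default headers and timeout are assumptions.

import aiohttp


class BaseCrawler:
    # Assumed defaults; the real project may configure these differently.
    headers = {'User-Agent': 'Mozilla/5.0'}

    async def get(self, session: aiohttp.ClientSession, url: str) -> str:
        # Fetch the page and return its body as text.
        async with session.get(url, headers=self.headers,
                               timeout=aiohttp.ClientTimeout(total=10)) as resp:
            return await resp.text()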
Example #3
async def parse_page(self, session, url):
    # Fetch the entry page first; the body is not used (it is overwritten below).
    text = await self.get(session, url)
    self.headers["Referer"] = 'https://ip.ihuan.me/ti.html'
    text = await self.get(session, 'https://ip.ihuan.me/mouse.do')
    # Pull the request key embedded in the mouse.do response.
    val = self.pattern.search(text).group(0).replace('val("', '')
    data = {
        'num': 500,
        'port': '',
        'kill_port': '',
        'address': '',
        'kill_address': '',
        'anonymity': '',
        'type': '',
        'post': '',
        'sort': '',
        'key': val
    }
    self.headers['Origin'] = 'https://ip.ihuan.me'
    self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
    text = await self.post(session,
                           'https://ip.ihuan.me/tqdl.html',
                           data=data)
    # The result panel contains plain "host:port" text nodes.
    proxys = text >> to_doc >> extra_xpath(
        '//div[@class="panel-body"]/text()')
    rst = []
    for proxy in proxys:
        try:
            host, port = (proxy >> extra_head).split(':')
        except Exception:
            continue
        rst.append((host, port))
    return rst
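
This example also relies on `self.post`, a POST counterpart to `self.get`. An assumed implementation, in the same style as the `BaseCrawler` sketch above:

import aiohttp


class BaseCrawler:
    headers = {'User-Agent': 'Mozilla/5.0'}  # assumed, as in the get() sketch

    async def post(self, session: aiohttp.ClientSession, url: str, data=None) -> str:
        # Submit a form and return the body; aiohttp encodes a dict passed
        # via `data` as application/x-www-form-urlencoded by default.
        async with session.post(url, headers=self.headers, data=data) as resp:
            return await resp.text()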
Example #4
async def prepare(self, session):
    # Collect the links to the detail pages listed on the index page.
    urls = await self.get(
        session, "http://www.xsdaili.com/") >> to_doc >> extra_xpath(
            '//div[@class="panel-body"]//div[@class="title"]/a/@href')
    # Map parse_page onto the absolute URLs of those detail pages.
    self.funcmap = {
        self.parse_page: ['http://www.xsdaili.com' + ele for ele in urls]
    }
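
`prepare` populates `self.funcmap`, a mapping from a parse coroutine to the list of URLs it should process. The framework that consumes this mapping is not part of these snippets; a hypothetical driver could look like this:

import asyncio


async def crawl(crawler, session):
    # Hypothetical consumer of funcmap: run every (parser, url) pair
    # concurrently and flatten the (host, port) tuples each parser returns.
    tasks = [parser(session, url)
             for parser, urls in crawler.funcmap.items()
             for url in urls]
    results = await asyncio.gather(*tasks, return_exceptions=True)
    proxies = []
    for res in results:
        if isinstance(res, Exception):
            continue  # skip pages that failed to download or parse
        proxies.extend(res)
    return proxies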
Example #5
async def parse_page(self, session, url):
    proxies = await self.get(session,
                             url) >> to_doc >> extra_xpath("//text()")
    rst = []
    # Skip the leading and trailing text nodes that are not proxy entries.
    for proxy in proxies[7:-1]:
        [host, port] = proxy.strip('\n\t\r ').split(':')
        rst.append((host, port))
    return rst
Example #6
async def parse_page(self, session, url):
    text = await self.get(session, url)
    proxys = text >> to_doc >> extra_xpath("//body//text()")
    rst = []
    # Skip the last ten text nodes, which do not contain proxy entries.
    for proxy in proxys[:-10]:
        proxy = proxy.strip('" \t\r\n')
        [host, port] = proxy.split(':')
        rst.append((host, port))
    return rst
Example #7
async def parse_page(self, session, url):
    proxys = await self.get(
        session,
        url) >> to_doc >> extra_xpath('//div[@class="cont"]/text()')
    rst = []
    for proxy in proxys:
        # Each entry looks like "host:port@...", so keep only the part
        # before the '@' and split it into (host, port).
        proxy = proxy.strip().split('@')[0]
        rst.append(tuple(proxy.split(":")))
    return rst
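
All of the `parse_page` variants share the same calling convention: they take an open client session plus a URL and return a list of `(host, port)` tuples. A minimal way to exercise one of them, with a hypothetical crawler class and a placeholder URL:

import asyncio

import aiohttp


async def main():
    async with aiohttp.ClientSession() as session:
        crawler = SomeProxyCrawler()  # hypothetical subclass providing parse_page
        pairs = await crawler.parse_page(session, 'http://example.com/free-proxy-list')
        for host, port in pairs:
            print(f'{host}:{port}')


asyncio.run(main())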