Ejemplo n.º 1
0
 def crawl_xicidaili(self, page_count=10):
     """Yield ``ip:port`` strings scraped from xicidaili.com listing pages.

     Pages 1 through ``page_count`` of the ``/nn/`` listing are fetched with
     ``get_page`` and parsed with ``etree.HTML``. Pages that yield no usable
     document and rows without both an IP and a port text cell are skipped
     rather than aborting the whole crawl with an ``IndexError``.

     Args:
         page_count: Number of listing pages to crawl, starting at page 1.

     Yields:
         str: One proxy address per table row, formatted as ``ip:port``.
     """
     start_url = "https://www.xicidaili.com/nn/{}"
     for page in range(1, page_count + 1):
         html = get_page(start_url.format(page))
         # NOTE(review): get_page is defined elsewhere; a falsy result is
         # assumed to mean the fetch failed -- confirm against its contract.
         if not html:
             continue
         selected = etree.HTML(html)
         if selected is None:
             continue
         rows = selected.xpath('//table[@id="ip_list"]//tr')
         # The first matched row is dropped (presumably the table header).
         # Slicing, unlike indexing rows[0], is safe when nothing matched.
         for row in rows[1:]:
             # Evaluate the XPath once per row instead of once per field.
             cells = row.xpath('./td/text()')
             if len(cells) < 2:
                 continue
             ip, port = cells[0], cells[1]
             yield ":".join([ip, port])
Ejemplo n.º 2
0
 def crawl_ip3366(self, page_count=5):
     """Yield ``ip:port`` strings scraped from ip3366.net listing pages.

     Pages 1 through ``page_count`` of the ``stype=1`` listing are fetched
     with ``get_page`` and parsed with ``etree.HTML``. Pages that yield no
     usable document and rows without both an IP and a port text cell are
     skipped rather than aborting the whole crawl with an ``IndexError``.

     Args:
         page_count: Number of listing pages to crawl, starting at page 1.

     Yields:
         str: One proxy address per table row, formatted as ``ip:port``.
     """
     start_url = "http://www.ip3366.net/?stype=1&page={}"
     for page in range(1, page_count + 1):
         html = get_page(start_url.format(page))
         # NOTE(review): get_page is defined elsewhere; a falsy result is
         # assumed to mean the fetch failed -- confirm against its contract.
         if not html:
             continue
         selected = etree.HTML(html)
         if selected is None:
             continue
         rows = selected.xpath('//div[@id="list"]/table//tr')
         # The first matched row is dropped (presumably the table header).
         # Slicing, unlike indexing rows[0], is safe when nothing matched.
         for row in rows[1:]:
             # Evaluate the XPath once per row instead of once per field.
             cells = row.xpath('./td/text()')
             if len(cells) < 2:
                 continue
             ip, port = cells[0], cells[1]
             yield ":".join([ip, port])
Ejemplo n.º 3
0
 def crawl_89ip(self, page_count=5):
     """Yield ``ip:port`` strings scraped from 89ip.cn listing pages.

     Pages 1 through ``page_count`` (``index_<n>.html``) are fetched with
     ``get_page`` and parsed with ``etree.HTML``. The IP and port text is
     stripped of surrounding whitespace before being joined. Pages that
     yield no usable document and rows without both an IP and a port text
     cell are skipped rather than aborting the whole crawl with an
     ``IndexError``.

     Args:
         page_count: Number of listing pages to crawl, starting at page 1.

     Yields:
         str: One proxy address per table row, formatted as ``ip:port``.
     """
     start_url = "http://www.89ip.cn/index_{}.html"
     row_xpath = (
         '//div[@class="fly-panel"]/div[@class="layui-form"]/table//tr')
     for page in range(1, page_count + 1):
         html = get_page(start_url.format(page))
         # NOTE(review): get_page is defined elsewhere; a falsy result is
         # assumed to mean the fetch failed -- confirm against its contract.
         if not html:
             continue
         selected = etree.HTML(html)
         if selected is None:
             continue
         rows = selected.xpath(row_xpath)
         # The first matched row is dropped (presumably the table header).
         # Slicing, unlike indexing rows[0], is safe when nothing matched.
         for row in rows[1:]:
             # Evaluate the XPath once per row instead of once per field.
             cells = row.xpath('./td/text()')
             if len(cells) < 2:
                 continue
             ip = cells[0].strip()
             port = cells[1].strip()
             yield ":".join([ip, port])
Ejemplo n.º 4
0
 def crawl_daili66(self, page_count=5):
     """Yield ``ip:port`` strings scraped from 66ip.cn listing pages.

     Pages 1 through ``page_count`` (``<n>.html``) are fetched with
     ``get_page`` and parsed with ``etree.HTML``. Only the first two text
     cells of each row (IP and port) are used; the remaining columns are
     not read, so a row needs only those two cells to be emitted. Pages
     that yield no usable document and rows with fewer than two text cells
     are skipped rather than aborting the whole crawl with an
     ``IndexError``.

     Args:
         page_count: Number of listing pages to crawl, starting at page 1.

     Yields:
         str: One proxy address per table row, formatted as ``ip:port``.
     """
     start_url = "http://www.66ip.cn/{}.html"
     row_xpath = (
         '//div[@class="container"]/div[@class="containerbox boxindex"]/div[@align="center"]/table//tr'
     )
     for page in range(1, page_count + 1):
         html = get_page(start_url.format(page))
         # NOTE(review): get_page is defined elsewhere; a falsy result is
         # assumed to mean the fetch failed -- confirm against its contract.
         if not html:
             continue
         selected = etree.HTML(html)
         if selected is None:
             continue
         rows = selected.xpath(row_xpath)
         # The first matched row is dropped (presumably the table header).
         # Slicing, unlike indexing rows[0], is safe when nothing matched.
         for row in rows[1:]:
             # Evaluate the XPath once per row instead of once per field.
             cells = row.xpath('./td/text()')
             if len(cells) < 2:
                 continue
             ip, port = cells[0], cells[1]
             yield ":".join([ip, port])