Example 1
def crawl_66ip():
    """
    66ip proxy: http://www.66ip.cn
    """
    url = 'http://www.66ip.cn/nmtq.php?getnum=100&isp=0&anonymoustype=4&start=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip'
    res = redefine_requests(url)
    if not res:
        return
    # The API returns plain text; pull every "ip:port" pair out with a regex.
    items = re.findall(pattern=r'\d+\.\d+\.\d+\.\d+:\d+', string=res.text)
    for item in items:
        ip, port = item.split(':')
        RedisClient.add_proxy_nofilter(check_str='{}|{}'.format(ip, port))
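All of the crawlers in this section depend on two helpers that are not shown in the examples: redefine_requests() and RedisClient.add_proxy_nofilter(). The sketch below is only an assumption of what they might look like, based on how they are called: redefine_requests() as a thin wrapper around requests.get() that returns None on failure, and RedisClient as a Redis-backed store for "ip|port" strings. The header values, the Redis connection settings, and the set name 'proxies:unchecked' are all hypothetical.

# Imports used by the crawlers in this section.
import re
import requests
import redis
from bs4 import BeautifulSoup

HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; proxy-crawler)'}  # hypothetical UA

def redefine_requests(url, timeout=10):
    """Fetch a URL and return the response, or None if the request fails."""
    try:
        res = requests.get(url, headers=HEADERS, timeout=timeout)
        if res.status_code == 200:
            return res
    except requests.RequestException:
        pass
    return None

class RedisClient:
    # Connection settings and key name are assumptions for this sketch.
    _db = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

    @classmethod
    def add_proxy_nofilter(cls, check_str):
        """Add an "ip|port" string to the proxy set without any validation."""
        cls._db.sadd('proxies:unchecked', check_str)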
Example 2
def crawl_kuaidaili():
    """
    Kuaidaili proxy: https://www.kuaidaili.com
    """
    # Crawl the first five pages of the free high-anonymity list.
    for page in range(1, 6):
        url = "https://www.kuaidaili.com/free/inha/{}/".format(page)
        res = redefine_requests(url)
        if not res:
            continue

        soup = BeautifulSoup(res.text, 'lxml')
        items = soup.select('#list table tbody tr')
        for item in items:
            # Each table cell carries a data-title attribute naming its column.
            ip = item.find('td', {'data-title': 'IP'}).text.strip()
            port = item.find('td', {'data-title': 'PORT'}).text.strip()
            RedisClient.add_proxy_nofilter(check_str='{}|{}'.format(ip, port))
Example 3
def crawl_ip3366():
    """
    Yun proxy (ip3366): http://www.ip3366.net
    """
    for page in range(1, 6):
        url = "http://www.ip3366.net/?stype=1&page={}".format(page)
        res = redefine_requests(url)
        if not res:
            # Skip a failed page instead of aborting the whole crawl.
            continue
        soup = BeautifulSoup(res.text, 'lxml')
        items = soup.select('#list table tbody tr')
        for item in items:
            tds = item.select('td')
            ip = tds[0].text.strip()
            port = tds[1].text.strip()
            RedisClient.add_proxy_nofilter(check_str='{}|{}'.format(ip, port))
Example 4
def crawl_swei360():
    """
    360 proxy: http://www.swei360.com
    """
    for page in range(1, 2):
        for style in [1, 3]:
            print(page, style)
            url = "http://www.swei360.com/free/?stype={}&page={}".format(style, page)
            res = redefine_requests(url, timeout=20)
            if not res:
                continue
            soup = BeautifulSoup(res.text, 'lxml')
            items = soup.select('#list table tbody tr')
            for item in items:
                tds = item.select('td')
                ip = tds[0].text.strip()
                port = tds[1].text.strip()
                RedisClient.add_proxy_nofilter(check_str='{}|{}'.format(ip, port))
Example 5
def crawl_data5u():
    """
    Data5u proxy: http://www.data5u.com/
    """
    urls = ['http://www.data5u.com/free/gwgn/index.shtml',
            'http://www.data5u.com/free/gwpt/index.shtml',
            'http://www.data5u.com/free/index.shtml'
            ]
    for url in urls:
        res = redefine_requests(url)
        if not res:
            # Skip a failed list instead of aborting the whole crawl.
            continue
        soup = BeautifulSoup(res.text, 'lxml')
        items = soup.find_all('ul', class_='l2')
        for item in items:
            spans = item.find_all('span')
            ip = spans[0].text.strip()
            port = spans[1].text.strip()
            RedisClient.add_proxy_nofilter(check_str='{}|{}'.format(ip, port))
Example 6
def crawl_xici():
    """
    Xici proxy: http://www.xicidaili.com
    """
    urls = ['http://www.xicidaili.com/nn/',
            'http://www.xicidaili.com/nt/',
            'http://www.xicidaili.com/wn/',
            'http://www.xicidaili.com/wt/'
            ]
    for url in urls:
        for page in range(1, 2):
            res = redefine_requests('{}{}'.format(url, page))
            if not res:
                continue
            soup = BeautifulSoup(res.text, 'lxml')
            # Note: only table rows carrying the "odd" class are selected here.
            items = soup.select('#ip_list .odd')
            for item in items:
                tds = item.select('td')
                ip = tds[1].text.strip()
                port = tds[2].text.strip()
                RedisClient.add_proxy_nofilter(check_str='{}|{}'.format(ip, port))
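The examples never show how the crawlers are invoked. A minimal sketch of a driver, assuming they are simply called one after another (the original project may schedule them differently), could look like this:

def run_all_crawlers():
    # One entry per example in this section.
    crawlers = [
        crawl_66ip,
        crawl_kuaidaili,
        crawl_ip3366,
        crawl_swei360,
        crawl_data5u,
        crawl_xici,
    ]
    for crawler in crawlers:
        try:
            print('running', crawler.__name__)
            crawler()
        except Exception as exc:
            # A failing source should not stop the whole run.
            print('crawler {} failed: {}'.format(crawler.__name__, exc))

if __name__ == '__main__':
    run_all_crawlers()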