Beispiel #1
0
 def get_66ip_proxies(num=50):
     """Yield "ip:port" strings scraped from the 66ip.cn mobile export page.

     The page is fetched with ``getHTMLText`` and every substring of the
     response shaped like ``digits.digits.digits.digits:digits`` is yielded
     in the order it appears. No further validation of the address or port
     is performed here.

     :param num: value substituted into the ``tqsl`` query parameter
                 (presumably the number of proxies requested -- confirm
                 against the site).
     :return: generator of matched "ip:port" strings.
     """
     url = "http://m.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=".format(
         num)
     html = getHTMLText(url, headers=HEADERS)
     # Raw string on purpose: in a plain literal "\d", "\." and "\:" are
     # invalid string escapes, which newer Python versions warn about and
     # will eventually reject. The compiled pattern is unchanged (":" never
     # needed escaping in a regex).
     pattern = re.compile(r"\d+\.\d+\.\d+\.\d+:\d+")
     for proxy in pattern.findall(html):
         yield proxy
 def freeProxyFirst():
     """
     Fetch proxies from the mimvp API
     http://proxy.mimvp.com/api/fetch.php?orderid=860170713165250819

     The response body is treated as plain text with one entry per line
     (presumably "ip:port" -- the format is not validated here).

     :return: generator yielding each non-blank line of the response with
              surrounding whitespace removed
     """
     url = 'http://proxy.mimvp.com/api/fetch.php?orderid=860170713165250819'
     html = getHTMLText(url, headers=HEADER())
     # splitlines() copes with "\r\n" as well as bare "\n" line endings.
     # Blank entries (e.g. produced by a trailing newline or an empty body)
     # are skipped so callers never receive an empty string as a proxy.
     for line in html.splitlines():
         proxy = line.strip()
         if proxy:
             yield proxy
    def freeProxySecond(proxy_number=100):
        """
        Scrape proxies from 66ip (http://www.66ip.cn/).

        :param proxy_number: number of proxies, substituted into the
                             ``tqsl`` query parameter of the request URL
        :return: generator yielding every "ip:port"-shaped substring found
                 in the response, in order of appearance
        """
        url_template = "http://m.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea="
        address_re = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}')

        page = getHTMLText(url_template.format(proxy_number), headers=HEADER())
        # The pattern has no capture groups, so each match's full text is
        # exactly what re.findall would have returned.
        for found in address_re.finditer(page):
            yield found.group()