Example No. 1
    def freeProxySecondByBrowser():
        """
        Scrape pages 1-4 of free proxies from http://www.bugng.com/
        :return: list of "ip:port" strings
        """
        # Browser instance
        driver = webdriver.Chrome()

        url_list = [
            "http://www.bugng.com/gngn?page=%s" % i for i in range(1, 5)
        ]
        arr = []
        try:
            for url in url_list:
                print(url)
                tree = getHtmlTreeByBrowser(url, driver)

                proxy_list = tree.xpath('.//tbody[@id="target"]//tr')
                # Skip the header row; the first two cells are ip and port
                for proxy in proxy_list[1:]:
                    arr.append(':'.join(proxy.xpath('./td/text()')[0:2]))

        except Exception as e:
            print("Error scraping bugng proxy IPs:", e)
        finally:
            if driver:
                driver.close()
        return arr
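
Every example in this section relies on a shared getHtmlTreeByBrowser(url, driver) helper and a webdriver import that are not shown here. A minimal sketch of what that helper and the assumed imports might look like, using Selenium to render the page and lxml to parse the result (the body below is an assumption, not the project's actual implementation):

    from lxml import etree
    from selenium import webdriver

    def getHtmlTreeByBrowser(url, driver):
        """Load `url` in the shared Chrome driver and return an lxml element tree."""
        # Let the browser fetch and render the page (including any JavaScript),
        # then parse the rendered source so callers can run XPath queries on it.
        driver.get(url)
        return etree.HTML(driver.page_source)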
Example No. 2
    def freeProxySixthBYBrowser():
        """
        Scrape pages 1-9 of high-anonymity proxies from YunDaili, http://www.ip3366.net/
        :return: list of "ip:port" strings
        """
        # Browser instance
        driver = webdriver.Chrome()

        url_list = [
            "http://www.ip3366.net/?stype=1&page=%s" % i for i in range(1, 10)
        ]
        arr = []
        try:
            for url in url_list:
                print(url)
                tree = getHtmlTreeByBrowser(url, driver)

                proxy_list = tree.xpath('.//div[@id="list"]//table//tr')
                # Skip the header row; the first two cells are ip and port
                for proxy in proxy_list[1:]:
                    arr.append(':'.join(proxy.xpath('./td/text()')[0:2]))

        except Exception as e:
            print("Error scraping YunDaili proxy IPs:", e)
        finally:
            if driver:
                driver.close()
        return arr
Example No. 3
    def freeProxyThirdBYBrowser():
        """
        Scrape the first page of MiMVP free proxies, https://proxy.mimvp.com/
        :return: list of "ip:port" strings
        """
        # TODO: this site renders the port number as an image, so the text-only
        # XPath below cannot recover the real port.
        # Browser instance
        driver = webdriver.Chrome()

        url_list = [
            "https://proxy.mimvp.com/free.php?proxy=in_hp&sort=&page=1"
        ]
        arr = []
        try:
            for url in url_list:
                print(url)
                tree = getHtmlTreeByBrowser(url, driver)

                proxy_list = tree.xpath(
                    './/div[@class="free-list"]//table//tr')
                # Skip the header row; cells 1 and 2 hold the ip and port columns
                for proxy in proxy_list[1:]:
                    arr.append(':'.join(proxy.xpath('./td/text()')[1:3]))

        except Exception as e:
            print("Error scraping MiMVP proxy IPs:", e)
        finally:
            if driver:
                driver.close()
        return arr
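
The TODO above notes that MiMVP renders the port number as an image, so the text-only XPath cannot recover it. One possible workaround, sketched here as a hypothetical helper (portFromImage is not part of the original code; it assumes the port <img>'s src is directly fetchable and that requests, Pillow, and pytesseract are installed):

    import io

    import requests
    from PIL import Image
    import pytesseract

    def portFromImage(img_url):
        """Download the port image and OCR it into a digit string."""
        resp = requests.get(img_url, timeout=10)
        img = Image.open(io.BytesIO(resp.content))
        # Treat the image as a single line of text and whitelist digits only.
        text = pytesseract.image_to_string(
            img, config='--psm 7 -c tessedit_char_whitelist=0123456789')
        return ''.join(ch for ch in text if ch.isdigit())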
Example No. 4
    def freeProxyFirstBYBrowser(page=5):
        """
        Scrape the first `page` pages of Kuaidaili proxies, http://www.kuaidaili.com/
        :return: list of "ip:port" strings
        """
        # Browser instance
        driver = webdriver.Chrome()
        url_list = ('http://www.kuaidaili.com/free/inha/{page}/'.format(page=i)
                    for i in range(1, page + 1))
        arr = []

        try:
            for url in url_list:
                tree = getHtmlTreeByBrowser(url, driver)
                proxy_list = tree.xpath('.//div[@id="list"]//table//tbody/tr')
                for proxy in proxy_list:
                    arr.append(':'.join(proxy.xpath('./td/text()')[0:2]))
        except Exception as e:
            print("Error scraping Kuaidaili proxy IPs:", e)
        finally:
            if driver:
                driver.close()
        return arr
Example No. 5
    def freeProxyFifthBYBrowser():
        """
        Scrape the first page of high-anonymity proxies from Xdaili, http://www.xdaili.cn/freeproxy
        :return: list of "ip:port" strings
        """
        # Browser instance
        driver = webdriver.Chrome()
        url = "http://www.xdaili.cn/freeproxy"
        arr = []
        try:
            print(url)

            tree = getHtmlTreeByBrowser(url, driver)

            proxy_list = tree.xpath('.//tr[@class="warning"]')
            for proxy in proxy_list:
                arr.append(':'.join(proxy.xpath('./td/text()')[0:2]))

        except Exception as e:
            print("Error scraping Xdaili proxy IPs:", e)
        finally:
            if driver:
                driver.close()
        return arr
Example No. 6
    def freeProxyFourthBYBrowser():
        """
        Scrape the first page of Xici proxies, http://www.xicidaili.com/
        :return: list of "ip:port" strings
        """
        url_list = [
            'http://www.xicidaili.com/nn',  # high anonymity
            # 'http://www.xicidaili.com/nt',  # transparent
        ]
        arr = []
        # Browser instance
        driver = webdriver.Chrome()
        try:
            for each_url in url_list:
                tree = getHtmlTreeByBrowser(each_url, driver)
                proxy_list = tree.xpath('.//table[@id="ip_list"]//tr')
                # Skip the header row; the first two text cells are ip and port
                for proxy in proxy_list[1:]:
                    arr.append(':'.join(proxy.xpath('./td/text()')[0:2]))
        except Exception as e:
            print("Error scraping Xici proxy IPs:", e)
        finally:
            if driver:
                driver.close()
        return arr
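
A short usage sketch, assuming the functions above are importable at module level (the httpbin.org check is only an illustration): collect the "ip:port" strings from one scraper and smoke-test a few of them with requests before trusting them.

    import requests

    if __name__ == '__main__':
        candidates = freeProxyFirstBYBrowser(page=1)  # e.g. ['1.2.3.4:8080', ...]
        for candidate in candidates[:3]:
            try:
                resp = requests.get('http://httpbin.org/ip',
                                    proxies={'http': 'http://' + candidate},
                                    timeout=5)
                print(candidate, '->', resp.status_code)
            except requests.RequestException as exc:
                print(candidate, 'failed:', exc)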