def freeProxySecondByBrowser():
    """
    Scrape the first 5 listing pages of the Bugng proxy site
    (http://www.bugng.com/) using a Chrome browser.

    Each page is loaded with ``getHtmlTreeByBrowser(url, driver)``; for every
    ``tr`` under ``tbody#target`` except the first one, the first two ``td``
    text nodes are joined with ``:`` (presumably ``ip:port`` -- confirm
    against the live page layout).

    Scraping is best-effort: when any page fails, a message is printed and
    the entries collected so far are returned.

    :return: list of ``"<td1>:<td2>"`` strings, possibly empty
    """
    # Pages 1..5 inclusive; range() excludes its stop value, so the stop is 6.
    url_list = ["http://www.bugng.com/gngn?page=%s" % i for i in range(1, 6)]
    arr = []
    # One browser instance is shared by all page requests.
    driver = webdriver.Chrome()
    try:
        for url in url_list:
            # Single-argument print(...) behaves the same as the print statement.
            print(url)
            tree = getHtmlTreeByBrowser(url, driver)
            rows = tree.xpath('.//tbody[@id="target"]//tr')
            # The first row is skipped, as in the original (presumably a header).
            for row in rows[1:]:
                fields = row.xpath('./td/text()')[0:2]
                # A row without two text cells cannot form an address; skip it
                # instead of appending '' or a lone field.
                if len(fields) == 2:
                    arr.append(':'.join(fields))
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        print("抓取bugng理ip报错")
    finally:
        # quit() ends the whole browser session; close() would only close the
        # current window.
        driver.quit()
    return arr
def freeProxySixthBYBrowser():
    """
    Scrape the first 10 listing pages of the "Yun" proxy site
    (http://www.ip3366.net/, ``stype=1``) using a Chrome browser.

    The original note describes this listing as high-anonymity proxies.
    Each page is loaded with ``getHtmlTreeByBrowser(url, driver)``; for every
    ``tr`` of the table inside ``div#list`` except the first one, the first
    two ``td`` text nodes are joined with ``:`` (presumably ``ip:port`` --
    confirm against the live page layout).

    Scraping is best-effort: when any page fails, a message is printed and
    the entries collected so far are returned.

    :return: list of ``"<td1>:<td2>"`` strings, possibly empty
    """
    # Pages 1..10 inclusive; range() excludes its stop value, so the stop is 11.
    url_list = [
        "http://www.ip3366.net/?stype=1&page=%s" % i for i in range(1, 11)
    ]
    arr = []
    # One browser instance is shared by all page requests.
    driver = webdriver.Chrome()
    try:
        for url in url_list:
            # Single-argument print(...) behaves the same as the print statement.
            print(url)
            tree = getHtmlTreeByBrowser(url, driver)
            rows = tree.xpath('.//div[@id="list"]//table//tr')
            # The first row is skipped, as in the original (presumably a header).
            for row in rows[1:]:
                fields = row.xpath('./td/text()')[0:2]
                # A row without two text cells cannot form an address; skip it
                # instead of appending '' or a lone field.
                if len(fields) == 2:
                    arr.append(':'.join(fields))
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        print("抓取云代理ip报错")
    finally:
        # quit() ends the whole browser session; close() would only close the
        # current window.
        driver.quit()
    return arr
def freeProxyThirdBYBrowser():
    """
    Scrape the first listing page of the Mimvp proxy site
    (https://proxy.mimvp.com/free.php) using a Chrome browser.

    For every ``tr`` of the table inside ``div.free-list`` except the first
    one, the second and third ``td`` text nodes are joined with ``:``.

    TODO: this site renders the port number as an image, so the joined value
    is probably not a usable ``ip:port`` pair yet.

    Scraping is best-effort: when the page fails, a message is printed and
    the entries collected so far are returned.

    :return: list of ``"<td2>:<td3>"`` strings, possibly empty
    """
    url_list = ["https://proxy.mimvp.com/free.php?proxy=in_hp&sort=&page=1"]
    arr = []
    # One browser instance is shared by all page requests.
    driver = webdriver.Chrome()
    try:
        for url in url_list:
            # Single-argument print(...) behaves the same as the print statement.
            print(url)
            tree = getHtmlTreeByBrowser(url, driver)
            rows = tree.xpath('.//div[@class="free-list"]//table//tr')
            # The first row is skipped, as in the original (presumably a header).
            for row in rows[1:]:
                # Text nodes 1 and 2 are used here; node 0 is ignored.
                fields = row.xpath('./td/text()')[1:3]
                # A row without both text cells cannot form an address; skip
                # it instead of appending '' or a lone field.
                if len(fields) == 2:
                    arr.append(':'.join(fields))
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        print("抓取米扑理ip报错")
    finally:
        # quit() ends the whole browser session; close() would only close the
        # current window.
        driver.quit()
    return arr
def freeProxyFirstBYBrowser(page=5):
    """
    Scrape the Kuaidaili free proxy listing (http://www.kuaidaili.com/)
    using a Chrome browser.

    Pages ``1..page`` of ``/free/inha/`` are loaded with
    ``getHtmlTreeByBrowser(url, driver)``; for every ``tbody/tr`` of the
    table inside ``div#list``, the first two ``td`` text nodes are joined
    with ``:`` (presumably ``ip:port`` -- confirm against the live page
    layout).

    Scraping is best-effort: when any page fails, a message is printed and
    the entries collected so far are returned.

    :param page: number of listing pages to fetch, starting at page 1
    :return: list of ``"<td1>:<td2>"`` strings, possibly empty
    """
    # Built before the browser starts so that a bad ``page`` value cannot
    # leave a browser running. The loop variable no longer shadows ``page``.
    url_list = [
        'http://www.kuaidaili.com/free/inha/{page}/'.format(page=page_no)
        for page_no in range(1, page + 1)
    ]
    arr = []
    # One browser instance is shared by all page requests.
    driver = webdriver.Chrome()
    try:
        for url in url_list:
            tree = getHtmlTreeByBrowser(url, driver)
            rows = tree.xpath('.//div[@id="list"]//table//tbody/tr')
            for row in rows:
                fields = row.xpath('./td/text()')[0:2]
                # A row without two text cells cannot form an address; skip it
                # instead of appending '' or a lone field.
                if len(fields) == 2:
                    arr.append(':'.join(fields))
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        # Single-argument print(...) behaves the same as the print statement.
        print("获取快代理ip报错")
    finally:
        # quit() ends the whole browser session; close() would only close the
        # current window.
        driver.quit()
    return arr
def freeProxyFifthBYBrowser():
    """
    Scrape the first page of the Xdaili free proxy listing
    (http://www.xdaili.cn/freeproxy) using a Chrome browser.

    The original note describes this listing as high-anonymity proxies.
    The page is loaded with ``getHtmlTreeByBrowser(url, driver)``; for every
    ``tr`` whose class is ``warning``, the first two ``td`` text nodes are
    joined with ``:`` (presumably ``ip:port`` -- confirm against the live
    page layout).

    Scraping is best-effort: when the page fails, a message is printed and
    the entries collected so far are returned.

    :return: list of ``"<td1>:<td2>"`` strings, possibly empty
    """
    url = "http://www.xdaili.cn/freeproxy"
    arr = []
    driver = webdriver.Chrome()
    try:
        # Single-argument print(...) behaves the same as the print statement.
        print(url)
        tree = getHtmlTreeByBrowser(url, driver)
        rows = tree.xpath('.//tr[@class="warning"]')
        for row in rows:
            fields = row.xpath('./td/text()')[0:2]
            # A row without two text cells cannot form an address; skip it
            # instead of appending '' or a lone field.
            if len(fields) == 2:
                arr.append(':'.join(fields))
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        print("抓取讯代理ip报错")
    finally:
        # quit() ends the whole browser session; close() would only close the
        # current window.
        driver.quit()
    return arr
def freeProxyFourthBYBrowser():
    """
    Scrape the first page of the Xici proxy listing
    (http://www.xicidaili.com/nn) using a Chrome browser.

    Each URL is loaded with ``getHtmlTreeByBrowser(url, driver)``; for every
    ``tr`` of ``table#ip_list``, the first two ``td`` text nodes are joined
    with ``:`` (presumably ``ip:port`` -- confirm against the live page
    layout). Rows that do not provide two ``td`` text nodes are skipped.

    Scraping is best-effort: when any page fails, a message is printed and
    the entries collected so far are returned.

    :return: list of ``"<td1>:<td2>"`` strings, possibly empty
    """
    url_list = [
        'http://www.xicidaili.com/nn',  # high anonymity
        # 'http://www.xicidaili.com/nt',  # transparent
    ]
    arr = []
    # One browser instance is shared by all page requests.
    driver = webdriver.Chrome()
    try:
        for each_url in url_list:
            tree = getHtmlTreeByBrowser(each_url, driver)
            rows = tree.xpath('.//table[@id="ip_list"]//tr')
            for row in rows:
                fields = row.xpath('./td/text()')[0:2]
                # Every tr is visited here, so a row without td text would
                # otherwise be appended as ''; keep only complete pairs.
                if len(fields) == 2:
                    arr.append(':'.join(fields))
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate.
        # Single-argument print(...) behaves the same as the print statement.
        print("获取西刺代理ip报错")
    finally:
        # quit() ends the whole browser session; close() would only close the
        # current window.
        driver.quit()
    return arr