Ejemplo n.º 1
0
def Proxy_kuaidaili():
    """
    Crawl the Kuaidaili (https://www.kuaidaili.com) free proxy listing and print each proxy.

    Pages 1 through 49 of the ``/free/inha/`` listing are fetched with
    ``get_html_tree``. For every table row except the first one on a page
    (presumably the header row), the first two cell texts are joined with
    ``:`` and printed with an ``http://`` prefix. There is a 0.5 second pause
    before each row is printed.
    """
    page_template = 'https://www.kuaidaili.com/free/inha/{page}/'
    for page_no in range(1, 50):
        listing = get_html_tree(page_template.format(page=page_no))
        rows = listing.xpath('.//table//tr')
        # The first row is skipped; only the remaining rows are treated as proxies.
        for row in rows[1:]:
            time.sleep(0.5)
            host_port = ':'.join(row.xpath('./td/text()')[0:2])
            print("快代理~~~~http://" + host_port)
Ejemplo n.º 2
0
def Proxy_xici():
    """
    Crawl the Xici proxy (http://www.xicidaili.com) listing and print each proxy.

    Pages 1 through 49 of the ``/nn/`` listing are fetched with
    ``get_html_tree``. For every row of the ``ip_list`` table after the first,
    the cell texts are read once: the first two are joined with ``:`` to form
    the address, and the sixth, lower-cased, is used as the URL scheme. There
    is a 0.5 second pause before each proxy is printed.

    Any exception raised while handling a row (for example a row with fewer
    than six cell texts) is printed and the crawl continues with the next row.

    :return: None
    """
    for page in range(1, 50):
        url = 'http://www.xicidaili.com/nn/{page}'.format(page=page)
        tree = get_html_tree(url)
        proxy_list = tree.xpath('.//table[@id="ip_list"]//tr[position()>1]')
        for proxy in proxy_list:
            try:
                # Evaluate the cell query once per row and reuse the result
                # for both the scheme and the address.
                cells = proxy.xpath('./td/text()')
                http_s = cells[5]
                ip = ':'.join(cells[0:2])
                time.sleep(0.5)
                print("西刺~~~~~{}://{}".format(http_s.lower(), ip))
            except Exception as e:
                # Best effort: one bad row must not stop the whole crawl.
                print(e)
Ejemplo n.º 3
0
 def Proxy_5U():
     """
     Crawl the data5u (http://www.data5u.com/) free proxy page and store each proxy.

     The single listing page is fetched with ``get_html_tree``. For every
     ``<ul class="l2">`` element, the second link text found under its
     ``<li>`` items is lower-cased and used as the URL scheme, and the first
     two ``<li>`` texts are joined with ``:`` to form the address. The
     resulting ``scheme://address`` string is handed to
     ``db_class().save_proxy``.

     Any exception raised while handling an entry is printed and the next
     entry is processed.

     :return: None
     """
     listing = get_html_tree('http://www.data5u.com/free/gngn/index.shtml')
     for entry in listing.xpath('//ul[@class="l2"]'):
         try:
             scheme = entry.xpath('.//li/a/text()')[1].lower()
             address = ':'.join(entry.xpath('.//li/text()')[0:2])
             db_class().save_proxy("{}://{}".format(scheme, address))
         except Exception as err:
             print(err)
Ejemplo n.º 4
0
def Proxy_66ip():
    """
    Crawl the per-area proxy listings of 66ip (http://www.66ip.cn/) and print each proxy.

    The function takes no arguments. It walks area indexes 1 through 32 and,
    for each area, pages 1 through 10, fetching
    ``areaindex_<area>/<page>.html`` with ``get_html_tree``. (According to the
    original note, each area index is a regional page, e.g. 1 for Beijing and
    2 for Shanghai -- not verifiable from this code.)

    Every table row after the first is printed as
    ``66IP~~~~http://<ip>:<port>``, using the text of its first and second
    cells, with a 0.5 second pause before each printed row. Rows missing
    either cell text are skipped so that one malformed row cannot abort the
    whole crawl.

    :return: None
    """
    for area_index in range(1, 33):
        for i in range(1, 11):
            url = "http://www.66ip.cn/areaindex_{}/{}.html".format(
                area_index, i)
            html_tree = get_html_tree(url)
            tr_list = html_tree.xpath(
                "//*[@id='footer']/div/table/tr[position()>1]")
            # An empty result simply yields no iterations; no explicit check needed.
            for tr in tr_list:
                ip_texts = tr.xpath("./td[1]/text()")
                port_texts = tr.xpath("./td[2]/text()")
                if not ip_texts or not port_texts:
                    # Malformed row (empty IP or port cell): nothing to report.
                    continue
                time.sleep(0.5)
                print("66IP~~~~http://" + ip_texts[0] + ":" + port_texts[0])