Ejemplo n.º 1
0
def loadTree(url):
    """Fetch ``url`` and return its body parsed with ``etree.HTML``.

    A ``WebRequest`` is created first, the call then pauses for two
    seconds, and only afterwards is the GET issued, passing that object's
    own ``header`` attribute as the ``headers`` argument.

    :param url: address of the page to download.
    :return: whatever ``etree.HTML`` produces for the response content.
    """
    client = WebRequest()
    # Fixed pause ahead of the request (presumably to throttle scraping of
    # the target site -- confirm with callers).
    time.sleep(2)
    response = client.get(url, headers=client.header)
    return etree.HTML(response.content)
 def freeProxySecond(proxy_number=100):
     """
     Scrape proxies from 66ip (http://www.66ip.cn/).

     The response body is scanned for ``ip:port`` shaped substrings and
     every match is yielded in the order it was found.

     :param proxy_number: number of proxies to ask the site for
     :return: generator yielding the matched ``ip:port`` substrings
     """
     address_pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}')
     template = "http://www.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea="
     url = template.format(proxy_number)
     # NOTE(review): if ``.content`` is bytes (as with requests on
     # Python 3) a str pattern cannot search it -- confirm what
     # WebRequest.get() returns.
     body = WebRequest().get(url).content
     for address in address_pattern.findall(body):
         yield address
Ejemplo n.º 3
0
def run():
    """Validate a hard-coded proxy, then print the origin httpbin reports.

    ``validate`` is called with the proxy address, its scheme and the
    target URL; its return value is not used. The target URL is then
    fetched through the proxy with a ``WebRequest`` and the ``origin``
    field of the JSON reply is printed.
    """
    proxy_address = "27.208.24.164:8060"
    scheme = "http"
    target = "http://httpbin.org/ip"

    validate(proxy_address, scheme, target)

    # Single-entry mapping: scheme -> "<scheme>://<host:port>".
    proxy_url = "{type}://{url}".format(type=scheme, url=proxy_address)
    client = WebRequest()
    response = client.get(url=target, proxies={scheme: proxy_url})
    payload = json.loads(response.content)
    print(payload['origin'])
Ejemplo n.º 4
0
 def freeProxySixth():
     """
     Scrape free proxies from Xdaili:
     http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10

     The JSON reply is read as ``reply['RESULT']['rows']``, where every row
     supplies an ``ip`` and a ``port`` key. This source is best-effort: any
     error while fetching, decoding or walking the reply is logged and
     simply ends the generator instead of propagating to the caller.

     :return: generator yielding ``"ip:port"`` strings
     """
     # Imported locally so the block does not depend on a module-level
     # ``import logging`` being present in the file.
     import logging

     url = 'http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10'
     request = WebRequest()
     try:
         res = request.get(url).json()
         for row in res['RESULT']['rows']:
             yield '{}:{}'.format(row['ip'], row['port'])
     except Exception:
         # Keep the original "never raise" contract, but leave a trace so a
         # dead or changed endpoint is visible instead of silently yielding
         # nothing. Rows yielded before the failure are still delivered.
         logging.getLogger(__name__).warning(
             "freeProxySixth: could not read proxies from %s", url,
             exc_info=True)
def getHtmlTree(url, **kwargs):
    """Download ``url`` with browser-like headers and parse it via ``etree.HTML``.

    :param url: address of the page to fetch.
    :param kwargs: accepted for call compatibility; not used by this function.
    :return: result of ``etree.HTML`` applied to the response content.
    """
    # Fixed header set sent with the request through the ``header`` keyword
    # of ``WebRequest.get``.
    request_headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    response = WebRequest().get(url=url, header=request_headers)
    return etree.HTML(response.content)
Ejemplo n.º 6
0
from proxy.ProxyValidator import validate
from util.WebRequest import WebRequest
import logging, json

# Log line layout: timestamp, level, emitting function name, message.
# NOTE(review): this module-level name shadows the builtin ``format``.
format = "%(asctime)s - [%(levelname)s] - [%(funcName)s] - %(message)s"
# Configure the root logger at INFO level with the layout above.
logging.basicConfig(level=logging.INFO, format=format)

# Module-level WebRequest instance.
# NOTE(review): run() creates its own local instance, so this one appears
# unused in the visible code -- confirm before relying on or removing it.
wr = WebRequest()

def run():
    """Check a fixed proxy with ``validate`` and print httpbin's ``origin``.

    The proxy address, scheme and target URL are hard-coded. ``validate``
    is invoked first (its result is discarded); afterwards a fresh
    ``WebRequest`` fetches the target URL through the proxy and the
    ``origin`` value from the JSON body is printed.
    """
    proxy_address = "27.208.24.164:8060"
    scheme = "http"
    target = "http://httpbin.org/ip"

    validate(proxy_address, scheme, target)

    # Proxy mapping keyed by scheme, value of the form "<scheme>://<host:port>".
    proxy_map = {scheme: "{type}://{url}".format(type=scheme, url=proxy_address)}
    client = WebRequest()
    reply = client.get(url=target, proxies=proxy_map)
    print(json.loads(reply.content)['origin'])


# Call run() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run()