Esempio n. 1
0
File: util.py Progetto: pyfisch/nltk
def set_proxy(proxy, user=None, password=""):
    """
    Set the HTTP proxy for Python to download through.

    If ``proxy`` is None then tries to set proxy from environment or system
    settings.

    The same proxy is registered for both ``http`` and ``https`` URLs, and
    the resulting opener is installed with ``install_opener``, replacing
    whatever opener was installed before.

    :param proxy: The HTTP proxy server to use. For example:
        'http://proxy.example.com:3128/'
    :param user: The username to authenticate with. Use None to disable
        authentication.
    :param password: The password to authenticate with.
    :raises ValueError: If ``proxy`` is None and no default ``http`` proxy
        could be detected.
    """
    if proxy is None:
        # Try and find the system proxy settings
        try:
            proxy = getproxies()["http"]
        except KeyError:
            raise ValueError("Could not detect default proxy settings")

    # Set up the proxy handler
    proxy_handler = ProxyHandler({"https": proxy, "http": proxy})
    opener = build_opener(proxy_handler)

    if user is not None:
        # Set up proxy authentication if provided. Both basic and digest
        # handlers share one password manager, so either challenge type
        # can be answered with the same credentials.
        password_manager = HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password)
        opener.add_handler(ProxyBasicAuthHandler(password_manager))
        opener.add_handler(ProxyDigestAuthHandler(password_manager))

    # Override the existing url opener
    install_opener(opener)
from urllib.request import HTTPPasswordMgrWithDefaultRealm as mgr
from urllib.request import ProxyBasicAuthHandler, ProxyHandler, build_opener, install_opener, Request, urlopen

# Fetch a page through an authenticating HTTP proxy and print its HTML.
#
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file instead.
user = '******'
passwd = 'vch6gryw'
proxyserver = '117.48.199.230:16816'

# Route plain-HTTP requests through the proxy. ProxyBasicAuthHandler only
# answers the proxy's 407 challenge; it does not select a proxy. Without an
# explicit ProxyHandler, build_opener falls back to the environment's proxy
# settings and the server above would never be used.
proxy_handler = ProxyHandler({'http': proxyserver})

# Password manager holding the proxy credentials. The first argument is the
# realm; None makes the entry the default for any realm.
passwdmgr = mgr()
passwdmgr.add_password(None, proxyserver, user, passwd)

# Supplies the stored credentials when the proxy asks for authentication.
# This replaces the earlier approach of embedding "user:password@host:port"
# directly in the proxy URL.
proxy_auth_handler = ProxyBasicAuthHandler(passwdmgr)

opener = build_opener(proxy_handler, proxy_auth_handler)
opener.addheaders = [(
    'User-Agent',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
)]
# Make this opener the one used by every subsequent urlopen() call.
install_opener(opener)

url = 'http://www.ipdizhichaxun.com/'
req = Request(url)
# Close the connection as soon as the body has been read.
with urlopen(req) as response:
    html = response.read().decode('utf-8')
print(html)
Esempio n. 3
0
    def setup_opener(self, url, timeout):
        """
        Build the urllib OpenerDirector used for requests and store it on
        ``self.opener``. Does nothing when ``self.opener`` is already set.

        The opener is assembled by hand because, per the original notes,
        Package Control ships customized handlers that send the
        "Connection: Keep-Alive" header and hold onto connections so they
        can be re-used. The handler chain is: proxy handler, proxy digest
        auth, proxy basic auth, then either a certificate-validating HTTPS
        handler (for ``https://`` URLs) or a debuggable HTTP handler.

        :param url:
            The URL to download

        :param timeout:
            The int number of seconds to set the timeout to
        """

        if self.opener:
            return

        http_proxy = self.settings.get('http_proxy')
        https_proxy = self.settings.get('https_proxy')
        configured = (('http', http_proxy), ('https', https_proxy))

        # Only schemes with a configured proxy are passed on; with none
        # configured, ProxyHandler is created without arguments.
        proxies = {}
        for scheme, address in configured:
            if address:
                proxies[scheme] = address
        proxy_handler = ProxyHandler(proxies) if proxies else ProxyHandler()

        password_manager = HTTPPasswordMgrWithDefaultRealm()
        proxy_username = self.settings.get('proxy_username')
        proxy_password = self.settings.get('proxy_password')
        if proxy_username and proxy_password:
            # Register the same credentials for every configured proxy
            for _, address in configured:
                if address:
                    password_manager.add_password(None, address,
                                                  proxy_username,
                                                  proxy_password)

        basic_auth_handler = ProxyBasicAuthHandler(password_manager)
        digest_auth_handler = ProxyDigestAuthHandler(password_manager)
        handlers = [proxy_handler, digest_auth_handler, basic_auth_handler]

        debug = self.settings.get('debug')

        if debug:
            console_write(u"Urllib Debug Proxy", True)
            debug_lines = (
                (u"  http_proxy: %s", http_proxy),
                (u"  https_proxy: %s", https_proxy),
                (u"  proxy_username: %s", proxy_username),
                (u"  proxy_password: %s", proxy_password),
            )
            for template, value in debug_lines:
                console_write(template % value)

        secure_url_match = re.match('^https://([^/]+)', url)
        if secure_url_match is None:
            # Plain HTTP: no certificate bundle is needed
            transport_handler = DebuggableHTTPHandler(debug=debug,
                                                      passwd=password_manager)
        else:
            # HTTPS: validate against the CA bundle for this domain
            secure_domain = secure_url_match.group(1)
            bundle_path = self.check_certs(secure_domain, timeout)
            transport_handler = ValidatingHTTPSHandler(
                ca_certs=bundle_path.encode(sys.getfilesystemencoding()),
                debug=debug,
                passwd=password_manager,
                user_agent=self.settings.get('user_agent'))

        handlers.append(transport_handler)
        self.opener = build_opener(*handlers)
Esempio n. 4
0
from urllib.request import ProxyBasicAuthHandler, ProxyHandler, HTTPPasswordMgrWithDefaultRealm, build_opener, Request

# Fetch a page through an authenticating HTTP proxy and print the raw body.
user = '******'
password = '******'
server_host = '114.67.224.159:16816'

# Store the proxy credentials; realm None makes them the default for any
# realm the proxy announces.
password_mgr = HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, server_host, user, password)

# ProxyHandler selects the proxy; ProxyBasicAuthHandler only answers its 407
# challenge. The auth handler was previously bound to the name
# ``ProxyHandler``, shadowing the urllib class, and no proxy was configured,
# so requests bypassed ``server_host`` unless the environment set a proxy.
proxy_handler = ProxyHandler({'http': server_host})
proxy_auth_handler = ProxyBasicAuthHandler(password_mgr)

opener = build_opener(proxy_handler, proxy_auth_handler)

url = 'http://www.atguigu.com'
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}
request = Request(url, headers=headers)
# Close the connection as soon as the body has been read.
with opener.open(request) as response:
    data = response.read()
print(data)
from urllib.request import ProxyHandler, ProxyBasicAuthHandler, build_opener

# Illustrative snippet only: the realm, host and credentials are placeholders.

# With no arguments, ProxyHandler takes its proxies from the environment.
proxy_handler = ProxyHandler()

# Created without a password manager; the credentials are registered
# through the handler's own add_password.
proxy_auth_handler = ProxyBasicAuthHandler()
proxy_auth_handler.add_password(
    realm='realm',
    uri='host',
    user='username',
    passwd='password',
)

opener = build_opener(
    proxy_handler,
    proxy_auth_handler,
)
response = opener.open("http://www.example.com/login.html")


Esempio n. 6
0
# Open a page through an authenticating HTTP proxy.
from urllib.request import (
    HTTPPasswordMgrWithDefaultRealm,
    ProxyBasicAuthHandler,
    ProxyHandler,
    Request,
    build_opener,
)

# Proxy server as "ip:port". Credentials are supplied through the password
# manager below rather than embedded in the proxy URL.
webserver_host_port = "114.67.224.159:16816"

# Proxy user name
user = "******"
# Proxy password
password = "******"

# The manager was previously used without ever being created (NameError).
# First argument of add_password is the realm; None makes the credentials
# the default for any realm.
http_password_mgr = HTTPPasswordMgrWithDefaultRealm()
http_password_mgr.add_password(None, webserver_host_port, user, password)

# ProxyHandler selects the proxy; ProxyBasicAuthHandler only answers its 407
# challenge. Without the explicit ProxyHandler the request would not be sent
# through ``webserver_host_port`` unless the environment configured it.
proxy_handler = ProxyHandler({"http": webserver_host_port})
proxy_auth_handler = ProxyBasicAuthHandler(http_password_mgr)

# Custom opener combining proxy selection and proxy authentication
opener = build_opener(proxy_handler, proxy_auth_handler)

url = "http://www.atguigu.com"

# Request headers imitating a browser
head = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
}
request = Request(url, headers=head)

# Response object returned by the opener; left open for the caller to read
response = opener.open(request)
Esempio n. 7
0
def _spider_error_result(url, method, headers, error, nettime,
                         http_code=0, response_headers=None):
    """Build the dict spiderRequest returns when a request fails.

    Every failure has the same shape: no redirects, an empty body and
    ``repr(error)`` under ``'error'``. Only the status code, the response
    headers and the elapsed time differ between failure kinds.
    """
    return {
        'url': url,
        'method': method,
        'request_headers': headers,
        'response_headers': {} if response_headers is None else response_headers,
        'http_code': http_code,
        'redirects': [],
        'body': b'',
        'nettime': nettime,
        'error': repr(error)
    }


def spiderRequest(url=None,
                  method="GET",
                  data=None,
                  headers=None,
                  timeout=10,
                  auth=None,
                  proxy=None):
    """Request ``url`` and describe the outcome as a plain dict.

    The function does not raise for request failures: HTTP errors, URL
    errors, timeouts and any other exception are logged and reported
    through the ``'error'`` key of the returned dict.

    :param url: The URL to request. It is percent-quoted before use.
    :param method: HTTP method name; upper-cased before use.
    :param data: Accepted for interface compatibility; the body of this
        function does not use it.
    :param headers: Optional mapping of extra request headers. It is copied,
        so the caller's mapping is not modified. ``Cache-Control: no-cache``
        is always set.
    :param timeout: Timeout passed to the opener.
    :param auth: Optional mapping with ``'user'`` and ``'passwd'`` for HTTP
        basic authentication against ``url``.
    :param proxy: Optional mapping with ``'url'`` (proxy for ``http``
        requests) and, for proxy basic authentication, ``'user'`` and
        ``'passwd'``.
    :return: A dict with the keys ``url``, ``method``, ``request_headers``,
        ``response_headers``, ``http_code``, ``redirects``, ``body``,
        ``nettime`` and ``error`` (empty string on success).
    """
    # Work on a private copy: the defaults used to be shared mutable dicts
    # and the caller's headers were modified in place.
    headers = dict(headers) if headers else {}
    headers['Cache-Control'] = 'no-cache'
    method = method.upper()
    start = time()
    try:
        # Records the redirects followed on the way to the final response
        redirect_handler = RedirectHandler()

        # HTTP basic authentication against the target URL
        auth_handler = HTTPBasicAuthHandler()
        if auth and 'user' in auth and 'passwd' in auth:
            passwdHandler = HTTPPasswordMgrWithDefaultRealm()
            passwdHandler.add_password(realm=None,
                                       uri=url,
                                       user=auth['user'],
                                       passwd=auth['passwd'])
            auth_handler = HTTPBasicAuthHandler(passwdHandler)

        # Proxy selection; without a configured URL, ProxyHandler falls
        # back to the environment's proxy settings.
        proxy_handler = ProxyHandler()
        if proxy and 'url' in proxy:
            proxy_handler = ProxyHandler({'http': proxy['url']})

        # Proxy basic authentication
        proxy_auth_handler = ProxyBasicAuthHandler()
        if proxy and 'url' in proxy and 'user' in proxy and 'passwd' in proxy:
            proxyPasswdHandler = HTTPPasswordMgrWithDefaultRealm()
            proxyPasswdHandler.add_password(realm=None,
                                            uri=proxy['url'],
                                            user=proxy['user'],
                                            passwd=proxy['passwd'])
            proxy_auth_handler = ProxyBasicAuthHandler(proxyPasswdHandler)

        opener = build_opener(redirect_handler, auth_handler, proxy_handler,
                              proxy_auth_handler)
        request_handler = Request(quote(url, safe=string.printable),
                                  method=method)
        for key, value in headers.items():
            request_handler.add_header(key, value)

        # The context manager closes the connection once the body is read.
        with opener.open(request_handler, timeout=timeout) as response:
            # Taken before the body is read, so 'nettime' covers the time
            # until the response headers arrived.
            end = time()
            return {
                'url': url,
                'method': method,
                'request_headers': request_handler.headers,
                'response_headers': formatHeaders(response.getheaders()),
                'http_code': response.status,
                'redirects': redirect_handler.redirects,
                'body': response.read(),
                'nettime': end - start,
                'error': ''
            }
    except HTTPError as e:  # 400 401 402 403 500 501 502 503 504
        # %-style logging args keep the message text unchanged and do not
        # fail when url is None.
        logger.error("%s::::::::%r", url, e)
        return _spider_error_result(url, method, headers, e, time() - start,
                                    http_code=e.code,
                                    response_headers=dict(e.headers))
    except URLError as e:
        logger.error("%s::::::::%r", url, e)
        return _spider_error_result(url, method, headers, e, time() - start)
    except timeoutError as e:
        logger.error("%s::::::::%r", url, e)
        return _spider_error_result(url, method, headers, e, time() - start)
    except Exception as e:
        # Last-resort boundary: log with traceback and report the failure.
        logger.exception(e)
        logger.error("%s::::::::%r", url, e)
        # Unexpected failures report no timing, as before.
        return _spider_error_result(url, method, headers, e, 0)
Esempio n. 8
0
from urllib.request import Request, build_opener, ProxyBasicAuthHandler, ProxyHandler, HTTPPasswordMgrWithDefaultRealm

# Fetch a page through an authenticating HTTP proxy and print its text.
http_mgr_real = HTTPPasswordMgrWithDefaultRealm()

# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a config file instead.
user = '******'
passwd = 'sunfengchun'

# Proxy server as "ip:port"
pro = '97.64.107.241:8899'

# Realm None makes these credentials the default for any realm.
http_mgr_real.add_password(None, pro, user, passwd)

# ProxyHandler selects the proxy; ProxyBasicAuthHandler only answers its 407
# challenge. Without the explicit ProxyHandler the request would not be sent
# through ``pro`` unless the environment configured it.
pro_handler = ProxyHandler({'http': pro})
pro_auth = ProxyBasicAuthHandler(http_mgr_real)

opener = build_opener(pro_handler, pro_auth)

request = Request('http://www.baidu.com')

# Close the connection as soon as the body has been read.
with opener.open(request) as response:
    print(response.read().decode('utf-8'))