def testscheme_open(self, req):
     try:
         selector = req.get_selector()
         if selector == u'/ws_newcompass.asmx?WSDL':
             return urllib.addinfourl(
                 pkg_resources.resource_stream(__name__, 'tests/testdata/wsdl.xml'),
                 httplib.HTTPMessage(open('/dev/null')),
                 req.get_full_url(),
                 200
             )
         elif selector == u'/ws_newcompass.asmx':
             soapResponse = urlparse.urlparse(req.get_header('Soapaction')).path.strip('"').split('/')[-1] + '.xml'
             return urllib.addinfourl(
                 pkg_resources.resource_stream(__name__, 'tests/testdata/' + soapResponse),
                 httplib.HTTPMessage(open('/dev/null')),
                 req.get_full_url(),
                 200
             )
         elif selector == u'/biomuta.tsv':
             return urllib2.addinfourl(
                 pkg_resources.resource_stream(__name__, 'tests/testdata/Biomuta.tsv'),
                 httplib.HTTPMessage(open('/dev/null')),
                 req.get_full_url(),
                 200
             )
         else:
             raise urllib2.URLError('Not found')
     except Exception:
         raise urllib2.URLError('Not found')
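The method above is written as the testscheme_open() hook of a urllib2 protocol handler that serves canned WSDL/SOAP/TSV fixtures from package data. A minimal sketch of how such a hook might be wired up in a test follows; the handler class name and the testscheme:// URL are illustrative and assume the function and its module-level imports (pkg_resources, urllib, httplib) are available as shown above.

import urllib2

class TestSchemeHandler(urllib2.BaseHandler):
    # Bind the module-level function defined above as the scheme hook;
    # urllib2 dispatches testscheme:// URLs to <scheme>_open methods.
    testscheme_open = testscheme_open

opener = urllib2.build_opener(TestSchemeHandler())
wsdl_xml = opener.open('testscheme://example/ws_newcompass.asmx?WSDL').read()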
Example #2
 def fake_urlopen(self, url):
     """Fake urlopen using test client"""
     if 'example' in url:
         response = cStringIO.StringIO('')
         return addinfourl(response, {'X-Pingback': '/xmlrpc.php'}, url)
     elif 'localhost' in url:
         response = cStringIO.StringIO('<link rel="pingback" href="/xmlrpc/">')
         return addinfourl(response, {}, url)
 def fake_urlopen(self, url):
     """Fake urlopen using test client"""
     if 'example' in url:
         response = cStringIO.StringIO('')
         return addinfourl(response, {'X-Pingback': '/xmlrpc.php'}, url)
     else:
         response = cStringIO.StringIO(self.client.get(url).content)
         return addinfourl(response, {}, url)
Example #4
 def fake_urlopen(self, url):
     """Fake urlopen using test client"""
     if 'example' in url:
         response = cStringIO.StringIO('')
         return addinfourl(response, {'X-Pingback': '/xmlrpc.php',
                                      'Content-Type': 'text/html'}, url)
     elif 'localhost' in url:
         response = cStringIO.StringIO(
             '<link rel="pingback" href="/xmlrpc/">')
         return addinfourl(response, {'Content-Type': 'text/xhtml'}, url)
     elif 'google' in url:
         response = cStringIO.StringIO('PNG CONTENT')
         return addinfourl(response, {'content-type': 'image/png'}, url)
     elif 'error' in url:
         raise URLError('Invalid resource')
Example #5
    def test_returns_response_when_successful_response(self, urlopen):
        resp = addinfourl(StringIO(u"mock_content"), "mock headers", url="http://www.example.com/", code="200")
        urlopen.return_value = resp

        api_stub = ClientStub()
        response = api_stub.do_something()
        self.assertEqual(resp, response)
Example #6
def decode (page):
    """Gunzip or deflate a compressed page."""
    log.debug(LOG_CHECK, "page info %d %s", page.code, str(page.info()))
    encoding = page.info().get("Content-Encoding")
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        try:
            if encoding == 'deflate':
                fp = StringIO(zlib.decompress(content))
            else:
                fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
        except zlib.error as msg:
            log.debug(LOG_CHECK, "uncompressing had error "
                 "%s, assuming non-compressed content", str(msg))
            fp = StringIO(content)
        # remove content-encoding header
        headers = httplib.HTTPMessage(StringIO(""))
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        newpage.code = page.code
        newpage.msg = page.msg
        return newpage
    return page
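A minimal usage sketch for the decode() helper above, assuming it is importable from its module (the URL is a placeholder): it wraps whatever urllib2.urlopen() returned and hands back either the original response or a fresh addinfourl whose body is already decompressed.

import urllib2

page = urllib2.urlopen('http://example.com/')   # placeholder URL
page = decode(page)    # transparently gunzips/inflates when Content-Encoding says so
html = page.read()
print page.code, page.geturl()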
Example #7
    def open_local_file(self, req):
        import mimetypes
        import mimetools
        import rfc822

        host = req.get_host()
        file = req.get_selector()
        localfile = urllib.url2pathname(file)
        stats = os.stat(localfile)
        size = stats[stat.ST_SIZE]
        modified = rfc822.formatdate(stats[stat.ST_MTIME])
        mtype = mimetypes.guess_type(file)[0]
        if host:
            host, port = urllib.splitport(host)
            if port or socket.gethostbyname(host) not in self.get_names():
                raise urllib2.URLError("file not on local host")
        fo = open(localfile, "rb")
        brange = req.headers.get("Range", None)
        brange = range_header_to_tuple(brange)
        assert brange != ()
        if brange:
            (fb, lb) = brange
            if lb == "":
                lb = size
            if fb < 0 or fb > size or lb > size:
                raise RangeError("Requested Range Not Satisfiable")
            size = lb - fb
            fo = RangeableFileObject(fo, (fb, lb))
        headers = mimetools.Message(
            StringIO(
                "Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n" % (mtype or "text/plain", size, modified)
            )
        )
        return urllib.addinfourl(fo, headers, "file:" + file)
Example #8
    def open_local_file(self, req):
        import email.utils
        import mimetypes

        host = req.get_host()
        filename = req.get_selector()
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = mimetools.Message(
                StringIO(
                    "Content-type: %s\nContent-length: %d\nLast-modified: %s\n"
                    % (mtype or "text/plain", size, modified)
                )
            )
            if host:
                host, port = splitport(host)
            if not host or (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = "file://" + host + filename
                else:
                    origurl = "file://" + filename
                return addinfourl(open(localfile, "rb"), headers, origurl)
        except OSError, msg:
            # urllib2 users shouldn't expect OSErrors coming from urlopen()
            raise URLError(msg)
 def open_local_file(self, req):
     try:
         import email.utils as emailutils
     except ImportError:
         # python 2.4
         import email.Utils as emailutils
     import mimetypes
     host = req.get_host()
     file = req.get_selector()
     localfile = url2pathname(file)
     try:
         stats = os.stat(localfile)
         size = stats.st_size
         modified = emailutils.formatdate(stats.st_mtime, usegmt=True)
         mtype = mimetypes.guess_type(file)[0]
         headers = mimetools.Message(StringIO(
             'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
             (mtype or 'text/plain', size, modified)))
         if host:
             host, port = splitport(host)
         if not host or \
             (not port and socket.gethostbyname(host) in self.get_names()):
             return addinfourl(open(localfile, 'rb'),
                               headers, 'file:'+file)
     except OSError, msg:
         # urllib2 users shouldn't expect OSErrors coming from urlopen()
         raise URLError(msg)
Example #10
def decode(page):
    """
    Gunzip or deflate a compressed page.
    """
    encoding = page.info().get("Content-Encoding")
    # note: some servers send content encoding gzip if file ends with ".gz"
    # but we don't want to decompress such files
    if encoding in ('gzip', 'x-gzip', 'deflate') and \
       not page.geturl().endswith(".gz"):
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        if encoding == 'deflate':
            fp = StringIO.StringIO(zlib.decompress(content))
        else:
            fp = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
        # remove content-encoding header
        headers = {}
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        if hasattr(page, "code"):
            # python 2.4 compatibility
            newpage.code = page.code
        if hasattr(page, "msg"):
            # python 2.4 compatibility
            newpage.msg = page.msg
        page = newpage
    return page
Example #11
    def do_open(self, http_class, req):
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        try:
            h = http_class(host) # will parse host:port
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector())
                if not req.headers.has_key('Content-type'):
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if not req.headers.has_key('Content-length'):
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector())
        except socket.error, err:
            raise URLError(err)

        h.putheader('Host', host)
        for args in self.parent.addheaders:
            h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)

        code, msg, hdrs = h.getreply()
        fp = h.getfile()
        if code == 200:
            return addinfourl(fp, hdrs, req.get_full_url())
        else:
            return self.parent.error('http', req, fp, code, msg, hdrs)
	def _create_urllib_data(self, qt_network_reply):
		qt_network_request = qt_network_reply.request()

		request_url = qt_network_request.url()
		request_headers = {}
		for header_name in qt_network_request.rawHeaderList():
			header = qt_network_request.rawHeader(header_name)
			request_headers.update({header_name.data():header.data()})

		url = request_url.toEncoded().data()
		self.urllib_request = UrllibRequest(url, headers=request_headers)

		#py2: output_file = StringIO.StringIO()
		output_file = StringIO()
		raw_header_pairs = qt_network_reply.rawHeaderPairs()
		headers = []
		for header in raw_header_pairs:
			hd_string = '%s: %s' % (header[0], header[1])
			output_file.write(hd_string)
			headers.append(hd_string)
		output_file.write("\n")
		output_file.write(str(qt_network_reply.sniffed_data))

		headers_mstr = email.message_from_string('\n'.join(headers))

		origurl = qt_network_reply.url().toEncoded().data()

		self.urllib_response = addinfourl(output_file, headers_mstr, origurl)
Example #13
 def open(self, fullurl, data=None, method=None):
     """Use URLopener().open(file) instead of open(file, 'r')."""
     fullurl = unwrap(toBytes(fullurl))
     # percent encode url, fixing lame server errors for e.g, like space
     # within url paths.
     fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
     if self.tempcache and fullurl in self.tempcache:
         filename, headers = self.tempcache[fullurl]
         fp = open(filename, 'rb')
         return addinfourl(fp, headers, fullurl)
     urltype, url = splittype(fullurl)
     if not urltype:
         urltype = 'file'
     if urltype in self.proxies:
         proxy = self.proxies[urltype]
         urltype, proxyhost = splittype(proxy)
         host, selector = splithost(proxyhost)
         url = (host, fullurl) # Signal special case to open_*()
     else:
         proxy = None
     name = 'open_' + urltype
     self.type = urltype
     name = name.replace('-', '_')
     if not hasattr(self, name):
         if proxy:
             return self.open_unknown_proxy(proxy, fullurl, data)
         else:
             return self.open_unknown(fullurl, data)
     try:
         return getattr(self, name)(url, data, method)
     except socket.error, msg:
         raise IOError, ('socket error', msg), sys.exc_info()[2]
Example #14
    def response_to_twill(self, response):
        """
        Wrap Django response to work with Twill.
        """
        path = response.request.get('PATH_INFO')
        url = path and SITE + path.lstrip('/') or path

        headers_msg = '\n'.join('%s: %s' % (k, v) for k, v in response.items())
        headers_msg = StringIO(headers_msg)
        headers = httplib.HTTPMessage(headers_msg)

        io_response = StringIO(response.content)
        urllib_response = addinfourl(io_response,
                                     headers,
                                     url,
                                     response.status_code)
        urllib_response._headers = headers
        urllib_response._url = url
        urllib_response.msg = u'OK'
        urllib_response.seek = urllib_response.fp.seek

        self.get_browser()._browser._set_response(urllib_response, False)
        self.get_browser().result = ResultWrapper(response.status_code,
                                                  url,
                                                  response.content)

        self._apply_xhtml()
def decode (page):
    "gunzip or deflate a compressed page"
    #print page.info().headers
    encoding = page.info().get("Content-Encoding") 
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        from cStringIO import StringIO
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        if encoding == 'deflate':
            import zlib
            fp = StringIO(zlib.decompress(content))
        else:
            import gzip
            fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
        # remove content-encoding header
        headers = httplib.HTTPMessage(StringIO(""))
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        # Propagate code, msg through
        if hasattr(page, 'code'):
            newpage.code = page.code
        if hasattr(page, 'msg'):
            newpage.msg = page.msg
        return newpage
    return page
    def http_error_302(self, req, fp, code, msg, headers):
        import urllib

        infourl = urllib.addinfourl(fp, headers, headers["Location"])
        infourl.status = code
        infourl.code = code
        return infourl
Example #17
 def _make_response(self, result, url):
     data = "\r\n".join(["%s: %s" % (k, v) for k, v in result.header_items])
     headers = httplib.HTTPMessage(StringIO(data))
     response = urllib.addinfourl(StringIO(result.data), headers, url)
     code, msg = result.status.split(None, 1)
     response.code, response.msg = int(code), msg
     return response
Example #18
    def text_to_twill(self, text):
        """
        Wrap text to work with Twill.
        """
        headers_msg = 'Content-Type: text/plain; charset=utf-8\n'
        headers_msg = StringIO(headers_msg)
        headers = httplib.HTTPMessage(headers_msg)

        status_code = 200
        url = 'text://'

        io_response = StringIO(text)
        urllib_response = addinfourl(io_response,
                                     headers,
                                     url,
                                     status_code)
        urllib_response._headers = headers
        urllib_response._url = url
        urllib_response.msg = u'OK'
        urllib_response.seek = urllib_response.fp.seek

        self.get_browser()._browser._factory.set_response(urllib_response)
        self.get_browser().result = ResultWrapper(status_code, url, text)

        self._apply_xhtml()
Example #19
  def exec_open (self, req):
    path = req.get_selector()
    args = path.split("?", 1)
    if len(args) == 1: args.append('')
    #print "args ", args
    # Prepare CGI-like environment
    os.putenv ('GATEWAY_INTERFACE', 'CGI/1.1')
    os.putenv ('HTTP_ACCEPT_ENCODING', req.headers.get ('Accept-encoding'))
    os.putenv ('HTTP_USER_AGENT', 'DBS-CGI-Direct-call')
    os.putenv ('REQUEST_METHOD', 'POST')
    os.putenv ('CONTENT_LENGTH', str(req.headers.get ('Content-length')))
    os.putenv ('CONTENT_TYPE', req.headers.get ('Content-type'))
    os.putenv ('QUERY_STRING', args[1])
    os.putenv ('REQUEST_URI', path)
    os.putenv ('SCRIPT_NAME', args[0])
    os.putenv ('SERVER_NAME', 'localhost')
    os.putenv ('SERVER_PORT', str(80))
    os.putenv ('SERVER_PROTOCOL', 'HTTP/1.1')
    os.putenv ('SERVER_SOFTWARE', 'Builtin')

    # Open subprocess and write form data
    r, w = os.popen2(args[0])
    r.write (req.get_data())
    r.close ()

    # Read back headers, then leave the body to be read
    msg = httplib.HTTPMessage (w, 0)
    msg.fp = None
    return urllib.addinfourl (w, msg, path)
    def do_open(self, http_class, req):
        host = req.get_host()
        if not host:
            raise URLError('no host given')
        h = http_class(host, timeout=req.timeout)
        h.set_debuglevel(self._debuglevel)
        headers = dict(req.unredirected_hdrs)
        headers.update(dict(((k, v) for k, v in req.headers.items() if k not in headers)))
        headers['Connection'] = 'close'
        headers = dict(((name.title(), val) for name, val in headers.items()))
        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = 'Proxy-Authorization'
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            try:
                r = h.getresponse(buffering=True)
            except TypeError:
                r = h.getresponse()

        except socket.error as err:
            raise URLError(err)

        fp = socket._fileobject(RecvWrapper(r), close=True)
        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
Example #21
 def open_local_file(self, req):
     import mimetypes
     import email
     host = req.get_host()
     file = req.get_selector()
     localfile = urllib.url2pathname(file)
     stats = os.stat(localfile)
     size = stats[stat.ST_SIZE]
     modified = email.Utils.formatdate(stats[stat.ST_MTIME])
     mtype = mimetypes.guess_type(file)[0]
     if host:
         host, port = urllib.splitport(host)
         if port or socket.gethostbyname(host) not in self.get_names():
             raise urllib2.URLError('file not on local host')
     fo = open(localfile,'rb')
     brange = req.headers.get('Range', None)
     brange = range_header_to_tuple(brange)
     assert brange != ()
     if brange:
         (fb, lb) = brange
         if lb == '':
             lb = size
         if fb < 0 or fb > size or lb > size:
             raise RangeError('Requested Range Not Satisfiable')
         size = (lb - fb)
         fo = RangeableFileObject(fo, (fb, lb))
     headers = email.message_from_string(
         'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' %
         (mtype or 'text/plain', size, modified))
     return urllib.addinfourl(fo, headers, 'file:'+file)
 def http_error_302(self, req, fp, code, msg, headers):
     infourl = urllib.addinfourl(fp, headers, req.get_full_url())
     infourl.status = code
     infourl.code = code
      logging.debug('NoRedirectHandler got redirect to ' + headers['Location'])
     self.got_redirect = True
     return infourl
Example #23
    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        import httplib
        user_passwd = None
        if type(url) is type(""):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if string.lower(urltype) != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')
        if user_passwd:
            import base64
            auth = string.strip(base64.encodestring(user_passwd))
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        for cookie in self.cookies.items():
            h.putheader('Cookie', '%s=%s;' % cookie)

        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: apply(h.putheader, args)
        h.endheaders()
        if data is not None:
            h.send(data + '\r\n')
        errcode, errmsg, headers = h.getreply()
        if headers and headers.has_key('set-cookie'):
            cookies = headers.getallmatchingheaders('set-cookie')
            for cookie in cookies: self.cookies.load(cookie)

        fp = h.getfile()
        if errcode == 200:
            return addinfourl(fp, headers, "http:" + url)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
    def retry_using_http_NTLM_auth(self, req, auth_header_field, realm, headers):
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is not None:
            user_parts = user.split('\\', 1)
            if len(user_parts) == 1:
                UserName = user_parts[0]
                DomainName = ''
                type1_flags = ntlm.NTLM_TYPE1_FLAGS & ~ntlm.NTLM_NegotiateOemDomainSupplied
            else:
                DomainName = user_parts[0].upper()
                UserName = user_parts[1]
                type1_flags = ntlm.NTLM_TYPE1_FLAGS
            # ntlm secures a socket, so we must use the same socket for the complete handshake
            headers = dict(req.headers)
            headers.update(req.unredirected_hdrs)
            auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(user, type1_flags)
            if req.headers.get(self.auth_header, None) == auth:
                return None
            headers[self.auth_header] = auth

            host = req.get_host()
            if not host:
                raise urllib2.URLError('no host given')
            h = None
            if req.get_full_url().startswith('https://'):
                h = httplib.HTTPSConnection(host) # will parse host:port
            else:
                h = httplib.HTTPConnection(host) # will parse host:port
            h.set_debuglevel(self._debuglevel)
            # we must keep the connection because NTLM authenticates the connection, not single requests
            headers["Connection"] = "Keep-Alive"
            headers = dict((name.title(), val) for name, val in headers.items())
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
            r.begin()
            r._safe_read(int(r.getheader('content-length')))
            if r.getheader('set-cookie'):
                # this is important for some web applications that store authentication-related info in cookies (it took a long time to figure out)
                headers['Cookie'] = r.getheader('set-cookie')
            r.fp = None # remove the reference to the socket, so that it can not be closed by the response object (we want to keep the socket open)
            auth_header_value = r.getheader(auth_header_field, None)
            (ServerChallenge, NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value[5:])
            auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
            headers[self.auth_header] = auth
            headers["Connection"] = "Close"
            headers = dict((name.title(), val) for name, val in headers.items())
            try:
                h.request(req.get_method(), req.get_selector(), req.data, headers)
                # none of the configured handlers are triggered, for example redirect-responses are not handled!
                response = h.getresponse()
                def notimplemented():
                    raise NotImplementedError
                response.readline = notimplemented
                infourl = addinfourl(response, response.msg, req.get_full_url())
                infourl.code = response.status
                infourl.msg = response.reason
                return infourl
            except socket.error, err:
                raise urllib2.URLError(err)
Example #25
    def test_returns_custom_response_class_when_declared_on_method(self, urlopen):
        resp = addinfourl(StringIO(u"mock_content"), "mock headers", url="http://www.example.com/", code="200")
        urlopen.return_value = resp

        api_stub = CustomResponseClientStub()
        response = api_stub.do_simple()
        self.assertIsInstance(response, apyclient.JSONApiResponse)
        self.assertEqual(resp, response.original_response)
Example #26
    def test_returns_custom_response_class_when_declared_on_api_class(self, urlopen):
        resp = addinfourl(StringIO(u"mock_content"), "mock headers", url="http://www.example.com/", code="200")
        urlopen.return_value = resp

        api_stub = ApiCustomResponseStub()
        response = api_stub.do_something()
        self.assertIsInstance(response, CustomResponseTwo)
        self.assertEqual(resp, response._response)
Example #27
    def resolve(self, uriRef, baseUri=None):
        """
        Takes a URI or a URI reference plus a base URI, produces an absolutized URI
        if a base URI was given, then attempts to obtain access to an entity
        representing the resource identified by the resulting URI,
        returning the entity as a stream (a file-like object).

        Raises an IriError if the URI scheme is unsupported or if a stream
        could not be obtained for any reason.
        """
        if not isinstance(uriRef, urllib.request.Request):
            if baseUri is not None:
                uri = self.absolutize(uriRef, baseUri)
                scheme = get_scheme(uri)
            else:
                uri = uriRef
                scheme = get_scheme(uriRef)
                # since we didn't use absolutize(), we need to verify here
                if scheme not in self._supported_schemes:
                    if scheme is None:
                        raise ValueError('When the URI to resolve is a relative '
                            'reference, it must be accompanied by a base URI.')
                    else:
                        raise IriError(IriError.UNSUPPORTED_SCHEME,
                                           scheme=scheme, resolver=self.__class__.__name__)
            req = urllib.request.Request(uri)
        else:
            req, uri = uriRef, uriRef.get_full_url()

        if self.authorizations and not self.authorize(uri):
            raise IriError(IriError.DENIED_BY_RULE, uri=uri)
        # Bypass urllib for opening local files.
        if scheme == 'file':
            path = uri_to_os_path(uri, attemptAbsolute=False)
            try:
                stream = open(path, 'rb')
            except IOError as e:
                raise IriError(IriError.RESOURCE_ERROR,
                                   loc='%s (%s)' % (uri, path),
                                   uri=uri, msg=str(e))
            # Add the extra metadata that urllib normally provides (sans
            # the poorly guessed Content-Type header).
            stats = os.stat(path)
            size = stats.st_size
            mtime = _formatdate(stats.st_mtime)
            headers = email.message_from_string(
                'Content-Length: %s\nLast-Modified: %s\n' % (size, mtime))
            stream = urllib.response.addinfourl(stream, headers, uri)
        else:
            # urllib.request.urlopen, wrapped by us, will suffice for http, ftp,
            # data and gopher
            try:
                stream = urllib.request.urlopen(req)
            except IOError as e:
                raise IriError(IriError.RESOURCE_ERROR,
                                   uri=uri, loc=uri, msg=str(e))
        return stream
Example #28
 def redirect_request(self, req, fp, code, msg, headers, newurl):
     if 'WWW-Authenticate' in headers:
         response = urllib.addinfourl(fp, msg, req.get_full_url())
         response.code = 401
         response.msg = headers
         return self.parent.error(
             'http', req, response, 401, msg, headers)
     else:
         return urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
Example #29
    def http_error_302(self, req, fp, code, msg, headers):
        """
        Stop request when finding a redirection
        """
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code

        return infourl
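A sketch of how a redirect-stopping handler like the one above is typically installed, assuming Python 2's urllib2 (the class name, the aliases for the other 3xx codes, and the URL are illustrative): build_opener() wires it in, and the caller then sees the 3xx response itself instead of the followed redirect.

import urllib
import urllib2

class NoRedirect(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
        # Return the redirect response as-is instead of following it.
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl
    # reuse the same behaviour for the other redirect codes
    http_error_301 = http_error_303 = http_error_307 = http_error_302

opener = urllib2.build_opener(NoRedirect())
resp = opener.open('http://example.com/old-location')   # placeholder URL
print resp.code, resp.info().get('Location')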
    def http_error_302(self, req, fp, code, msg, headers):
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl
Example #31
class myHTTPSHandler(M2Crypto.m2urllib2.HTTPSHandler):
    handler_order = 499
    saved_session = None

    def __init__(self, *args, **kwargs):
        self.appname = kwargs.pop('appname', 'generic')
        M2Crypto.m2urllib2.HTTPSHandler.__init__(self, *args, **kwargs)

    # copied from M2Crypto.m2urllib2.HTTPSHandler
    # its sole purpose is to use our myHTTPSHandler/myHTTPSProxyHandler class
    # ideally the m2urllib2.HTTPSHandler.https_open() method would be split into
    # "do_open()" and "https_open()" so that we just need to override
    # the small "https_open()" method...
    def https_open(self, req):
        host = req.get_host()
        if not host:
            raise M2Crypto.m2urllib2.URLError('no host given: ' +
                                              req.get_full_url())

        # Our change: Check to see if we're using a proxy.
        # Then create an appropriate ssl-aware connection.
        full_url = req.get_full_url()
        target_host = urlparse.urlparse(full_url)[1]

        if (target_host != host):
            h = myProxyHTTPSConnection(host=host,
                                       appname=self.appname,
                                       ssl_context=self.ctx)
            # M2Crypto.ProxyHTTPSConnection.putrequest expects a fullurl
            selector = full_url
        else:
            h = myHTTPSConnection(host=host,
                                  appname=self.appname,
                                  ssl_context=self.ctx)
            selector = req.get_selector()
        # End our change
        h.set_debuglevel(self._debuglevel)
        if self.saved_session:
            h.set_session(self.saved_session)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        try:
            h.request(req.get_method(), selector, req.data, headers)
            s = h.get_session()
            if s:
                self.saved_session = s
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            err.filename = full_url
            raise M2Crypto.m2urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = socket._fileobject(r)

        resp = urllib.addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
Example #32
                r.recv = r.read

                # no data, just have to read
                r.read()

                class fp_dummy(object):
                    def read(self):
                        return ""

                    def readline(self):
                        return ""

                    def close(self):
                        pass

                resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(
                            h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib2.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
Example #33
 def http_error_302(self, req, fp, code, msg, headers):
     infourl = urllib.addinfourl(fp, headers, req.get_full_url())
     infourl.status = code
     infourl.code = code
     return infourl

 http_error_300 = http_error_301 = http_error_303 = http_error_307 = http_error_302
Example #34
    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        import httplib
        user_passwd = None
        if type(url) is type(""):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            host, selector = url
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if string.lower(urltype) != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
            #print "proxy via http:", host, selector
        if not host: raise IOError('http error', 'no host given')
        if user_passwd:
            import base64
            auth = string.strip(base64.encodestring(user_passwd))
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-type', 'application/x-www-form-urlencoded')
            h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        for cookie in self.cookies.items():
            h.putheader('Cookie', '%s=%s;' % cookie)

        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders:
            apply(h.putheader, args)
        h.endheaders()
        if data is not None:
            h.send(data + '\r\n')
        errcode, errmsg, headers = h.getreply()
        if headers and headers.has_key('set-cookie'):
            cookies = headers.getallmatchingheaders('set-cookie')
            for cookie in cookies:
                self.cookies.load(cookie)

        fp = h.getfile()
        if errcode == 200:
            return addinfourl(fp, headers, "http:" + url)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
Example #35
 def http_error_302(self, req, fp, code, msg, headers):
     import urllib
     infourl = urllib.addinfourl(fp, headers, headers["Location"])
     infourl.status = code
     infourl.code = code
     return infourl
Example #36
class AbstractHTTPHandler(BaseHandler):
    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def do_request_(self, request):
        host = request.get_host()
        if not host:
            raise URLError('no host given')

        if request.has_data():  # POST
            data = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type', 'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                request.add_unredirected_header('Content-length',
                                                '%d' % len(data))

        scheme, sel = splittype(request.get_selector())
        sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        host = req.get_host()
        if not host:
            raise URLError('no host given')

        h = http_class(host)  # will parse host:port
        h.set_debuglevel(self._debuglevel)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())
        try:
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            raise URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.

        # Wrap the HTTPResponse object in socket's file object adapter
        # for Windows.  That adapter calls recv(), so delegate recv()
        # to read().  This weird wrapping allows the returned object to
        # have readline() and readlines() methods.

        # XXX It might be better to extract the read buffering code
        # out of socket._fileobject() and into a base class.

        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
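As the docstring of do_open() above notes, the returned addinfourl behaves like a file object with a few extras. A short illustration of consuming those attributes on an ordinary urlopen() result (Python 2, placeholder URL):

import urllib2

resp = urllib2.urlopen('http://example.com/')      # placeholder URL
print resp.code                                     # HTTP status code
print resp.geturl()                                 # URL actually fetched
print resp.info().getheader('Content-Type')         # headers via mimetools.Message
body = resp.read()                                  # file-like body access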
 def rtmp_open(self, req):
     url = req.get_selector()
     return urllib.addinfourl(StringIO(''), req.headers, req.get_full_url())
Example #38
class FTPChunkHandler(FTPHandler):
    """The code was taken from urllib2.py.

    The only difference is that offsets are supported by this class
    using the REST-command. Offsets are needed for chunked loading.
    """
    def ftp_open(self, req):
        import mimetypes
        host = req.get_host()
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')

        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()

            # EDIT START
            # get REST (file offset) from headers
            rest = 0
            offset = req.headers.get('Offset', None)
            if offset is not None and offset > 0:
                rest = offset
            # EDIT END

            fp, retrlen = fw.retrfile(file, type, rest)
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise URLError, ('ftp error: %s' % msg), sys.exc_info()[2]
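A usage sketch for the chunk-aware FTP handler above (host and path are placeholders): the 'Offset' request header is the handler's own convention and is turned into an FTP REST offset inside ftp_open().

import urllib2

opener = urllib2.build_opener(FTPChunkHandler())
req = urllib2.Request('ftp://ftp.example.com/pub/archive.bin')   # placeholder URL
req.add_header('Offset', 1024)     # chunk/resume start, consumed by ftp_open()
chunk = opener.open(req).read(64 * 1024)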
 else:
     raise CrawlerFatalError('Maximum retry limit reached: %r' %
                             url)
 # read the page at once (for some reason specifying read(n) causes errors.)
 buf = resp.read()
 if MAX_PAGE_LEN < len(buf):
     raise CrawlerPageError('Too long page (>%dbytes): %r' %
                            (MAX_PAGE_LEN, url))
 # interpret the encoding.
 if 'gzip' in resp.getheader('Content-Encoding', '').lower():
     fp = GzipFile(fileobj=StringIO(buf))
 else:
     fp = StringIO(buf)
 # get cookie received.
 if self.cookiejar:
     r = addinfourl(fp, resp.msg, url)
     r.code = resp.status
     self.cookiejar.extract_cookies(r, req)
 # check the result code.
 status = resp.status
 if status in (301, 302):
     url0 = urljoin(url, resp.getheader('Location', ''))
     url1 = self.accept_url(url0)
     if url1 and (url1 not in self.crawled
                  or self.crawled[url1] != 2):
         print >> stderr, 'REDIRECTED: Status=%d: %r' % (status,
                                                         url1)
         url = url1
         continue
     else:
         raise CrawlerWarning('Status=%d: Ignore redirect: %r' %
                              (status, url1))
Example #40
def mock_response(url, response_text, headers=DEFAULT_HEADERS):
    all_headers = headers + [("Content-length", len(response_text))]
    headers_obj = _headers(all_headers)
    return addinfourl(StringIO(response_text), headers_obj, url)
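A hedged sketch of how a helper like mock_response() above is typically used in a test, assuming the standard mock library (the patched module path and the payload are placeholders): urlopen is patched so the code under test receives the canned addinfourl.

import mock

with mock.patch('urllib2.urlopen') as urlopen:
    urlopen.return_value = mock_response('http://example.com/api',
                                         '{"ok": true}')
    # the code under test now sees the canned response when it calls urllib2.urlopen()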
Example #41
class newHTTPHandler(urllib2.BaseHandler):
    def http_open(self, req):
        return self.do_open(httplib.HTTP, req)

    def do_open(self, http_class, req):
        data = req.get_data()
        v_files = []
        v_vars = []
        # mapping object (dict)
        if req.has_data() and type(data) != str:
            if hasattr(data, 'items'):
                data = data.items()
            else:
                try:
                    if len(data) and not isinstance(data[0], tuple):
                        raise TypeError
                except TypeError:
                    ty, va, tb = sys.exc_info()
                    raise TypeError, "not a valid non-string sequence or mapping object", tb
            for (k, v) in data:
                if hasattr(v, 'read'):
                    v_files.append((k, v))
                else:
                    v_vars.append((k, v))
        # no file ? convert to string
        if len(v_vars) > 0 and len(v_files) == 0:
            data = urllib.urlencode(v_vars)
            v_files = []
            v_vars = []
        host = req.get_host()
        if not host:
            raise urllib2.URLError('no host given')
        h = http_class(host)  # will parse host:port
        if req.has_data():
            h.putrequest('POST', req.get_selector())
            if not 'Content-type' in req.headers:
                if len(v_files) > 0:
                    boundary = mimetools.choose_boundary()
                    l = send_data(v_vars, v_files, boundary)
                    h.putheader('Content-Type',
                                'multipart/form-data; boundary=%s' % boundary)
                    h.putheader('Content-length', str(l))
                else:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                    if not 'Content-length' in req.headers:
                        h.putheader('Content-length', '%d' % len(data))
        else:
            h.putrequest('GET', req.get_selector())

        scheme, sel = urllib.splittype(req.get_selector())
        sel_host, sel_path = urllib.splithost(sel)
        h.putheader('Host', sel_host or host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if name not in req.headers:
                h.putheader(name, value)
        for k, v in req.headers.items():
            h.putheader(k, v)
        # httplib will attempt to connect() here.  be prepared
        # to convert a socket error to a URLError.
        try:
            h.endheaders()
        except socket.error, err:
            raise urllib2.URLError(err)

        if req.has_data():
            if len(v_files) > 0:
                l = send_data(v_vars, v_files, boundary, h)
            elif len(v_vars) > 0:
                # if data is passed as dict ...
                data = urllib.urlencode(v_vars)
                h.send(data)
            else:
                # "normal" urllib2.urlopen()
                h.send(data)

        code, msg, hdrs = h.getreply()
        fp = h.getfile()
        if code == 200:
            resp = urllib.addinfourl(fp, hdrs, req.get_full_url())
            resp.code = code
            resp.msg = msg
            return resp
        else:
            return self.parent.error('http', req, fp, code, msg, hdrs)
class http2handler(urllib2.HTTPHandler, urllib2.HTTPSHandler):
    def __init__(self, ui, pwmgr):
        global _configuredlogging
        urllib2.AbstractHTTPHandler.__init__(self)
        self.ui = ui
        self.pwmgr = pwmgr
        self._connections = {}
        loglevel = ui.config('ui', 'http2debuglevel', default=None)
        if loglevel and not _configuredlogging:
            _configuredlogging = True
            logger = logging.getLogger('mercurial.httpclient')
            logger.setLevel(getattr(logging, loglevel.upper()))
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(LOGFMT))
            logger.addHandler(handler)

    def close_all(self):
        """Close and remove all connection objects being kept for reuse."""
        for openconns in self._connections.values():
            for conn in openconns:
                conn.close()
        self._connections = {}

    # shamelessly borrowed from urllib2.AbstractHTTPHandler
    def do_open(self, http_class, req, use_ssl):
        """Return an addinfourl object for the request, using http_class.

        http_class must implement the HTTPConnection API from httplib.
        The addinfourl return value is a file-like object.  It also
        has methods and attributes including:
            - info(): return a mimetools.Message object for the headers
            - geturl(): return the original request URL
            - code: HTTP status code
        """
        # If using a proxy, the host returned by get_host() is
        # actually the proxy. On Python 2.6.1, the real destination
        # hostname is encoded in the URI in the urllib2 request
        # object. On Python 2.6.5, it's stored in the _tunnel_host
        # attribute which has no accessor.
        tunhost = getattr(req, '_tunnel_host', None)
        host = req.get_host()
        if tunhost:
            proxyhost = host
            host = tunhost
        elif req.has_proxy():
            proxyhost = req.get_host()
            host = req.get_selector().split('://', 1)[1].split('/', 1)[0]
        else:
            proxyhost = None

        if proxyhost:
            if ':' in proxyhost:
                # Note: this means we'll explode if we try and use an
                # IPv6 http proxy. This isn't a regression, so we
                # won't worry about it for now.
                proxyhost, proxyport = proxyhost.rsplit(':', 1)
            else:
                proxyport = 3128  # squid default
            proxy = (proxyhost, proxyport)
        else:
            proxy = None

        if not host:
            raise urllib2.URLError('no host given')

        connkey = use_ssl, host, proxy
        allconns = self._connections.get(connkey, [])
        conns = [c for c in allconns if not c.busy()]
        if conns:
            h = conns[0]
        else:
            if allconns:
                self.ui.debug('all connections for %s busy, making a new '
                              'one\n' % host)
            timeout = None
            if req.timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                timeout = req.timeout
            h = http_class(host, timeout=timeout, proxy_hostport=proxy)
            self._connections.setdefault(connkey, []).append(h)

        headers = dict(req.headers)
        headers.update(req.unredirected_hdrs)
        headers = dict((name.title(), val) for name, val in headers.items())
        try:
            path = req.get_selector()
            if '://' in path:
                path = path.split('://', 1)[1].split('/', 1)[1]
            if path[0] != '/':
                path = '/' + path
            h.request(req.get_method(), path, req.data, headers)
            r = h.getresponse()
        except socket.error, err:  # XXX what error?
            raise urllib2.URLError(err)

        # Pick apart the HTTPResponse object to get the addinfourl
        # object initialized properly.
        r.recv = r.read

        resp = urllib.addinfourl(r, r.headers, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
Example #43
 def get_response(self):
     """Returns a copy of the current response."""
     return urllib.addinfourl(StringIO(self.data), self._response.info(),
                              self._response.geturl())
Example #44
 def http_error_300(self, req, fp, code, msg, header_list):
     data = urllib.addinfourl(fp, header_list, req.get_full_url())
     data.status = code
     data.code = code
     return data
Example #45
 def http_error_default(self, url, fp, errcode, errmsg, headers):
     return urllib.addinfourl(fp, [headers, errcode], "http:" + url)
Example #46
    def smb_open(self, req):
        global USE_NTLM, MACHINE_NAME

        host = req.get_host()
        if not host:
            raise urllib2.URLError('SMB error: no host given')
        host, port = splitport(host)
        if port is None:
            port = 139
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        myname = MACHINE_NAME or self.generateClientMachineName()

        n = NetBIOS()
        names = n.queryIPForName(host)
        if names:
            server_name = names[0]
        else:
            raise urllib2.URLError(
                'SMB error: Hostname does not reply back with its machine name'
            )

        path, attrs = splitattr(req.get_selector())
        if path.startswith('/'):
            path = path[1:]
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        service, path = dirs[0], '/'.join(dirs[1:])

        try:
            conn = SMBConnection(user,
                                 passwd,
                                 myname,
                                 server_name,
                                 use_ntlm_v2=USE_NTLM)
            conn.connect(host, port)

            if req.has_data():
                data_fp = req.get_data()
                filelen = conn.storeFile(service, path, data_fp)

                headers = "Content-length: 0\n"
                fp = StringIO("")
            else:
                fp = self.createTempFile()
                file_attrs, retrlen = conn.retrieveFile(service, path, fp)
                fp.seek(0)

                headers = ""
                mtype = mimetypes.guess_type(req.get_full_url())[0]
                if mtype:
                    headers += "Content-type: %s\n" % mtype
                if retrlen is not None and retrlen >= 0:
                    headers += "Content-length: %d\n" % retrlen

            sf = StringIO(headers)
            headers = mimetools.Message(sf)

            return addinfourl(fp, headers, req.get_full_url())
        except Exception, ex:
            raise urllib2.URLError, ('smb error: %s' % ex), sys.exc_info()[2]
Example #47
 def http_error_416(self, req, fp, code, msg, hdrs):
     # HTTP's Range Not Satisfiable error
     r = urllib.addinfourl(fp, hdrs, req.get_full_url())
     r.code = code
     r.msg = msg
     return r
Example #48
 def _mocked(url, *args, **kwargs):
     response_data, headers = response_method(url)
     return addinfourl(WhateverIO(response_data), headers, url)
Example #49
    def retry_using_http_NTLM_auth(self, req, auth_header_field, realm,
                                   headers):
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is not None:
            user_parts = user.split('\\', 1)
            if len(user_parts) == 1:
                UserName = user_parts[0]
                DomainName = ''
                type1_flags = ntlm.NTLM_TYPE1_FLAGS & ~ntlm.NTLM_NegotiateOemDomainSupplied
            else:
                DomainName = user_parts[0].upper()
                UserName = user_parts[1]
                type1_flags = ntlm.NTLM_TYPE1_FLAGS
                # ntlm secures a socket, so we must use the same socket for the complete handshake
            headers = dict(req.headers)
            headers.update(req.unredirected_hdrs)
            auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
                user, type1_flags)
            if req.headers.get(self.auth_header, None) == auth:
                return None
            headers[self.auth_header] = auth

            host = req.host

            if not host:
                raise urllib2.URLError('no host given')

            h = None

            if req.get_full_url().startswith('https://'):
                h = HTTPSConnection(host)  # will parse host:port
            else:
                h = HTTPConnection(host)  # will parse host:port

            h.set_debuglevel(self._debuglevel)

            # we must keep the connection because NTLM authenticates the connection, not single requests
            headers["Connection"] = "Keep-Alive"
            headers = dict(
                (name.title(), val) for name, val in headers.items())

            # For some reason, six doesn't do this translation correctly
            # TODO rsanders low - find bug in six & fix it
            try:
                selector = req.selector
            except AttributeError:
                selector = req.get_selector()

            h.request(req.get_method(), selector, req.data, headers)

            r = h.getresponse()

            r.begin()

            r._safe_read(int(r.getheader('content-length')))
            if r.getheader('set-cookie'):
                # this is important for some web applications that store authentication-related info in cookies (it took a long time to figure out)
                headers['Cookie'] = r.getheader('set-cookie')
            r.fp = None  # remove the reference to the socket, so that it can not be closed by the response object (we want to keep the socket open)
            auth_header_value = r.getheader(auth_header_field, None)

            # some Exchange servers send two WWW-Authenticate headers, one with the NTLM challenge
            # and another with the 'Negotiate' keyword - make sure we operate on the right one
            m = re.match(r'(NTLM [A-Za-z0-9+\-/=]+)', auth_header_value)
            if m:
                auth_header_value, = m.groups()

            (ServerChallenge,
             NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
                 auth_header_value[5:])
            auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
                ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
            headers[self.auth_header] = auth
            headers["Connection"] = "Close"
            headers = dict(
                (name.title(), val) for name, val in headers.items())
            try:
                h.request(req.get_method(), selector, req.data, headers)
                # none of the configured handlers are triggered, for example redirect-responses are not handled!
                response = h.getresponse()

                def notimplemented():
                    raise NotImplementedError

                response.readline = notimplemented
                infourl = urllib.addinfourl(response, response.msg,
                                            req.get_full_url())
                infourl.code = response.status
                infourl.msg = response.reason
                return infourl
            except socket.error as err:
                raise urllib2.URLError(err)
        else:
            return None
Exemple #50
class FTPRangeHandler(urllib2.FTPHandler):
    def ftp_open(self, req):
        host = req.get_host()
        if not host:
            raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        
        try:
            host = socket.gethostbyname(host)
        except socket.error, msg:
            raise urllib2.URLError(msg)
        path, attrs = splitattr(req.get_selector())
        dirs = path.split('/')
        dirs = map(unquote, dirs)
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs)
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitattr(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            
            # -- range support modifications start here
            rest = None
            range_tup = range_header_to_tuple(req.headers.get('Range',None))    
            assert range_tup != ()
            if range_tup:
                (fb,lb) = range_tup
                if fb > 0: rest = fb
            # -- range support modifications end here
            
            fp, retrlen = fw.retrfile(file, type, rest)
            
            # -- range support modifications start here
            if range_tup:
                (fb,lb) = range_tup
                if lb == '': 
                    if retrlen is None or retrlen == 0:
                        raise RangeError(9, 'Requested Range Not Satisfiable due to unobtainable file length.')
                    lb = retrlen
                    retrlen = lb - fb
                    if retrlen < 0:
                        # beginning of range is larger than file
                        raise RangeError(9, 'Requested Range Not Satisfiable')
                else:
                    retrlen = lb - fb
                    fp = RangeableFileObject(fp, (0,retrlen))
            # -- range support modifications end here
            
            headers = ""
            mtype = mimetypes.guess_type(req.get_full_url())[0]
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            sf = StringIO(headers)
            headers = mimetools.Message(sf)
            return addinfourl(fp, headers, req.get_full_url())
        except ftplib.all_errors, msg:
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
Exemple #51
 def http_error_302(self, req, fp, code, msg, headers):
     infourl = urllib.addinfourl(fp, headers, req.get_full_url())
     infourl.status = code
     infourl.code = code
     return infourl
Exemple #52
 def http_error_206(self, req, fp, code, msg, hdrs):
     # 206 Partial Content Response
     r = urllib.addinfourl(fp, hdrs, req.get_full_url())
     r.code = code
     r.msg = msg
     return r
Exemple #53
    def do_open(self, http_class, req):
        # Large portions from Python 3.3 Lib/urllib/request.py and
        # Python 2.6 Lib/urllib2.py

        if sys.version_info >= (3,):
            host = req.host
        else:
            host = req.get_host()

        if not host:
            raise URLError('no host given')

        if self.connection and self.connection.host != host:
            self.close()

        # Re-use the connection if possible
        self.use_count += 1
        if not self.connection:
            h = http_class(host, timeout=req.timeout)
        else:
            h = self.connection
            if self._debuglevel == 5:
                console_write(
                    u'''
                    Urllib %s Debug General
                      Re-using connection to %s on port %s for request #%s
                    ''',
                    (h._debug_protocol, h.host, h.port, self.use_count)
                )

        if sys.version_info >= (3,):
            headers = dict(req.unredirected_hdrs)
            headers.update(dict((k, v) for k, v in req.headers.items()
                                if k not in headers))
            headers = dict((name.title(), val) for name, val in headers.items())

        else:
            h.set_debuglevel(self._debuglevel)

            headers = dict(req.headers)
            headers.update(req.unredirected_hdrs)
            headers = dict(
                (name.title(), val) for name, val in headers.items())

        if req._tunnel_host and not self.connection:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                del headers[proxy_auth_hdr]

            if sys.version_info >= (3,):
                h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
            else:
                h._set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            if sys.version_info >= (3,):
                h.request(req.get_method(), req.selector, req.data, headers)
            else:
                h.request(req.get_method(), req.get_selector(), req.data, headers)
        except socket.error as err:  # timeout error
            h.close()
            raise URLError(err)
        else:
            r = h.getresponse()

        # Keep the connection around for re-use
        if r.is_keep_alive():
            self.connection = h
        else:
            if self._debuglevel == 5:
                s = '' if self.use_count == 1 else 's'
                console_write(
                    u'''
                    Urllib %s Debug General
                      Closing connection to %s on port %s after %s request%s
                    ''',
                    (h._debug_protocol, h.host, h.port, self.use_count, s)
                )
            self.use_count = 0
            self.connection = None

        if sys.version_info >= (3,):
            r.url = req.get_full_url()
            r.msg = r.reason
            return r

        r.recv = r.read
        fp = socket._fileobject(r, close=True)

        resp = addinfourl(fp, r.msg, req.get_full_url())
        resp.code = r.status
        resp.msg = r.reason
        return resp
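A hypothetical sketch of what the connection caching above buys, assuming do_open() is called from an HTTPHandler subclass named KeepAliveHttpHandler (the name and URLs are illustrative):

import urllib2

opener = urllib2.build_opener(KeepAliveHttpHandler())
# Both requests target the same host, so the second call re-uses the cached
# connection as long as the server replied with keep-alive semantics.
first = opener.open('http://example.com/api/a').read()
second = opener.open('http://example.com/api/b').read()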
Exemple #54
 def http_error_206(self, request, fp, errcode, msg, headers):
     return urllib.addinfourl(fp, headers, request.get_full_url())
Exemple #55
    def resolve(self, uriRef, baseUri=None):
        """
        Takes a URI or a URI reference plus a base URI, produces an absolutized URI
        if a base URI was given, then attempts to obtain access to an entity
        representing the resource identified by the resulting URI,
        returning the entity as a stream (a file-like object).

        Raises an IriError if the URI scheme is unsupported or if a stream
        could not be obtained for any reason.
        """
        if not isinstance(uriRef, urllib.request.Request):
            if baseUri is not None:
                uri = self.absolutize(uriRef, baseUri)
                scheme = get_scheme(uri)
            else:
                uri = uriRef
                scheme = get_scheme(uriRef)
                # since we didn't use absolutize(), we need to verify here
                if scheme not in self._supported_schemes:
                    if scheme is None:
                        raise ValueError(
                            'When the URI to resolve is a relative '
                            'reference, it must be accompanied by a base URI.')
                    else:
                        raise IriError(IriError.UNSUPPORTED_SCHEME,
                                       scheme=scheme,
                                       resolver=self.__class__.__name__)
            req = urllib.request.Request(uri)
        else:
            req, uri = uriRef, uriRef.get_full_url()

        if self.authorizations and not self.authorize(uri):
            raise IriError(IriError.DENIED_BY_RULE, uri=uri)
        # Bypass urllib for opening local files.
        if scheme == 'file':
            path = uri_to_os_path(uri, attemptAbsolute=False)
            try:
                stream = open(path, 'rb')
            except IOError as e:
                raise IriError(IriError.RESOURCE_ERROR,
                               loc='%s (%s)' % (uri, path),
                               uri=uri,
                               msg=str(e))
            # Add the extra metadata that urllib normally provides (sans
            # the poorly guessed Content-Type header).
            stats = os.stat(path)
            size = stats.st_size
            mtime = _formatdate(stats.st_mtime)
            headers = email.message_from_string(
                'Content-Length: %s\nLast-Modified: %s\n' % (size, mtime))
            stream = urllib.response.addinfourl(stream, headers, uri)
        else:
            # urllib.request.urlopen, wrapped by us, will suffice for http, ftp,
            # data and gopher
            try:
                stream = urllib.request.urlopen(req)
            except IOError as e:
                raise IriError(IriError.RESOURCE_ERROR,
                               uri=uri,
                               loc=uri,
                               msg=str(e))
        return stream
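A hypothetical call site, assuming the resolve() method above lives on a resolver class (here called Resolver) whose absolutize() and authorize() helpers are already set up:

resolver = Resolver()

# A relative reference needs a base URI so it can be absolutized first.
stream = resolver.resolve('data.xml', baseUri='http://example.org/docs/')
try:
    content = stream.read()
finally:
    stream.close()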
Exemple #56
 def add_response(self, url, status_code, headers, body=None):
     response = addinfourl(StringIO(body or ''), headers, url, status_code)
     responses = list(self.http_mock.side_effect)
     responses.append(response)
     self.http_mock.side_effect = responses
Exemple #57
    def retry_using_http_NTLM_auth(self, req, auth_header_field, realm,
                                   headers):
        user, pw = self.passwd.find_user_password(realm, req.get_full_url())
        if pw is not None:
            # ntlm secures a socket, so we must use the same socket for the complete handshake
            headers = dict(req.headers)
            headers.update(req.unredirected_hdrs)
            auth = 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(user)
            if req.headers.get(self.auth_header, None) == auth:
                return None
            headers[self.auth_header] = auth

            host = req.get_host()
            if not host:
                raise urllib2.URLError('no host given')
            h = None
            if req.get_full_url().startswith('https://'):
                h = httplib.HTTPSConnection(host)  # will parse host:port
            else:
                h = httplib.HTTPConnection(host)  # will parse host:port
            h.set_debuglevel(self._debuglevel)
            # we must keep the connection because NTLM authenticates the connection, not single requests
            headers["Connection"] = "Keep-Alive"
            headers = dict(
                (name.title(), val) for name, val in headers.items())
            h.request(req.get_method(), req.get_selector(), req.data, headers)
            r = h.getresponse()
            r.begin()
            r._safe_read(int(r.getheader('content-length')))
            if r.getheader('set-cookie'):
                # this is important for some web applications that store authentication-related info in cookies (it took a long time to figure out)
                headers['Cookie'] = r.getheader('set-cookie')
            r.fp = None  # remove the reference to the socket, so that it can not be closed by the response object (we want to keep the socket open)
            auth_header_value = r.getheader(auth_header_field, None)
            (ServerChallenge,
             NegotiateFlags) = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
                 auth_header_value[5:])
            user_parts = user.split('\\', 1)
            DomainName = user_parts[0].upper()
            UserName = user_parts[1]
            auth = 'NTLM %s' % ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
                ServerChallenge, UserName, DomainName, pw, NegotiateFlags)
            headers[self.auth_header] = auth
            headers["Connection"] = "Close"
            headers = dict(
                (name.title(), val) for name, val in headers.items())
            try:
                h.request(req.get_method(), req.get_selector(), req.data,
                          headers)
                # none of the configured handlers are triggered, for example redirect-responses are not handled!
                response = h.getresponse()

                def notimplemented():
                    raise NotImplementedError

                response.readline = notimplemented
                infourl = addinfourl(response, response.msg,
                                     req.get_full_url())
                infourl.code = response.status
                infourl.msg = response.reason
                return infourl
            except socket.error, err:
                raise urllib2.URLError(err)
Exemple #58
 def http_error_206(self, url, fp, errcode, errmsg, headers, data=None):
     # The next line is taken from urllib's URLopener.open_http
     # method, at the end after the line "if errcode == 200:"
     return urllib.addinfourl(fp, headers, 'http:' + url)
Exemple #59
 def http_error_default(self, req, fp, code, msg, hdrs):
     infourl = addinfourl(fp, hdrs, req.get_full_url())
     infourl.status = code
     infourl.code = code
     return infourl
Exemple #60
"""An extensible library for opening URLs using a variety of protocols