def _mech_open(self, url, data=None, update_history=True, visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Build a request for ``url`` and open it, keeping HTTP error responses.

        ``url`` is either an object exposing ``get_full_url`` (used as-is) or
        a string.  A string without a scheme is treated as a relative
        reference and joined onto ``self._response.geturl()``.

        ``data``, ``visit`` and ``timeout`` are handed to ``self._request``;
        ``update_history`` is handed to ``self._visit_request`` when the
        request is visited.

        Raises ``BrowserStateError`` when a relative reference is given while
        ``self._response`` is ``None``, and re-raises ``urllib2.HTTPError``
        when the error carries no response body (``error.fp is None``).

        NOTE(review): this excerpt stops once the response (or the HTTP error
        standing in for it) has been captured; ``success`` and ``response``
        are not consumed in the visible code.
        """
        try:
            # Duck-type probe: request-like objects have get_full_url.
            url.get_full_url
        except AttributeError:
            # string URL -- convert to absolute URL if required
            scheme, authority = _rfc3986.urlsplit(url)[:2]
            if scheme is None:
                # relative URL
                if self._response is None:
                    raise BrowserStateError(
                        "can't fetch relative reference: "
                        "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        # The request object has the final say; an unset flag means "visit".
        visit = request.visit
        if visit is None:
            visit = True

        if visit:
            self._visit_request(request, update_history)

        success = True
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError as error:
            # "except X as e" is valid on Python 2.6+ and Python 3; the old
            # "except X, e" form is a syntax error on Python 3.
            success = False
            if error.fp is None:  # not a response
                raise
            # An HTTPError with a body is itself treated as the response.
            response = error
Example #2
0
    def _mech_open(self,
                   url,
                   data=None,
                   update_history=True,
                   visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Build a request for ``url`` and open it, keeping HTTP error responses.

        ``url`` may be a string or an object exposing ``get_full_url``.  A
        string with no scheme is resolved against the URL of the current
        response (``self._response.geturl()``).

        ``data``, ``visit`` and ``timeout`` are passed to ``self._request``.
        When the resulting request is to be visited (``request.visit`` is
        true or ``None``), ``self._visit_request`` is called with
        ``update_history``.

        Raises ``BrowserStateError`` for a relative reference while
        ``self._response`` is ``None``; re-raises ``urllib2.HTTPError`` when
        the error has no response body (``error.fp is None``).

        NOTE(review): the visible code ends after the response has been
        captured; ``success`` and ``response`` are not used further here.
        """
        try:
            # Duck-type probe: request-like objects have get_full_url.
            url.get_full_url
        except AttributeError:
            # string URL -- convert to absolute URL if required
            scheme, authority = _rfc3986.urlsplit(url)[:2]
            if scheme is None:
                # relative URL
                if self._response is None:
                    raise BrowserStateError("can't fetch relative reference: "
                                            "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        # The request object decides; an unset flag defaults to visiting.
        visit = request.visit
        if visit is None:
            visit = True

        if visit:
            self._visit_request(request, update_history)

        success = True
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError as error:
            # "as" form works on Python 2.6+ and 3; the comma form is
            # rejected by the Python 3 parser.
            success = False
            if error.fp is None:  # not a response
                raise
            # An HTTPError that carries a body doubles as the response.
            response = error
Example #3
0
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a redirect response, guarding against redirect loops.

        The target is the first ``location`` header, or failing that the
        first ``uri`` header; with neither present the method returns
        ``None``.  The target is cleaned, made absolute against
        ``req.get_full_url()`` and passed to ``self.redirect_request``; a
        ``None`` result from that call also yields ``None``.

        Raises ``HTTPError`` when the target has already been seen
        ``self.max_repeats`` times or ``self.max_redirections`` distinct
        targets have been recorded.  Otherwise ``fp`` is drained and closed
        and the result of ``self.parent.open`` on the new request is
        returned.
        """
        # Servers sometimes (incorrectly) send several Location headers, and
        # the same probably holds for URI, so only the first value is used.
        for header_name in ("location", "uri"):
            if header_name in headers:
                location = headers.getheaders(header_name)[0]
                break
        else:
            return

        cleaned = _rfc3986.clean_url(location, "latin-1")
        target = _rfc3986.urljoin(req.get_full_url(), cleaned)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        redirected = self.redirect_request(target, req, fp, code, msg, headers)
        if redirected is None:
            return

        # Loop detection: redirect_dict maps each visited URL to the number
        # of times it was visited, and is shared along the redirect chain.
        if not hasattr(req, "redirect_dict"):
            seen = {}
            redirected.redirect_dict = seen
            req.redirect_dict = seen
        else:
            seen = req.redirect_dict
            redirected.redirect_dict = seen
            repeated_too_often = seen.get(target, 0) >= self.max_repeats
            if repeated_too_often or len(seen) >= self.max_redirections:
                raise HTTPError(req.get_full_url(), code, self.inf_msg + msg, headers, fp)
        seen[target] = seen.get(target, 0) + 1

        # fp is only closed once it is certain that it will not be handed
        # to HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(redirected)
Example #4
0
    def _mech_open(self,
                   url,
                   data=None,
                   update_history=True,
                   visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Open ``url`` and return the response, raising it on HTTP errors.

        ``url`` is a string or an object exposing ``get_full_url``.  A string
        lacking a scheme is joined onto ``self._response.geturl()``;
        ``BrowserStateError`` is raised if there is no current response to
        resolve it against.

        The request comes from ``self._request(url, data, visit, timeout)``.
        If it is to be visited (``request.visit`` true or ``None``),
        ``self._visit_request`` is called with ``update_history`` beforehand,
        and afterwards the response is stored with ``self._set_response`` and
        a copy of ``self._response`` is handed back.  Otherwise a non-``None``
        response goes through ``_response.upgrade_response``.

        A ``urllib2.HTTPError`` carrying a body is processed like a normal
        response and then raised; one without a body is re-raised at once.
        """
        try:
            url.get_full_url
        except AttributeError:
            # Not request-like, so treat it as a string URL and make it
            # absolute when it has no scheme.
            scheme, _authority = _rfc3986.urlsplit(url)[:2]
            if scheme is None:
                if self._response is None:
                    raise BrowserStateError("can't fetch relative reference: "
                                            "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        visit = request.visit
        visit = True if visit is None else visit

        if visit:
            self._visit_request(request, update_history)

        failed = False
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError as error:
            if error.fp is None:  # not a response
                raise
            failed = True
            response = error
        # NOTE: urllib2 also raises IOError, socket.error and OSError here
        # (test_urllib2.py has examples of socket.gaierror and OSError, and
        # FTPHandler raises IOError; no example of exactly socket.error is
        # known).  They are deliberately left uncaught: this is a subclass
        # of OpenerDirector and handling them here would break old code.
        # Even in Python core, a fix would need some backwards-compat. hack
        # to be acceptable.

        if visit:
            self._set_response(response, False)
            response = copy.copy(self._response)
        elif response is not None:
            response = _response.upgrade_response(response)

        if failed:
            raise response
        return response
Example #5
0
    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a redirect response, guarding against redirect loops.

        The redirect target is taken from the first ``location`` header, or
        else the first ``uri`` header; ``None`` is returned when neither is
        present.  The target is cleaned, resolved against
        ``req.get_full_url()`` and given to ``self.redirect_request``; if
        that returns ``None`` this method returns ``None`` too.

        Raises ``HTTPError`` when the target was already visited
        ``self.max_repeats`` times or ``self.max_redirections`` distinct
        targets have been recorded.  Otherwise ``fp`` is read and closed and
        the result of ``self.parent.open(new)`` is returned.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        # Membership is tested with "in" rather than has_key(), which is
        # deprecated and missing from Python 3 mappings.
        if 'location' in headers:
            newurl = headers.getheaders('location')[0]
        elif 'uri' in headers:
            newurl = headers.getheaders('uri')[0]
        else:
            return
        newurl = _rfc3986.clean_url(newurl, "latin-1")
        newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(newurl, req, fp, code, msg, headers)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        # The same dict is shared by every request in the redirect chain.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                raise HTTPError(req.get_full_url(), code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new)
Example #6
0
 def __init__(self, base_url, url, text, tag, attrs):
     """Store the link fields plus ``url`` resolved against ``base_url``.

     ``url``, ``tag`` and ``attrs`` must not be ``None`` (checked with an
     assertion); ``text`` and ``base_url`` are stored unchecked.
     """
     assert None not in (url, tag, attrs)
     self.base_url = base_url
     # Absolute form of the link, computed once at construction time.
     self.absolute_url = _rfc3986.urljoin(base_url, url)
     self.url = url
     self.text = text
     self.tag = tag
     self.attrs = attrs
Example #7
0
 def __init__(self, base_url, url, text, tag, attrs):
     """Initialise the instance from its parts and resolve ``url``.

     An assertion rejects ``None`` for ``url``, ``tag`` or ``attrs``.
     ``absolute_url`` is ``url`` joined onto ``base_url``.
     """
     required = (url, tag, attrs)
     assert None not in required
     self.base_url = base_url
     self.absolute_url = _rfc3986.urljoin(base_url, url)
     self.url = url
     self.text = text
     self.tag = tag
     self.attrs = attrs