def _mech_open(self, url, data=None, update_history=True, visit=None,
               timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    """Build a request for *url* and open it via ``UserAgentBase.open``.

    url -- either a URL string or an object exposing ``get_full_url``.
        A string without a scheme is treated as a relative reference
        and joined onto the URL of ``self._response``.
    data -- forwarded to ``self._request`` and ``UserAgentBase.open``.
    update_history -- forwarded to ``self._visit_request``.
    visit -- forwarded to ``self._request``; the effective value is then
        read back from ``request.visit`` and defaults to True when None.
    timeout -- forwarded to ``self._request``.

    Raises BrowserStateError for a relative reference when there is no
    current response, and re-raises an ``urllib2.HTTPError`` whose ``fp``
    is None (i.e. one that does not carry a response).

    NOTE(review): this definition stops after capturing the response; it
    neither returns nor re-raises it.  It looks like a truncated excerpt
    -- confirm against the complete implementation before relying on it.
    """
    try:
        url.get_full_url
    except AttributeError:
        # string URL -- convert to absolute URL if required
        scheme = _rfc3986.urlsplit(url)[0]
        if scheme is None:
            # relative URL
            if self._response is None:
                raise BrowserStateError(
                    "can't fetch relative reference: "
                    "not viewing any document")
            url = _rfc3986.urljoin(self._response.geturl(), url)

    request = self._request(url, data, visit, timeout)
    visit = request.visit
    if visit is None:
        visit = True

    if visit:
        self._visit_request(request, update_history)

    success = True
    try:
        response = UserAgentBase.open(self, request, data)
    except urllib2.HTTPError as error:
        success = False
        if error.fp is None:  # not a response
            raise
        # An HTTPError with a body is kept and treated as the response.
        response = error
def _mech_open(self, url, data=None, update_history=True, visit=None,
               timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    """Create a request for *url* and open it with ``UserAgentBase.open``.

    url -- a URL string, or an object that has a ``get_full_url``
        attribute.  A string with no scheme is resolved against the URL
        of ``self._response``.
    data -- passed on to ``self._request`` and ``UserAgentBase.open``.
    update_history -- passed on to ``self._visit_request``.
    visit -- passed on to ``self._request``; afterwards taken from
        ``request.visit``, with None meaning True.
    timeout -- passed on to ``self._request``.

    Raises BrowserStateError when *url* is relative and no document is
    being viewed.  An ``urllib2.HTTPError`` without an ``fp`` is
    propagated unchanged.

    NOTE(review): the body ends once ``response`` has been assigned and
    never returns or raises it; this appears to be an incomplete excerpt
    -- verify against the full method.
    """
    try:
        url.get_full_url
    except AttributeError:
        # string URL -- convert to absolute URL if required
        scheme = _rfc3986.urlsplit(url)[0]
        if scheme is None:
            # relative URL
            if self._response is None:
                raise BrowserStateError("can't fetch relative reference: "
                                        "not viewing any document")
            url = _rfc3986.urljoin(self._response.geturl(), url)

    request = self._request(url, data, visit, timeout)
    visit = request.visit
    if visit is None:
        visit = True

    if visit:
        self._visit_request(request, update_history)

    success = True
    try:
        response = UserAgentBase.open(self, request, data)
    except urllib2.HTTPError as error:
        success = False
        if error.fp is None:  # not a response
            raise
        # The error object doubles as the response when it has a body.
        response = error
def http_error_302(self, req, fp, code, msg, headers):
    """Follow a redirect response by opening the redirected request.

    req -- the request that produced the redirect; its
        ``get_full_url()`` is the base for resolving the new URL.
    fp -- the response body object; drained and closed before the
        redirect is followed.
    code, msg, headers -- status code, message and headers of the
        redirect response; forwarded to ``self.redirect_request``.

    Returns None when the response carries neither a ``location`` nor a
    ``uri`` header, or when ``self.redirect_request`` returns None;
    otherwise returns the result of ``self.parent.open`` on the new
    request.

    Raises HTTPError when the target URL has already been visited
    ``self.max_repeats`` times or ``self.max_redirections`` distinct
    URLs have been visited in this redirect chain.
    """
    # Some servers (incorrectly) return multiple Location headers
    # (so probably same goes for URI).  Use first header.
    if "location" in headers:
        newurl = headers.getheaders("location")[0]
    elif "uri" in headers:
        newurl = headers.getheaders("uri")[0]
    else:
        return
    newurl = _rfc3986.clean_url(newurl, "latin-1")
    newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

    # XXX Probably want to forget about the state of the current
    # request, although that might interact poorly with other
    # handlers that also use handler-specific request attributes
    new = self.redirect_request(newurl, req, fp, code, msg, headers)
    if new is None:
        return

    # loop detection
    # .redirect_dict has a key url if url was previously visited.
    if hasattr(req, "redirect_dict"):
        visited = new.redirect_dict = req.redirect_dict
        if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
            raise HTTPError(req.get_full_url(), code,
                            self.inf_msg + msg, headers, fp)
    else:
        # First redirect in the chain: old and new request share one dict.
        visited = new.redirect_dict = req.redirect_dict = {}
    visited[newurl] = visited.get(newurl, 0) + 1

    # Don't close the fp until we are sure that we won't use it
    # with HTTPError.
    try:
        fp.read()
    finally:
        # Close even if draining the body fails, so fp is not leaked.
        fp.close()

    return self.parent.open(new)
def _mech_open(self, url, data=None, update_history=True, visit=None,
               timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
    """Open *url* and return the resulting response.

    url -- either a URL string or an object exposing ``get_full_url``.
        A string without a scheme is treated as a relative reference
        and joined onto the URL of ``self._response``.
    data -- forwarded to ``self._request`` and ``UserAgentBase.open``.
    update_history -- forwarded to ``self._visit_request``.
    visit -- forwarded to ``self._request``; the effective value is then
        read back from ``request.visit`` and defaults to True when None.
        When true, the request is registered via ``self._visit_request``
        and the response is stored with ``self._set_response``.
    timeout -- forwarded to ``self._request``.

    Returns a shallow copy of ``self._response`` when visiting;
    otherwise the response passed through
    ``_response.upgrade_response`` (or None if there was none).

    Raises BrowserStateError for a relative reference when there is no
    current response.  An ``urllib2.HTTPError`` that carries a response
    body is processed like any other response and then raised; one whose
    ``fp`` is None is re-raised immediately.
    """
    try:
        url.get_full_url
    except AttributeError:
        # string URL -- convert to absolute URL if required
        scheme = _rfc3986.urlsplit(url)[0]
        if scheme is None:
            # relative URL
            if self._response is None:
                raise BrowserStateError("can't fetch relative reference: "
                                        "not viewing any document")
            url = _rfc3986.urljoin(self._response.geturl(), url)

    request = self._request(url, data, visit, timeout)
    visit = request.visit
    if visit is None:
        visit = True

    if visit:
        self._visit_request(request, update_history)

    success = True
    try:
        response = UserAgentBase.open(self, request, data)
    except urllib2.HTTPError as error:
        success = False
        if error.fp is None:  # not a response
            raise
        response = error
    # Deliberately NOT handled here:
    #     except (IOError, socket.error, OSError), error:
    # Yes, urllib2 really does raise all these :-((
    # See test_urllib2.py for examples of socket.gaierror and OSError,
    # plus note that FTPHandler raises IOError.
    # XXX I don't seem to have an example of exactly socket.error being
    #  raised, only socket.gaierror...
    # I don't want to start fixing these here, though, since this is a
    # subclass of OpenerDirector, and it would break old code.  Even in
    # Python core, a fix would need some backwards-compat. hack to be
    # acceptable.

    if visit:
        self._set_response(response, False)
        response = copy.copy(self._response)
    elif response is not None:
        response = _response.upgrade_response(response)

    if not success:
        # The HTTP error was kept as the response above; raise it now
        # that it has been stored / upgraded.
        raise response
    return response
def http_error_302(self, req, fp, code, msg, headers):
    """Handle a redirect response by opening the URL it points to.

    req -- the request that was redirected; ``req.get_full_url()`` is
        the base against which the new URL is resolved.
    fp -- the body of the redirect response; read to the end and closed
        before the new request is opened.
    code, msg, headers -- status code, message and headers of the
        redirect response; passed to ``self.redirect_request``.

    Returns None if *headers* has neither a ``location`` nor a ``uri``
    entry, or if ``self.redirect_request`` declines (returns None);
    otherwise the result of ``self.parent.open`` for the new request.

    Raises HTTPError if the new URL was already visited
    ``self.max_repeats`` times, or ``self.max_redirections`` distinct
    URLs were visited during this chain of redirects.
    """
    # Some servers (incorrectly) return multiple Location headers
    # (so probably same goes for URI).  Use first header.
    # Membership is tested with ``in`` rather than ``has_key``, which
    # mapping types no longer provide on Python 3.
    if 'location' in headers:
        newurl = headers.getheaders('location')[0]
    elif 'uri' in headers:
        newurl = headers.getheaders('uri')[0]
    else:
        return
    newurl = _rfc3986.clean_url(newurl, "latin-1")
    newurl = _rfc3986.urljoin(req.get_full_url(), newurl)

    # XXX Probably want to forget about the state of the current
    # request, although that might interact poorly with other
    # handlers that also use handler-specific request attributes
    new = self.redirect_request(newurl, req, fp, code, msg, headers)
    if new is None:
        return

    # loop detection
    # .redirect_dict has a key url if url was previously visited.
    if hasattr(req, 'redirect_dict'):
        visited = new.redirect_dict = req.redirect_dict
        too_many_repeats = visited.get(newurl, 0) >= self.max_repeats
        too_many_redirects = len(visited) >= self.max_redirections
        if too_many_repeats or too_many_redirects:
            raise HTTPError(req.get_full_url(), code,
                            self.inf_msg + msg, headers, fp)
    else:
        # Start of a redirect chain: both requests share the same dict.
        visited = new.redirect_dict = req.redirect_dict = {}
    visited[newurl] = visited.get(newurl, 0) + 1

    # Don't close the fp until we are sure that we won't use it
    # with HTTPError.
    try:
        fp.read()
    finally:
        # Always release fp, even when reading the body raises.
        fp.close()

    return self.parent.open(new)
def __init__(self, base_url, url, text, tag, attrs):
    """Store the given link fields and compute the absolute URL.

    base_url -- URL against which *url* is resolved; stored as
        ``self.base_url``.
    url -- the URL as given; must not be None.
    text -- stored unchanged as ``self.text`` (may be None).
    tag -- stored as ``self.tag``; must not be None.
    attrs -- stored as ``self.attrs``; must not be None.

    ``self.absolute_url`` is set to ``_rfc3986.urljoin(base_url, url)``.
    """
    # Sanity check only: url, tag and attrs are required (text is not).
    assert None not in [url, tag, attrs]
    self.base_url = base_url
    self.absolute_url = _rfc3986.urljoin(base_url, url)
    self.url, self.text, self.tag, self.attrs = url, text, tag, attrs