예제 #1
0
        def http_response(self, request, response):
            """Treat a ``Refresh`` header on a 200 response as a redirect.

            Accepted header forms are ``"<seconds>"`` (refresh the URL of
            the response itself) and ``"<seconds>; url=<target>"``.  If
            ``self.max_time`` is None or the delay does not exceed it, the
            target is written to the ``location`` header and the response
            is passed to ``self.parent.error`` with ``"refresh"`` in place
            of a status code; whatever that call returns is returned.  When
            ``self.honor_time`` is true and the delay exceeds one
            millisecond, the method sleeps for the delay first.

            A malformed header (non-numeric delay, no ``=`` after the
            semicolon, or a key other than ``url``) is reported through
            ``debug`` and the response is returned unchanged.
            """
            code, msg, hdrs = response.code, response.msg, response.info()

            if code == 200 and hdrs.has_key("refresh"):
                refresh = getheaders(hdrs, "refresh")[0]
                newurl = None
                try:
                    ii = refresh.find(";")
                    if ii != -1:
                        pause = float(refresh[:ii])
                        newurl_spec = refresh[ii+1:]
                        jj = newurl_spec.find("=")
                        # No "=" means there is no key to validate, so the
                        # header is malformed rather than a usable target.
                        if jj == -1:
                            raise ValueError("no url= part")
                        key, newurl = newurl_spec[:jj], newurl_spec[jj+1:]
                        if key.strip().lower() != "url":
                            raise ValueError("unexpected key %r" % key)
                    else:
                        pause = float(refresh)
                except ValueError:
                    debug("bad Refresh header: %r" % refresh)
                    return response
                if newurl is None:
                    # Bare delay: refresh the URL that was just fetched.
                    newurl = response.geturl()
                if (self.max_time is None) or (pause <= self.max_time):
                    if pause > 1E-3 and self.honor_time:
                        time.sleep(pause)
                    hdrs["location"] = newurl
                    # hardcoded http is NOT a bug
                    response = self.parent.error(
                        "http", request, response,
                        "refresh", msg, hdrs)

            return response
예제 #2
0
        def http_response(self, request, response):
            """Turn a ``Refresh`` header on a 200 response into a redirect.

            The header may be a bare delay (``"<seconds>"``, meaning the
            response's own URL is refreshed) or ``"<seconds>; url=<target>"``.
            Provided ``self.max_time`` is None or at least as large as the
            delay, the target is stored under the ``location`` header and
            the response is handed to ``self.parent.error`` using
            ``"refresh"`` as the pseudo status code; the result of that call
            is returned.  The method sleeps for the delay beforehand when
            ``self.honor_time`` is true and the delay is above 1 ms.

            Headers that cannot be parsed (delay is not a number, the part
            after ``;`` has no ``=``, or its key is not ``url``) are logged
            with ``debug`` and the original response is returned.
            """
            code, msg, hdrs = response.code, response.msg, response.info()

            if code == 200 and hdrs.has_key("refresh"):
                refresh = getheaders(hdrs, "refresh")[0]
                newurl = None
                try:
                    ii = refresh.find(";")
                    if ii == -1:
                        pause = float(refresh)
                    else:
                        pause = float(refresh[:ii])
                        newurl_spec = refresh[ii + 1:]
                        jj = newurl_spec.find("=")
                        # A spec without "=" carries no key at all; treat it
                        # as malformed instead of reading an unset name.
                        if jj == -1:
                            raise ValueError("no url= part")
                        key, newurl = newurl_spec[:jj], newurl_spec[jj + 1:]
                        if key.strip().lower() != "url":
                            raise ValueError("unexpected key %r" % key)
                except ValueError:
                    debug("bad Refresh header: %r" % refresh)
                    return response
                if newurl is None:
                    # Only a delay was given: refresh the current URL.
                    newurl = response.geturl()
                if (self.max_time is None) or (pause <= self.max_time):
                    if pause > 1E-3 and self.honor_time:
                        time.sleep(pause)
                    hdrs["location"] = newurl
                    # hardcoded http is NOT a bug
                    response = self.parent.error("http", request, response,
                                                 "refresh", msg, hdrs)

            return response
예제 #3
0
        def http_error_302(self, req, fp, code, msg, headers):
            """Follow a redirect response and return the result of opening it.

            The target is taken from the first ``location`` header, or the
            first ``uri`` header if there is no ``location``, and resolved
            against the URL of ``req``.

            Returns None when neither header is present or when
            ``self.redirect_request`` returns None; otherwise returns
            ``self.parent.open(new)`` for the new request.

            Raises HTTPError when the resolved target is not an http, https
            or ftp URL, when the target has already been visited
            ``self.max_repeats`` times, or when ``self.max_redirections``
            distinct URLs have been visited.
            """
            # Some servers (incorrectly) return multiple Location headers
            # (so probably same goes for URI).  Use first header.
            if headers.has_key('location'):
                newurl = getheaders(headers, 'location')[0]
            elif headers.has_key('uri'):
                newurl = getheaders(headers, 'uri')[0]
            else:
                return
            newurl = urlparse.urljoin(req.get_full_url(), newurl)

            # The target is chosen by the remote server, so for security
            # reasons do not follow it to protocols other than HTTP, HTTPS
            # or FTP (for example file://).
            newurl_lower = newurl.lower()
            if not (newurl_lower.startswith('http://') or
                    newurl_lower.startswith('https://') or
                    newurl_lower.startswith('ftp://')):
                raise HTTPError(newurl, code,
                                msg + " - Redirection to url '%s' is not "
                                "allowed" % newurl,
                                headers, fp)

            # XXX Probably want to forget about the state of the current
            # request, although that might interact poorly with other
            # handlers that also use handler-specific request attributes
            new = self.redirect_request(newurl, req, fp, code, msg, headers)
            if new is None:
                return

            # loop detection
            # .redirect_dict has a key url if url was previously visited.
            if hasattr(req, 'redirect_dict'):
                visited = new.redirect_dict = req.redirect_dict
                if (visited.get(newurl, 0) >= self.max_repeats or
                    len(visited) >= self.max_redirections):
                    raise HTTPError(req.get_full_url(), code,
                                    self.inf_msg + msg, headers, fp)
            else:
                # First redirect for this request chain: start a shared
                # visit counter on both the old and the new request.
                visited = new.redirect_dict = req.redirect_dict = {}
            visited[newurl] = visited.get(newurl, 0) + 1

            # Don't close the fp until we are sure that we won't use it
            # with HTTPError.
            fp.read()
            fp.close()

            return self.parent.open(new)
예제 #4
0
        def http_error_302(self, req, fp, code, msg, headers):
            """Handle a redirect by opening the URL the server points to.

            The redirect target is the first ``location`` header (falling
            back to the first ``uri`` header), joined onto the full URL of
            ``req``.

            Returns None if no such header exists or if
            ``self.redirect_request`` declines by returning None; otherwise
            the value of ``self.parent.open(new)``.

            Raises HTTPError if the joined target does not use the http,
            https or ftp scheme, if it was already visited
            ``self.max_repeats`` times, or if ``self.max_redirections``
            different URLs were already visited.
            """
            # Some servers (incorrectly) return multiple Location headers
            # (so probably same goes for URI).  Use first header.
            if headers.has_key('location'):
                newurl = getheaders(headers, 'location')[0]
            elif headers.has_key('uri'):
                newurl = getheaders(headers, 'uri')[0]
            else:
                return
            newurl = urlparse.urljoin(req.get_full_url(), newurl)

            # The server controls this value, so refuse redirects to any
            # protocol other than HTTP, HTTPS or FTP (for example file://).
            newurl_lower = newurl.lower()
            if not (newurl_lower.startswith('http://')
                    or newurl_lower.startswith('https://')
                    or newurl_lower.startswith('ftp://')):
                raise HTTPError(newurl, code,
                                msg + " - Redirection to url '%s' is not "
                                "allowed" % newurl,
                                headers, fp)

            # XXX Probably want to forget about the state of the current
            # request, although that might interact poorly with other
            # handlers that also use handler-specific request attributes
            new = self.redirect_request(newurl, req, fp, code, msg, headers)
            if new is None:
                return

            # loop detection
            # .redirect_dict has a key url if url was previously visited.
            if hasattr(req, 'redirect_dict'):
                visited = new.redirect_dict = req.redirect_dict
                if (visited.get(newurl, 0) >= self.max_repeats
                        or len(visited) >= self.max_redirections):
                    raise HTTPError(req.get_full_url(), code,
                                    self.inf_msg + msg, headers, fp)
            else:
                # No history yet: create the visit counter and share it
                # between the original request and the new one.
                visited = new.redirect_dict = req.redirect_dict = {}
            visited[newurl] = visited.get(newurl, 0) + 1

            # Don't close the fp until we are sure that we won't use it
            # with HTTPError.
            fp.read()
            fp.close()

            return self.parent.open(new)
예제 #5
0
 def http_response(self, request, response):
     """Copy header pairs parsed from an HTML document head onto the response.

     The response is wrapped with ``response_seek_wrapper`` if it has no
     ``seek`` attribute.  When ``is_html`` accepts its content-type headers
     and URL, ``parse_head`` is run over it and every ``(name, value)``
     pair it yields is assigned into the response's header object.  The
     response is always rewound to offset 0 after parsing, and parse
     errors leave the headers untouched.

     Returns the (possibly wrapped) response.
     """
     if not hasattr(response, "seek"):
         response = response_seek_wrapper(response)
     message = response.info()
     location = response.geturl()
     content_types = getheaders(response.info(), "content-type")
     if not is_html(content_types, location, self._allow_xhtml):
         return response

     try:
         try:
             parsed_pairs = parse_head(response, self.head_parser_class())
         finally:
             # Rewind whether or not parsing succeeded.
             response.seek(0)
     except (HTMLParser.HTMLParseError,
             sgmllib.SGMLParseError):
         return response

     for name, value in parsed_pairs:
         # rfc822.Message interprets this as appending, not clobbering
         message[name] = value
     return response
예제 #6
0
 def http_response(self, request, response):
     """Merge headers found in an HTML head into the response headers.

     A response lacking a ``seek`` attribute is first wrapped with
     ``response_seek_wrapper``.  If ``is_html`` says the response is HTML
     (judging by its content-type headers, its URL and
     ``self._allow_xhtml``), the head is parsed with ``parse_head`` and
     each resulting ``(header, value)`` pair is stored on the response's
     header object.  The response is sought back to 0 after the parse
     attempt; a parse error simply skips the merge.

     Returns the response, wrapped if wrapping was needed.
     """
     if not hasattr(response, "seek"):
         response = response_seek_wrapper(response)
     target_headers = response.info()
     page_url = response.geturl()
     ctypes = getheaders(response.info(), "content-type")
     if not is_html(ctypes, page_url, self._allow_xhtml):
         return response

     extra_headers = None
     try:
         try:
             extra_headers = parse_head(response,
                                        self.head_parser_class())
         finally:
             # Always restore the read position for later consumers.
             response.seek(0)
     except (HTMLParser.HTMLParseError, sgmllib.SGMLParseError):
         return response

     for key, content in extra_headers:
         # rfc822.Message interprets this as appending, not clobbering
         target_headers[key] = content
     return response
예제 #7
0
        def http_response(self, request, response):
            """Treat a ``Refresh`` header on a 200 response as a redirect.

            Only headers of the form ``"<seconds>; <key>=<target>"`` are
            acted on; anything without both ``;`` and ``=`` is ignored.  If
            ``self.max_time`` is None or the delay does not exceed it, the
            target is written to the ``location`` header and the response
            is passed to ``self.parent.error`` with ``"refresh"`` in place
            of a status code; the result of that call is returned.  When
            ``self.honor_time`` is true and the delay is positive, the
            method sleeps for the delay first.

            The delay is parsed as a float so fractional values such as
            ``"0.5"`` are accepted.  A delay that is not a number causes the
            header to be ignored and the response returned unchanged.
            """
            code, msg, hdrs = response.code, response.msg, response.info()

            if code == 200 and hdrs.has_key("refresh"):
                refresh = getheaders(hdrs, "refresh")[0]
                i = refresh.find(";")
                if i != -1:
                    pause, newurl_spec = refresh[:i], refresh[i+1:]
                    i = newurl_spec.find("=")
                    if i != -1:
                        try:
                            pause = float(pause)
                        except ValueError:
                            # Unparseable delay: the header comes from the
                            # server, so ignore it rather than crash.
                            return response
                        if (self.max_time is None) or (pause <= self.max_time):
                            # time.sleep rejects negative values.
                            if pause > 0 and self.honor_time:
                                time.sleep(pause)
                            newurl = newurl_spec[i+1:]
                            hdrs["location"] = newurl
                            # hardcoded http is NOT a bug
                            response = self.parent.error(
                                "http", request, response,
                                "refresh", msg, hdrs)

            return response
예제 #8
0
        def http_response(self, request, response):
            """Turn a ``Refresh`` header on a 200 response into a redirect.

            The header is acted on only when it looks like
            ``"<seconds>; <key>=<target>"``; a header missing ``;`` or ``=``
            is left alone.  Provided ``self.max_time`` is None or at least
            as large as the delay, the target is stored under the
            ``location`` header and the response is handed to
            ``self.parent.error`` using ``"refresh"`` as the pseudo status
            code; the value of that call is returned.  The method sleeps
            for the delay beforehand when ``self.honor_time`` is true and
            the delay is positive.

            The delay is read as a float, so fractional delays are
            accepted.  If it is not numeric the header is ignored and the
            original response is returned.
            """
            code, msg, hdrs = response.code, response.msg, response.info()

            if code == 200 and hdrs.has_key("refresh"):
                refresh = getheaders(hdrs, "refresh")[0]
                i = refresh.find(";")
                if i != -1:
                    pause, newurl_spec = refresh[:i], refresh[i + 1:]
                    i = newurl_spec.find("=")
                    if i != -1:
                        try:
                            pause = float(pause)
                        except ValueError:
                            # Server-supplied garbage in the delay field:
                            # skip the refresh instead of raising.
                            return response
                        if (self.max_time is None) or (pause <= self.max_time):
                            # Guard against negative delays, which
                            # time.sleep does not accept.
                            if pause > 0 and self.honor_time:
                                time.sleep(pause)
                            newurl = newurl_spec[i + 1:]
                            hdrs["location"] = newurl
                            # hardcoded http is NOT a bug
                            response = self.parent.error(
                                "http", request, response, "refresh", msg,
                                hdrs)

            return response