Esempio n. 1
 def test_get_meta_refresh(self):
     # Checks get_meta_refresh() against three HtmlResponse fixtures: r1 is
     # expected to yield (5.0, ''), r2 and r3 are expected to yield
     # (None, None).
     # NOTE(review): this copy looks truncated -- the triple-quoted bodies are
     # never closed and the opening of the first assertion is missing.
     # Restore from the original source before running.
     r1 = HtmlResponse("", body="""
     <head><title>Dummy</title><meta http-equiv="refresh" content="5;url=" /></head>
     r2 = HtmlResponse("", body="""
     <meta http-equiv="refresh" content="5;url=" /></head>
     r3 = HtmlResponse("", body="""
 <noscript><meta http-equiv="REFRESH" content="0;url=</noscript>
 <script type="text/javascript">
     document.write('<meta http-equiv="REFRESH" content="0;url=">');
         get_meta_refresh(r1), (5.0, ''))
     self.assertEqual(get_meta_refresh(r2), (None, None))
     self.assertEqual(get_meta_refresh(r3), (None, None))
Esempio n. 2
 def test_get_meta_refresh(self):
     # Checks get_meta_refresh() against three HtmlResponse fixtures: the
     # first expectation is (5.0, ''), r2 and r3 are expected to yield
     # (None, None).
     # NOTE(review): this copy looks truncated -- the HtmlResponse(...) calls
     # lost their body arguments and the first assertEqual lost its opening
     # line. Restore from the original source before running.
     r1 = HtmlResponse("",
     <head><title>Dummy</title><meta http-equiv="refresh" content="5;url=" /></head>
     r2 = HtmlResponse("",
     <meta http-equiv="refresh" content="5;url=" /></head>
     r3 = HtmlResponse("",
 <noscript><meta http-equiv="REFRESH" content="0;url=</noscript>
 <script type="text/javascript">
     document.write('<meta http-equiv="REFRESH" content="0;url=">');
                      (5.0, ''))
     self.assertEqual(get_meta_refresh(r2), (None, None))
     self.assertEqual(get_meta_refresh(r3), (None, None))
Esempio n. 3
    def process_response(self, request, response, spider):
        """Retry responses that look like a redirect or a captcha page.

        The request is handed to ``self._retry`` (with a short reason string)
        when any of the following holds:

        * the response status is 301 or 307;
        * ``get_meta_refresh`` reports a redirect URL for the response;
        * the page contains an ``<input>`` whose ``id`` contains
          ``captchacharacters``.

        If ``self._retry`` returns a falsy value, or none of the checks
        match, the original ``response`` is returned.
        """
        url = response.url

        if response.status in [301, 307]:
            log.msg("trying to redirect us: %s" % url, level=log.INFO)
            reason = 'redirect %d' % response.status

            return self._retry(request, reason, spider) or response

        # handle meta redirect
        _interval, redirect_url = get_meta_refresh(response)

        if redirect_url:
            log.msg("trying to redirect us: %s" % url, level=log.INFO)
            reason = 'meta'

            return self._retry(request, reason, spider) or response

        # test for captcha page
        hxs = HtmlXPathSelector(response)
        captcha = hxs.select(
            ".//input[contains(@id, 'captchacharacters')]").extract()

        if captcha:
            log.msg("captcha page %s" % url, level=log.INFO)
            reason = 'capcha'

            return self._retry(request, reason, spider) or response

        return response
Esempio n. 4
    def process_response(self, request, response, spider):
        """Follow HTTP-header and meta-refresh redirects for ``response``.

        Returns ``response`` unchanged when the request has a
        ``dont_redirect`` meta key or when no redirect applies; otherwise
        returns the result of ``self._redirect`` for the rewritten request.

        * HEAD requests: only 301/302/303/307 with a ``Location`` header are
          followed, keeping the request as-is apart from the URL.
        * 302/303: followed via ``self._redirect_request_using_get``.
        * 301/307: followed with the original request, new URL.
        * ``HtmlResponse`` with a meta refresh whose interval is below
          ``self.max_metarefresh_delay``: followed via
          ``self._redirect_request_using_get``.
        """
        if 'dont_redirect' in request.meta:
            return response

        if request.method.upper() == 'HEAD':
            if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
                redirected_url = urljoin(request.url, response.headers['location'])
                redirected = request.replace(url=redirected_url)
                return self._redirect(redirected, request, spider, response.status)
            # A HEAD request that is not header-redirected is passed through;
            # previously this return was unreachable and HEAD responses fell
            # through to the checks below.
            return response

        if response.status in [302, 303] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = self._redirect_request_using_get(request, redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        if isinstance(response, HtmlResponse):
            interval, url = get_meta_refresh(response)
            if url and interval < self.max_metarefresh_delay:
                redirected = self._redirect_request_using_get(request, url)
                return self._redirect(redirected, request, spider, 'meta refresh')

        return response
Esempio n. 5
    def process_response(self, request, response, spider):
        """Follow HTTP-header and meta-refresh redirects for ``response``.

        ``response`` is returned unchanged when the request has a
        ``dont_redirect`` meta key or when none of the redirect rules match.
        Otherwise the value returned by ``self._redirect`` is passed back.
        """
        if 'dont_redirect' in request.meta:
            return response

        if request.method.upper() == 'HEAD':
            if response.status in [301, 302, 303, 307
                                   ] and 'Location' in response.headers:
                redirected_url = urljoin(request.url,
                                         response.headers['location'])
                redirected = request.replace(url=redirected_url)
                return self._redirect(redirected, request, spider,
                                      response.status)
            # HEAD requests that are not header-redirected stop here instead
            # of falling through to the checks below.
            return response

        # 302/303: re-issue the request through the GET helper.
        if response.status in [302, 303] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = self._redirect_request_using_get(
                request, redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        # 301/307: keep the request as it is, only the URL changes.
        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return self._redirect(redirected, request, spider, response.status)

        # Meta refresh is only followed when its delay is below the limit.
        if isinstance(response, HtmlResponse):
            interval, url = get_meta_refresh(response)
            if url and interval < self.max_metarefresh_delay:
                redirected = self._redirect_request_using_get(request, url)
                return self._redirect(redirected, request, spider,
                                      'meta refresh')

        return response
Esempio n. 6
    def _check_redirect(self, request, response):
        if request.method.upper() == 'HEAD':
            if response.status in [301, 302, 303, 307] and 'Location' in response.headers:
                redirected_url = urljoin_rfc(request.url, response.headers['location'])
                redirected = request.replace(url=redirected_url)
                return redirected,response.status
                return None,None

        if response.status in [302, 303] and 'Location' in response.headers:
            redirected_url = urljoin_rfc(request.url, response.headers['location'])
            redirected = self._redirect_request_using_get(request, redirected_url)
            return redirected,response.status

        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin_rfc(request.url, response.headers['location'])
            redirected = request.replace(url=redirected_url)
            return redirected,response.status

        if  request.meta.get('meta_refresh',None) and  \
            isinstance(response, HtmlResponse):
            interval, url = get_meta_refresh(response)
            if url and interval < self.max_metarefresh_delay:
                redirected = self._redirect_request_using_get(request, url)
                return redirected,'meta refresh'

        return None,None
 def process_response(self, request, response, spider):
     """Record redirects to a JSON file and retry the redirected request.

     A redirect is detected either from a 301/302/303/307 status with a
     ``Location`` header or from a meta-refresh URL reported by
     ``get_meta_refresh``. For each one, a ``{"from_url", "params",
     "to_url"}`` dict is appended as JSON to ``'output/'`` plus the
     ``redirect_filename`` of a fresh ``DmozSpider`` instance, and the
     request is passed to ``self._retry``. If ``self._retry`` returns a falsy
     value, or no redirect is detected, ``response`` is returned.
     """
     obj = DmozSpider()
     redirect_filename = 'output/' + obj.redirect_filename
     from_url = request.url
     if response.status in [301, 302, 303, 307] and "Location" in response.headers:
         to_url = response.headers["Location"]
         get_list = self.extractgetList(from_url)
         redirect_dict = {"from_url": from_url, "params": get_list, "to_url": to_url}
         # Use a context manager so the file handle is closed every time.
         with open(redirect_filename, 'a') as f:
             json.dump(redirect_dict, f, indent=2)
         log.msg("trying to redirect : %s -> %s" % (from_url, to_url), level=log.INFO)
         reason = 'redirect %d' % response.status
         return self._retry(request, reason, spider) or response
     interval, redirect_url = get_meta_refresh(response)
     # handle meta redirect
     if redirect_url:
         # The target of a meta redirect is the URL found in the page;
         # previously to_url was left as '' in this branch.
         to_url = redirect_url
         get_list = self.extractgetList(to_url)
         redirect_dict = {"from_url": from_url, "params": get_list, "to_url": to_url}
         with open(redirect_filename, 'a') as f:
             json.dump(redirect_dict, f, indent=2)
         log.msg("trying to redirect : %s -> %s" % (from_url, to_url), level=log.INFO)
         reason = 'meta'
         return self._retry(request, reason, spider) or response
     return response
Esempio n. 8
    def process_response(self, request, response, spider):
        """Follow a meta-refresh redirect for requests flagged ``meta_refresh``.

        Requests without a truthy ``meta_refresh`` meta value are passed
        through. For flagged requests, ``IgnoreRequest`` is raised when no
        refresh URL is found or when the URL contains any entry of
        ``off_keys``. An absolute (``http``-prefixed) target is followed via
        ``self._redirect`` after clearing the flag, dropping the
        Content-Type / Referer / Content-Length headers and raising the
        priority by 100; any other target leaves the response untouched.
        """
        if not request.meta.get("meta_refresh"):
            return response

        _, target = get_meta_refresh(response)

        if not target:
            raise IgnoreRequest

        # ignore the page when the target matches a blocked key
        if any(blocked in target for blocked in off_keys):
            raise IgnoreRequest

        if not target.startswith("http"):
            return response

        request.meta["meta_refresh"] = False
        for header_name in ('Content-Type', "Referer", 'Content-Length'):
            request.headers.pop(header_name, None)
        request.priority += 100

        followed = request.replace(url=target)
        outcome = self._redirect(
            followed, request, spider,
            "local pan middlewares, meta redirected!!!")
        return outcome or response
Esempio n. 9
    def _check_redirect(self, request, response):
        if request.method.upper() == 'HEAD':
            if response.status in [301, 302, 303, 307
                                   ] and 'Location' in response.headers:
                redirected_url = urljoin_rfc(request.url,
                redirected = request.replace(url=redirected_url)
                return redirected, response.status
                return None, None

        if response.status in [302, 303] and 'Location' in response.headers:
            redirected_url = urljoin_rfc(request.url,
            redirected = self._redirect_request_using_get(
                request, redirected_url)
            return redirected, response.status

        if response.status in [301, 307] and 'Location' in response.headers:
            redirected_url = urljoin_rfc(request.url,
            redirected = request.replace(url=redirected_url)
            return redirected, response.status

        if  request.meta.get('meta_refresh',None) and  \
            isinstance(response, HtmlResponse):
            interval, url = get_meta_refresh(response)
            if url and interval < self.max_metarefresh_delay:
                redirected = self._redirect_request_using_get(request, url)
                return redirected, 'meta refresh'

        return None, None
Esempio n. 10
	def process_response(self, request, response, spider):
		"""Retry redirect and captcha responses, noting each one in a file.

		Every retried response appends two lines to ``redirects.txt``: the
		response URL and the reason. The request is retried when the status
		is 301 or 307, when ``get_meta_refresh`` reports a redirect URL, or
		when the page has an ``<input>`` whose id contains
		``captchacharacters``. If ``self._retry`` returns a falsy value, or
		nothing matches, ``response`` is returned.
		"""
		url = response.url
		with open('redirects.txt', 'a+') as f:
			if response.status in [301, 307]:
				reason = 'redirect %d' % response.status
				f.write("trying to redirect us: " + url + '\n')
				# Write the formatted reason; the old code concatenated the
				# status onto a literal '%d'.
				f.write(reason + '\n')
				return self._retry(request, reason, spider) or response
			interval, redirect_url = get_meta_refresh(response)
			# handle meta redirect
			if redirect_url:
				f.write("trying to redirect us: " + url + '\n')
				f.write('redirect meta' + '\n')
				reason = 'meta'
				return self._retry(request, reason, spider) or response
			hxs = HtmlXPathSelector(response)
			# test for captcha page
			captcha = hxs.select(".//input[contains(@id, 'captchacharacters')]").extract()
			if captcha:
				f.write("trying to redirect us: " + url + '\n')
				f.write('redirect capcha' + '\n')
				reason = 'capcha'
				return self._retry(request, reason, spider) or response
		return response
Esempio n. 11
 def process_response(self, request, response, spider):
     """Retry responses that look like redirects, blocks or captcha pages.

     The request is passed to ``self._retry`` when the status is 301/307,
     when the status is 502/503, when ``get_meta_refresh`` reports a redirect
     URL, when the page has a captcha ``<input>`` or contains the text
     "Dostęp zablokowany", or when the status is 504. If ``self._retry``
     returns a falsy value, or nothing matches, ``response`` is returned.
     """
     url = response.url
     if response.status in [301, 307]:
         log.msg("trying to redirect us: %s" %url, level=log.INFO)
         reason = 'redirect %d' %response.status
         return self._retry(request, reason, spider) or response
     interval, redirect_url = get_meta_refresh(response)
     if response.status in [502, 503]:
         log.msg("Service Unavailable: %s" %url, level=log.INFO)
         reason = 'Possible block with 5xx error'
         return self._retry(request, reason, spider) or response
     # handle meta redirect
     if redirect_url:
         log.msg("trying to redirect us: %s" %url, level=log.INFO)
         reason = 'meta'
         return self._retry(request, reason, spider) or response
     hxs = HtmlXPathSelector(response)
     # test for captcha page
     captcha = hxs.select(".//input[contains(@id, 'captchacharacters')]").extract()
     if captcha or "Dostęp zablokowany" in response.text:
         log.msg("captcha page %s" %url, level=log.INFO)
         reason = 'captcha'
         return self._retry(request, reason, spider) or response
     # 502 and 503 have already returned above, so only 504 reaches here.
     if response.status in [502, 503, 504]:
         log.msg("Captcha with status: \n%s" %response.text, level=log.INFO)
         reason = 'captcha'
         return self._retry(request, reason, spider) or response
     return response
Esempio n. 12
    def process_response(self, request, response, spider):
        """Follow an HTML meta-refresh redirect when one applies.

        The response is returned as-is for requests whose ``dont_redirect``
        meta value is truthy, for HEAD requests, for responses that are not
        ``HtmlResponse`` instances, and when no refresh URL with an interval
        below ``self._maxdelay`` is found.
        """
        # Guard clauses, checked in the same order as a single `or` chain.
        if request.meta.get('dont_redirect', False):
            return response
        if request.method == 'HEAD':
            return response
        if not isinstance(response, HtmlResponse):
            return response

        delay, target = get_meta_refresh(response)
        if not (target and delay < self._maxdelay):
            return response

        follow_up = self._redirect_request_using_get(request, target)
        return self._redirect(follow_up, request, spider, 'meta refresh')
Esempio n. 13
    def process_response(self, request, response, spider):
        """Redirect to a meta-refresh target found in an HTML response.

        Nothing is done (the response is returned) when the request's
        ``dont_redirect`` meta value is truthy, when the method is HEAD, or
        when the response is not an ``HtmlResponse``. Otherwise a refresh URL
        whose interval is below ``self._maxdelay`` is followed through
        ``self._redirect``.
        """
        skip_redirect = (
            request.meta.get('dont_redirect', False)
            or request.method == 'HEAD'
            or not isinstance(response, HtmlResponse)
        )
        if skip_redirect:
            return response

        refresh_interval, refresh_url = get_meta_refresh(response)
        if refresh_url and refresh_interval < self._maxdelay:
            return self._redirect(
                self._redirect_request_using_get(request, refresh_url),
                request,
                spider,
                'meta refresh',
            )

        return response
Esempio n. 14
    def process_response(self, request, response, spider):
        """Follow a meta-refresh redirect on an HTML response.

        The response is returned untouched when ``dont_redirect`` is a key of
        the request meta, when the method is HEAD, when the response is not
        an ``HtmlResponse``, or when no refresh URL with an interval below
        ``self._maxdelay`` is found.
        """
        if "dont_redirect" in request.meta:
            return response
        if request.method == "HEAD":
            return response
        if not isinstance(response, HtmlResponse):
            return response

        # From here on the response is known to be an HtmlResponse.
        delay, target = get_meta_refresh(response)
        if not (target and delay < self._maxdelay):
            return response

        follow_up = self._redirect_request_using_get(request, target)
        return self._redirect(follow_up, request, spider, "meta refresh")
 def process_response(self, request, response, spider):
     """Retry responses that are HTTP or meta-refresh redirects.

     The request is passed to ``self._retry`` when the status is 301 or 307,
     or when ``get_meta_refresh`` reports a redirect URL. If ``self._retry``
     returns a falsy value, or no redirect is detected, ``response`` is
     returned.
     """
     url = response.url
     if response.status in [301, 307]:
         log.msg("trying to redirect us: %s" %url, level=log.INFO)
         reason = 'redirect %d' %response.status
         return self._retry(request, reason, spider) or response
     interval, redirect_url = get_meta_refresh(response)
     # handle meta redirect
     if redirect_url:
         log.msg("trying to redirect us: %s" %url, level=log.INFO)
         reason = 'meta'
         return self._retry(request, reason, spider) or response
     # Ordinary responses must be handed back; without this the method
     # returned None for every non-redirect response.
     return response
Esempio n. 16
 def process_response(self, request, response, spider):
     """Retry 301/307 and meta-refresh responses; pass everything else on.

     ``self._retry`` receives the request, a short reason string and the
     spider. Its result is returned when truthy; otherwise ``response`` is.
     """
     url = response.url
     if response.status in [301, 307]:
         log.msg("trying to redirect us: %s" % url, level=log.INFO)
         reason = 'redirect %d' % response.status
         return self._retry(request, reason, spider) or response
     interval, redirect_url = get_meta_refresh(response)
     # handle meta redirect
     if redirect_url:
         log.msg("trying to redirect us: %s" % url, level=log.INFO)
         reason = 'meta'
         return self._retry(request, reason, spider) or response
     # Non-redirect responses are returned unchanged instead of falling off
     # the end of the method (which yielded None).
     return response
Esempio n. 17
    def process_response(self, request, response, spider):
        # Follows an HTML meta-refresh redirect: the response is returned
        # unchanged when the request's "dont_redirect" meta value is truthy,
        # the method is HEAD, or the response is not an HtmlResponse.
        if (request.meta.get("dont_redirect", False)
                or request.method == "HEAD"
                or not isinstance(response, HtmlResponse)):
            return response

        # NOTE(review): the call below is cut off after its first argument in
        # this copy (no closing parenthesis); the missing argument(s) cannot
        # be determined from here -- restore from the original source.
        interval, url = get_meta_refresh(response,
        # Only refresh targets with an interval below self._maxdelay are followed.
        if url and interval < self._maxdelay:
            redirected = self._redirect_request_using_get(request, url)
            return self._redirect(redirected, request, spider, "meta refresh")

        return response
    def process_response(self, request, response, spider):
        # Follows an HTML meta-refresh redirect: the response is returned
        # unchanged when "dont_redirect" is in the request meta, the method is
        # HEAD, or the response is not an HtmlResponse.
        if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
                not isinstance(response, HtmlResponse):
            return response

        if isinstance(response, HtmlResponse):
            interval, url = get_meta_refresh(response)
            # Only refresh targets with an interval below self._maxdelay qualify.
            if url and interval < self._maxdelay:
                redirected = self._redirect_request_using_get(request, url)
   #             print response.body
                # NOTE(review): the `if` below has no indented body in this
                # copy, so the snippet does not parse. It is unclear whether
                # the following return was meant to be conditional on the URL
                # containing "view" -- confirm against the original source.
                if (response.url.find("view") > 0):
                return self._redirect(redirected, request, spider, 'meta refresh')

        return response
Esempio n. 19
    def process_response(self, request, response, spider):
        # Retry hook: requests whose "dont_retry" meta value is truthy are
        # passed through; responses whose status is in self.retry_http_codes
        # are retried with a reason built by response_status_message().
        if request.meta.get('dont_retry', False):
            return response
        if response.status in self.retry_http_codes:
            reason = response_status_message(response.status)
            return self._retry(request, reason, spider) or response
        interval, redirect_url= get_meta_refresh(response)
        # NOTE(review): the `if` below has no body in this copy, so the
        # snippet does not parse; the intended handling of a meta-refresh
        # redirect is missing -- restore from the original source.
        if redirect_url:

        # this is your check

        # A 403 with a non-empty URL is retried; the reason string embeds the URL.
        if response.status == 403 and response.url:
            return self._retry(request, 'response got xpath "{}"'.format(response.url), spider) or response
        return response
    def process_response(self, request, response, spider):
        """Follow at most one meta-refresh redirect per request.

        ``dont_filter`` is always set to True in the request meta. The
        response is returned unchanged (and ``dont_redirect`` is set in the
        meta) when ``dont_redirect`` is already a meta key, the method is
        HEAD, the response is not an ``HtmlResponse``, or the request has
        already been redirected at least once (``redirect_times`` >= 1).
        Otherwise a refresh URL whose interval is below ``self._maxdelay`` is
        followed via ``self._redirect`` with ``dont_filter`` set on the new
        request.
        """
        request.meta['dont_filter'] = True
        # Default to 0 so a request with no 'redirect_times' key does not
        # compare None >= 1, which raises TypeError on Python 3.
        if 'dont_redirect' in request.meta or request.method == 'HEAD' or \
                not isinstance(response, HtmlResponse) or \
                request.meta.get('redirect_times', 0) >= 1:
            request.meta['dont_redirect'] = True
            return response

        # The guard above guarantees the response is an HtmlResponse here.
        interval, url = get_meta_refresh(response)
        if url and interval < self._maxdelay:
            redirected = self._redirect_request_using_get(request, url)
            redirected.dont_filter = True
            return self._redirect(redirected, request, spider,
                                  'meta refresh')

        return response
Esempio n. 21
    def process_response(self, request, response, spider):
        """Follow header-based and meta-refresh redirects.

        302/303 with a ``Location`` header are followed through
        ``self._redirect_request_using_get``; 301/307 with a ``Location``
        header keep the request and only swap the URL; a meta refresh whose
        interval is below ``self.max_metarefresh_delay`` is followed through
        ``self._redirect_request_using_get``. When none applies the response
        is returned unchanged.
        """
        status = response.status

        if status in [302, 303]:
            if 'Location' in response.headers:
                target = urljoin_rfc(request.url, response.headers['location'])
                follow_up = self._redirect_request_using_get(request, target)
                return self._redirect(follow_up, request, spider, status)

        if status in [301, 307]:
            if 'Location' in response.headers:
                target = urljoin_rfc(request.url, response.headers['location'])
                follow_up = request.replace(url=target)
                return self._redirect(follow_up, request, spider, status)

        delay, refresh_url = get_meta_refresh(response)
        if not (refresh_url and delay < self.max_metarefresh_delay):
            return response

        follow_up = self._redirect_request_using_get(request, refresh_url)
        return self._redirect(follow_up, request, spider, 'meta refresh')
 def process_response(self, request, response, spider):
     """Record redirects to a JSON file and retry the redirected request.

     A redirect is detected either from a 301/302/303/307 status with a
     ``Location`` header or from a meta-refresh URL reported by
     ``get_meta_refresh``. Each one is appended as a ``{"from_url",
     "params", "to_url"}`` JSON object to ``'output/'`` plus the
     ``redirect_filename`` of a fresh ``DmozSpider`` instance, and the
     request is passed to ``self._retry``. If ``self._retry`` returns a falsy
     value, or no redirect is detected, ``response`` is returned.
     """
     obj = DmozSpider()
     redirect_filename = 'output/' + obj.redirect_filename
     from_url = request.url
     if response.status in [301, 302, 303, 307
                            ] and "Location" in response.headers:
         to_url = response.headers["Location"]
         get_list = self.extractgetList(from_url)
         redirect_dict = {
             "from_url": from_url,
             "params": get_list,
             "to_url": to_url
         }
         # Context manager closes the file handle after every write.
         with open(redirect_filename, 'a') as f:
             json.dump(redirect_dict, f, indent=2)
         log.msg("trying to redirect : %s -> %s" % (from_url, to_url),
                 level=log.INFO)
         reason = 'redirect %d' % response.status
         return self._retry(request, reason, spider) or response
     interval, redirect_url = get_meta_refresh(response)
     # handle meta redirect
     if redirect_url:
         # The target of a meta redirect is the URL found in the page;
         # previously to_url stayed '' in this branch.
         to_url = redirect_url
         get_list = self.extractgetList(to_url)
         redirect_dict = {
             "from_url": from_url,
             "params": get_list,
             "to_url": to_url
         }
         with open(redirect_filename, 'a') as f:
             json.dump(redirect_dict, f, indent=2)
         log.msg("trying to redirect : %s -> %s" % (from_url, to_url),
                 level=log.INFO)
         reason = 'meta'
         return self._retry(request, reason, spider) or response
     return response
Esempio n. 23
 def process_response(self, request, response, spider):
     """Retry responses that are redirects or "userblocked" pages.

     A retry reason is chosen when the status is 301/302/307, when
     ``get_meta_refresh`` reports a redirect URL, or when the response URL
     contains ``userblocked``. With a reason, ``self._retry`` is called and
     its result returned if truthy; in every other case ``response`` is
     returned.
     """
     url = response.url
     reason = None

     if response.status in [301,302, 307]:
         log.msg("trying to redirect us: %s" %url, level=log.INFO)
         reason = 'redirect %d' %response.status
     else:
         interval, redirect_url = get_meta_refresh(response)
         if redirect_url:
             # meta redirect
             log.msg("trying to redirect us: %s" %url, level=log.INFO)
             reason = 'meta'
         elif "userblocked" in response.url:
             # blocked-user page
             log.msg("blocked page %s" %url, level=log.INFO)
             reason = 'blocked'

     if reason is None:
         return response
     return self._retry(request, reason, spider) or response
 def process_response(self, request, response, spider):
     """Retry responses that look like a redirect or a captcha page.

     The request is passed to ``self._retry`` when the status is 301 or 302,
     when ``get_meta_refresh`` reports a redirect URL, or when the page has
     an ``<input>`` whose id contains ``captchacharacters``. If
     ``self._retry`` returns a falsy value, or nothing matches, ``response``
     is returned.
     """
     url = response.url
     logging.debug('WangJun I am OK111')
     logging.debug('WangJun I am OK111 %d' % response.status)
     # NOTE(review): the logging calls inside the three `if` bodies below were
     # fused onto the `if` lines (a syntax error); they are restored as
     # logging.info(...) -- confirm the intended level.
     if response.status in [301, 302]:
         logging.info("WangJun trying to redirect us: %s" % url)
         reason = 'redirect %d' % response.status
         return self._retry(request, reason, spider) or response
     interval, redirect_url = get_meta_refresh(response)
     # handle meta redirect
     if redirect_url:
         logging.info("trying to redirect us: %s" % url)
         reason = 'meta'
         return self._retry(request, reason, spider) or response
     hxs = Selector(response)  # HtmlXPathSelector(response)
     # test for captcha page
     captcha = hxs.xpath(".//input[contains(@id, 'captchacharacters')]").extract()
     if captcha:
         logging.info("captcha page %s" % url)
         reason = 'capcha'
         return self._retry(request, reason, spider) or response
     return response
Esempio n. 25
    def process_response(self, request, response, spider):
        # Retries 301/307 responses (using the Location header as the logged
        # URL) and, further down, meta-refresh redirects.
        # NOTE(review): this copy is structurally damaged -- the `try:` that
        # matches the `except` near the end is missing, and two logging calls
        # lost their leading call name. The `except Exception, e` / `print`
        # syntax is Python 2 only.
        # Skip pages that were fetched successfully
        if response.status == 200:
            return response
        url = response.url
        if response.status in [301, 307]:
            #             req = urllib2.Request(url='')
            #             opener = urllib2.build_opener()
            #             reponse =
            #             return reponse

            #             interval, redirect_url = get_meta_refresh(response)
            #             log.msg('redirect response.body:%s' %
            #                     response.body.extract(), level=log.INFO)
            #             url = request.meta['redirect_urls']
            #             url = 'https:' + url[5:]
            #             log.msg("response redirect_url: %s" % redirect_url, level=log.INFO)
            #             log.msg("request.headers: %s" % repr(request.headers))
            #             log.msg("request.body: %s" % repr(request.body))
            #             log.msg("response.headers: %s" % repr(response.headers))
            #             log.msg("response.request type: %s" %
            #                     type(response.request), level=log.INFO)
            #             log.msg("response type: %s" %
            #                     type(response), level=log.INFO)
            #             request.headers['Accept-Encoding'] = 'identity'
            #             request.headers['Host'] = ''
            #             request.headers['Connection'] = 'close'
            #             del request.headers['Accept-Language']
            #             del request.headers['Accept']
            # #             del request.headers['Cookie']
            #             request.meta['dont_merge_cookies'] = True

            #             """
            #             'Accept-Language': ['en'],
            #             'Accept-Encoding': ['identity'],
            #             'Host': [''],
            #             'Accept': ['text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'],
            #             'User-Agent': ['Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'], 'Connection': ['close'], 'Referer': [''],
            #             'Cookie': ['__cfduid=dc80e726a73461e3823f2a312eb2b34cd1478357543']
            #             """

            #             log.msg("trying to redirect us: %s" % url, level=log.INFO)
            # The redirect target from the Location header replaces the response URL.
            url = response.headers['Location']
            # NOTE(review): the next line is missing the name of the logging
            # call it belonged to.
  "trying to redirect us: %s" % url)
            reason = 'redirect %d' % response.status
            #             return response.request
            return self._retry(request, reason, spider) or response

#         else:
#             return response

# The get_meta_refresh method causes: 'Response' object has no attribute 'text'
# The section below is temporarily wrapped in a try as a workaround

            # handle meta redirect
            interval, redirect_url = get_meta_refresh(response)
            if redirect_url:
                #             log.msg("trying to redirect us: %s" % url, level=log.INFO)
                # NOTE(review): the next line is missing the name of the
                # logging call it belonged to.
      "trying to redirect us: %s" % url)
                reason = 'meta'
                return self._retry(request, reason, spider) or response
        # Dumps several renderings of the exception plus the traceback to stdout.
        except Exception, e:
            print 'str(Exception):\t', str(Exception)
            print 'str(e):\t\t', str(e)
            print 'repr(e):\t', repr(e)
            print 'e.message:\t', e.message
            print 'traceback.print_exc():'
            print 'traceback.format_exc():\n%s' % traceback.format_exc()
Esempio n. 26
    def test_get_meta_refresh(self):
        # Exercises get_meta_refresh() on TextResponse objects built from HTML
        # bodies. The cases are labelled by the comments below: refresh
        # without a URL, multi-line tags, entities, relative URLs, utf-16 /
        # utf8 / latin1 content, missing tags, HTML comments and float
        # intervals.
        # NOTE(review): this copy looks truncated -- several triple-quoted
        # bodies are unterminated or cut short, some TextResponse(...) calls
        # are left open, and most `self.assertEqual(get_meta_refresh(response),`
        # opening lines are missing (only the expected-tuple continuation
        # lines remain). `body.decode('ascii')` on a str literal presumably
        # means this targets Python 2 -- confirm.
        body = """
            <head><title>Dummy</title><meta http-equiv="refresh" content="5;url=" /></head>
        response = TextResponse(url='', body=body)
                         (5, ''))

        # refresh without url should return (None, None)
        body = """<meta http-equiv="refresh" content="5" />"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (None, None))

        body = """<meta http-equiv="refresh" content="5;
            url=" /></head>"""
        response = TextResponse(url='', body=body)
                         (5, ''))

        # meta refresh in multiple lines
        body = """<html><head>
               CONTENT="1; URL=">"""
        response = TextResponse(url='', body=body)
                         (1, ''))

        # entities in the redirect url
        body = """<meta http-equiv="refresh" content="3; url=&#39;;">"""
        response = TextResponse(url='', body=body)
                         (3, ''))

        # relative redirects
        body = """<meta http-equiv="refresh" content="3; url=other.html">"""
        response = TextResponse(url='',
                         (3, ''))

        # non-standard encodings (utf-16)
        body = """<meta http-equiv="refresh" content="3; url=">"""
        body = body.decode('ascii').encode('utf-16')
        response = TextResponse(url='',
                         (3, ''))

        # non-ascii chars in the url (utf8)
        body = """<meta http-equiv="refresh" content="3; url=\xc2\xa3">"""
        response = TextResponse(url='',
                         (3, ''))

        # non-ascii chars in the url (latin1)
        body = """<meta http-equiv="refresh" content="3; url=\xa3">"""
        response = TextResponse(url='',
                         (3, ''))

        # responses without refresh tag should return None None
        response = TextResponse(url='')
        self.assertEqual(get_meta_refresh(response), (None, None))
        response = TextResponse(url='')
        self.assertEqual(get_meta_refresh(response), (None, None))

        # html commented meta refresh header must not directed
        body = """<!--<meta http-equiv="refresh" content="3; url=">-->"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (None, None))

        # html comments must not interfere with uncommented meta refresh header
        body = """<!-- commented --><meta http-equiv="refresh" content="3; url=">-->"""
        response = TextResponse(url='', body=body)
                         (3, ''))

        # float refresh intervals
        body = """<meta http-equiv="refresh" content=".1;URL=index.html" />"""
        response = TextResponse(url='', body=body)
                         (0.1, ''))

        body = """<meta http-equiv="refresh" content="3.1;URL=index.html" />"""
        response = TextResponse(url='', body=body)
                         (3.1, ''))
Esempio n. 27
    def test_get_meta_refresh(self):
        body = """
            <head><title>Dummy</title><meta http-equiv="refresh" content="5;url=" /></head>
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (5, ''))

        # refresh without url should return (None, None)
        body = """<meta http-equiv="refresh" content="5" />"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (None, None))

        body = """<meta http-equiv="refresh" content="5;
            url=" /></head>"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (5, ''))

        # meta refresh in multiple lines
        body = """<html><head>
               CONTENT="1; URL=">"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (1, ''))

        # entities in the redirect url
        body = """<meta http-equiv="refresh" content="3; url=&#39;;">"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (3, ''))

        # relative redirects
        body = """<meta http-equiv="refresh" content="3; url=other.html">"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (3, ''))

        # non-standard encodings (utf-16)
        body = """<meta http-equiv="refresh" content="3; url=">"""
        body = body.decode('ascii').encode('utf-16')
        response = TextResponse(url='', body=body, encoding='utf-16')
        self.assertEqual(get_meta_refresh(response), (3, ''))

        # non-ascii chars in the url (default encoding - utf8)
        body = """<meta http-equiv="refresh" content="3; url=\xc2\xa3">"""
        response = TextResponse(url='', body=body)
        self.assertEqual(get_meta_refresh(response), (3, ''))

        # non-ascii chars in the url (custom encoding - latin1)
        body = """<meta http-equiv="refresh" content="3; url=\xa3">"""
        response = TextResponse(url='', body=body, encoding='latin1')
        self.assertEqual(get_meta_refresh(response), (3, ''))

        # responses without refresh tag should return None None
        response = TextResponse(url='')
        self.assertEqual(get_meta_refresh(response), (None, None))
        response = TextResponse(url='')
        self.assertEqual(get_meta_refresh(response), (None, None))