def test_basic(self):
     """Check the logged crawl-rate line before start, while running and after stop."""
     emit = self.engine.signals.send
     first_line = self.lw.get_first_line

     # no engine_started signal yet: a minute passes without any log line
     self.clock.advance(60)
     self.assertEqual(first_line(), '')

     # after engine_started the first line shows up once 30 units elapsed
     emit(signals.engine_started)
     self.clock.advance(29)
     self.assertEqual(first_line(), '')
     self.clock.advance(1)
     self.assertEqual(
         first_line(),
         '[crawlmi] INFO: Crawled 0 pages (at 0 pages/min).')

     # two response_downloaded signals and one response_received signal;
     # the next line is expected to report 2 pages
     for _ in range(2):
         emit(signals.response_downloaded, response=Response(url=''))
     emit(signals.response_received, response=Response(url=''))
     self.clock.advance(30)
     self.assertEqual(
         first_line(),
         '[crawlmi] INFO: Crawled 2 pages (at 4 pages/min).')

     # after engine_stopped nothing more is logged
     emit(signals.engine_stopped)
     self.clock.advance(60)
     self.assertEqual(first_line(), '')
Exemple #2
0
    def test_download(self):
        """Exercise engine.download() with different pipeline outcomes.

        Returns whatever the final engine.download() call returned.
        """

        def drop_request(req):
            return None

        def answer_directly(req):
            return Response('')

        self.engine.start()
        del self.sp.received[:]

        request = Request('http://github.com/')
        self.engine.download(request)
        self.clock.advance(0)
        self.check_signals([signals.request_received])
        self.assertEqual(len(self.engine.request_queue), 1)

        # pipeline returns None: the request queue still holds one entry
        self.pipeline.req = drop_request
        self.engine.download(request)
        self.clock.advance(0)
        self.assertEqual(len(self.engine.request_queue), 1)

        # pipeline returns a response: it lands in the response queue
        self.pipeline.req = answer_directly
        self.engine.download(request)
        self.clock.advance(0)
        self.assertEqual(len(self.engine.response_queue), 1)

        # start another download, then stop the engine right away
        self.pipeline.req = answer_directly
        pending = self.engine.download(request)
        self.engine.stop('finished')
        self.clock.advance(0)
        return pending
Exemple #3
0
 def test_request_cacheability(self):
     """Check how request Cache-Control directives interact with the cache."""
     origin_resp = Response(self.request.url, status=200,
                            headers={'Expires': self.tomorrow})
     plain_req = Request('http://example.com')
     no_store_req = plain_req.replace(headers={'Cache-Control': 'no-store'})
     no_cache_req = plain_req.replace(headers={'Cache-Control': 'no-cache'})
     with self._middleware() as mw:
         # a no-store request gets the response back, but nothing is stored
         passthrough = self._process_requestresponse(
             mw, no_store_req, origin_resp)
         self.assertEqualResponse(passthrough, origin_resp)
         self.assertIsNone(mw.storage.retrieve_response(no_store_req))

         # the same exchange without no-store ends up in the cache
         fresh = self._process_requestresponse(mw, plain_req, origin_resp)
         self.assertNotIn('cached', fresh.flags)
         from_cache = mw.process_request(plain_req)
         self.assertIn('cached', from_cache.flags)
         self.assertEqualResponse(fresh, from_cache)

         # no-cache: the cached copy is bypassed, yet the new response
         # is stored and served to the next plain request
         updated_resp = origin_resp.replace(body='foo')
         bypassed = self._process_requestresponse(
             mw, no_cache_req, updated_resp)
         self.assertEqualResponse(bypassed, updated_resp)
         self.assertNotIn('cached', bypassed.flags)
         restored = self._process_requestresponse(mw, plain_req, None)
         self.assertEqualResponse(restored, updated_resp)
         self.assertIn('cached', restored.flags)
Exemple #4
0
    def test_fail(self):
        """Fail one of three enqueued requests and complete the other two."""
        outcomes = []

        def record(result):
            outcomes.append(result)

        # enqueue 3 requests
        queued = []
        for label in ('1', '2', '3'):
            request, dfd = get_request(label, func=record)
            self.slot.enqueue(request, dfd)
            queued.append(request)
        first, second, third = queued

        # fail the first request
        error = ValueError('my bad')
        self.handler.fail(first, error)
        self.assertEqual(outcomes[-1].value, error)

        # the two remaining requests are unaffected
        self.assertEqual(len(self.slot.in_progress), 2)
        self.assertEqual(len(self.slot.transferring), 2)
        for request in (second, third):
            self.handler.call(request, Response(''))
            self.assertEqual(outcomes[-1].request, request)

        # nothing is left in the slot afterwards
        self.assertEqual(len(self.slot.in_progress), 0)
        self.assertEqual(len(self.slot.transferring), 0)
 def test_repr(self):
     """repr() shows url, status (with reason when known) and flags."""
     cases = [
         ({'status': 200}, '<Response a [200 (OK)]>'),
         ({'status': 301, 'flags': ['cached']},
          '<Response a [301 (Moved Permanently)]> [\'cached\']'),
         ({'status': 999}, '<Response a [999]>'),
     ]
     for kwargs, expected in cases:
         response = Response('a', **kwargs)
         self.assertEqual(repr(response), expected)
Exemple #6
0
    def test_no_limit(self):
        """A ResponseQueue created with 0 does not request backout here."""
        queue = ResponseQueue(0)
        responses = [Response('', body=fill * 50) for fill in ('x', 'y')]

        self.assertFalse(queue.needs_backout())
        for response in responses:
            queue.push(response)
        self.assertFalse(queue.needs_backout())
    def test_response_status(self):
        """FILTER_RESPONSE_STATUS callable decides which statuses are rejected.

        With ``lambda x: x != 201`` a 201 response passes through unchanged
        while a 200 response raises FilterError.
        """
        engine = self._get_engine(FILTER_RESPONSE_STATUS=lambda x: x != 201)
        mw = Filter(engine)
        request = Request('http://github.com/')

        accepted = Response('', request=request, status=201)
        self.assertIs(accepted, mw.process_response(accepted))

        rejected = Response('', request=request, status=200)
        self.assertRaises(FilterError, mw.process_response, rejected)
Exemple #8
0
    def test_limit(self):
        """A ResponseQueue(10) asks for backout once two 5-char bodies are queued."""
        queue = ResponseQueue(10)
        first, second = [Response('', body=fill * 5) for fill in ('x', 'y')]

        # empty queue
        self.assertFalse(queue.needs_backout())

        # one response queued
        queue.push(first)
        self.assertFalse(queue.needs_backout())

        # two responses queued
        queue.push(second)
        self.assertTrue(queue.needs_backout())

        # popping one response clears the backout condition again
        queue.pop()
        self.assertFalse(queue.needs_backout())
    def test_copy(self):
        """copy() keeps every field equal and shares the request object."""
        request = Request('http://gh.com/')
        original = Response(url='http://hey.com/', status=201,
                            headers={'a': 'b'}, body='hey',
                            request=request, flags=['cached'])
        duplicate = original.copy()

        for attr in ('url', 'status', 'body'):
            self.assertEqual(getattr(original, attr),
                             getattr(duplicate, attr))
        self.assertIs(original.request, duplicate.request)
        self.assertIsInstance(duplicate.headers, Headers)
        self.assertDictEqual(original.headers, duplicate.headers)
        self.assertListEqual(original.flags, duplicate.flags)
    def test_filter_non_200(self):
        """FILTER_NON_200_RESPONSE_STATUS switches rejection of a 404 on and off."""
        strict_mw = Filter(
            self._get_engine(FILTER_NON_200_RESPONSE_STATUS=True))
        request = Request('http://github.com/')

        # enabled: 200 passes through untouched
        ok_resp = Response('', request=request, status=200)
        self.assertIs(ok_resp, strict_mw.process_response(ok_resp))

        # enabled: 404 is rejected
        not_found = Response('', request=request, status=404)
        self.assertRaises(FilterError, strict_mw.process_response, not_found)

        # disabled: the very same 404 passes through untouched
        lenient_mw = Filter(
            self._get_engine(FILTER_NON_200_RESPONSE_STATUS=False))
        self.assertIs(not_found, lenient_mw.process_response(not_found))
    def test_max_redirect_times(self):
        """With max_redirect_times = 1 the second redirect yields None."""
        url = 'http://crawlmitest.org/302'

        def redirect_for(request):
            # a fresh 302 response pointing at /redirected
            return Response(url,
                            headers={'Location': '/redirected'},
                            status=302,
                            request=request)

        self.mw.max_redirect_times = 1
        first_req = Request(url)

        # first redirect is followed and recorded in the history
        second_req = self.mw.process_response(redirect_for(first_req))
        self.assertIsInstance(second_req, Request)
        self.assertListEqual(second_req.history,
                             ['http://crawlmitest.org/302'])

        # second redirect exceeds the limit
        self.assertIsNone(self.mw.process_response(redirect_for(second_req)))
    def test_redirect_302(self):
        """A 302 answer to a POST yields a body-less GET without content headers."""
        source_url = 'http://www.example.com/302'
        target_url = 'http://www.example.com/redirected2'
        post_headers = {
            'Content-Type': 'text/plain',
            'Content-length': '4'
        }
        req = Request(source_url, method='POST', body='test',
                      headers=post_headers)
        resp = Response(source_url, headers={'Location': target_url},
                        status=302, request=req)

        redirected = self.mw.process_response(resp)
        self.assertIsInstance(redirected, Request)
        self.assertEqual(redirected.url, target_url)
        self.assertEqual(redirected.method, 'GET')

        dropped_headers = (
            ('Content-Type',
             'Content-Type header must not be present in redirected request'),
            ('Content-Length',
             'Content-Length header must not be present in redirected request'),
        )
        for header, message in dropped_headers:
            self.assertNotIn(header, redirected.headers, message)
        self.assertEqual(
            redirected.body, '',
            'Redirected body must be empty, not `%s`' % redirected.body)

        # a 3XX response lacking the Location header is returned untouched
        del resp.headers['Location']
        self.assertIs(self.mw.process_response(resp), resp)
 def test_request(self):
     """meta/history/original_url come from the request, or raise without one."""
     proxied = ('meta', 'history', 'original_url')

     req = Request(url='http://github.com', meta={'a': 'b'})
     req.history = ['a', 'b']
     bound = Response(url='', request=req)
     self.assertIs(bound.request, req)
     for name in proxied:
         self.assertIs(getattr(bound, name), getattr(req, name))

     # without a request every proxied attribute raises AttributeError
     unbound = Response(url='')
     from crawlmi.http.response.response import _no_request_error
     for name in proxied:
         self.assertRaisesRegexp(AttributeError, _no_request_error,
                                 getattr, unbound, name)
Exemple #14
0
 def test_req_or_resp(self):
     """settings.get() reads request meta, given a request or its response."""
     expected = (('a', 10), ('x', 'y'))

     req = Request('http://github.com/', meta={'a': 10, 'x': 'y'})
     for key, value in expected:
         self.assertEqual(self.settings.get(key, req_or_resp=req), value)

     resp = Response('', request=req)
     for key, value in expected:
         self.assertEqual(self.settings.get(key, req_or_resp=resp), value)
    def setUp(self):
        """Create the DownloaderStats middleware plus a request/response pair."""
        eng = get_engine()
        self.stats = eng.stats
        self.mw = DownloaderStats(eng)

        request = Request('http://github.com')
        self.req = request
        self.resp = Response('scrapytest.org', status=400, request=request)
Exemple #16
0
 def test_404(self):
     """A 404 response is handed back by the middleware unchanged."""
     url = 'http://www.scrapytest.org/404'
     response = Response(url, body='', status=404, request=Request(url))
     self.assertIs(self.mw.process_response(response), response)
Exemple #17
0
    def test_fail(self):
        """Fail two of three queued requests and let the remaining one succeed.

        Every outcome is expected to show up on the response queue carrying
        the request it belongs to.
        """
        self._update_dwn(CONCURRENT_REQUESTS=3,
                         CONCURRENT_REQUESTS_PER_DOMAIN=2)
        requests = [get_request(name)[0] for name in 'aab']
        # Explicit loop instead of map(): map() is lazy on Python 3, so using
        # it purely for its side effects would push nothing there.
        for request in requests:
            self.request_queue.push(request)

        # enqueue requests
        self.clock.advance(0)
        # fail 1st request
        err = ValueError('my bad')
        self.handler.fail(requests[0], err)
        self.assertEqual(self.dwn.free_slots, 1)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[0])
        self.assertIs(fail.value, err)
        # fail 3rd request
        self.handler.fail(requests[2], err)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[2])
        self.assertIs(fail.value, err)
        # succeed 2nd request
        self.handler.call(requests[1], Response('nice!', request=requests[1]))
        resp = self.response_queue.pop()
        self.assertIs(resp.request, requests[1])
        self.assertEqual(resp.url, 'nice!')
Exemple #18
0
 def setUp(self):
     """Reset the recording lists and build a request, response and failure."""
     self.mws, self.actions = [], []

     request = Request('http://gh.com/')
     self.req = request
     self.resp = Response('http://gh.com/', request=request)

     failure = Failure(Exception())
     self.fail = failure
     failure.request = request
 def test_priority_adjust(self):
     """The request produced for a 301 redirect gets a higher priority."""
     req = Request('http://a.com')
     resp = Response('http://a.com',
                     headers={'Location': 'http://a.com/redirected'},
                     status=301,
                     request=req)
     req2 = self.mw.process_response(resp)
     # assertGreater instead of a bare assert: it is not stripped under
     # ``python -O`` and reports both priorities on failure.
     self.assertGreater(req2.priority, req.priority)
Exemple #20
0
 def test_priority_adjust(self):
     """The request produced for a 503 response gets a lower priority."""
     req = Request('http://www.scrapytest.org/503')
     rsp = Response('http://www.scrapytest.org/503',
                    body='',
                    status=503,
                    request=req)
     req2 = self.mw.process_response(rsp)
     # assertLess reports both priorities on failure, unlike assertTrue(a < b)
     self.assertLess(req2.priority, req.priority)
    def test_properties(self):
        """Response.body is readable but assigning to it raises AttributeError."""
        response = Response('', body='hey')

        self.assertEqual(response.body, 'hey')
        # setattr(obj, 'body', '') performs the same assignment as
        # ``obj.body = ''``
        self.assertRaises(AttributeError, setattr, response, 'body', '')
Exemple #22
0
    def test_cookiejar_key(self):
        """Cookies are tracked per ``cookiejar`` meta key.

        Covers two named jars ('store1', 'store2'), requests to a host with
        an explicit port, and a non-http request.
        """
        # jar 'store1': request cookie is sent, response cookie is remembered
        req = Request('http://test.org/', cookies={'galleta': 'salada'}, meta={'cookiejar': 'store1'})
        self.assertIs(self.mw.process_request(req), req)
        self.assertEqual(req.headers.get('Cookie'), 'galleta=salada')

        headers = {'Set-Cookie': 'C1=value1; path=/'}
        res = Response('http://test.org/', headers=headers, request=req)
        self.assertIs(self.mw.process_response(res), res)

        req2 = Request('http://test.org/', meta=res.meta)
        self.assertIs(self.mw.process_request(req2), req2)
        self.assertEqual(req2.headers.get('Cookie'), 'C1=value1; galleta=salada')

        # jar 'store2': same host, but none of the 'store1' cookies show up
        req3 = Request('http://test.org/', cookies={'galleta': 'dulce'}, meta={'cookiejar': 'store2'})
        self.assertIs(self.mw.process_request(req3), req3)
        self.assertEqual(req3.headers.get('Cookie'), 'galleta=dulce')

        headers = {'Set-Cookie': 'C2=value2; path=/'}
        res2 = Response('http://test.org/', headers=headers, request=req3)
        self.assertIs(self.mw.process_response(res2), res2)

        req4 = Request('http://test.org/', meta=res2.meta)
        self.assertIs(self.mw.process_request(req4), req4)
        self.assertEqual(req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')

        # cookies from hosts with port
        req5_1 = Request('http://test.org:1104/')
        self.assertIs(self.mw.process_request(req5_1), req5_1)

        headers = {'Set-Cookie': 'C1=value1; path=/'}
        res5_1 = Response('http://test.org:1104/', headers=headers, request=req5_1)
        self.assertIs(self.mw.process_response(res5_1), res5_1)

        req5_2 = Request('http://test.org:1104/some-redirected-path')
        self.assertIs(self.mw.process_request(req5_2), req5_2)
        self.assertEqual(req5_2.headers.get('Cookie'), 'C1=value1')

        # the cookie set via the :1104 host is also sent without the port
        req5_3 = Request('http://test.org/some-redirected-path')
        self.assertIs(self.mw.process_request(req5_3), req5_3)
        self.assertEqual(req5_3.headers.get('Cookie'), 'C1=value1')

        # skip cookie retrieval for not http request
        req6 = Request('file:///crawlmi/sometempfile')
        self.assertIs(self.mw.process_request(req6), req6)
        self.assertIsNone(req6.headers.get('Cookie'))
    def test_copy(self):
        """A copied response equals its source and shares the same request."""
        request = Request('http://gh.com/')
        fields = {
            'url': 'http://hey.com/',
            'status': 201,
            'headers': {'a': 'b'},
            'body': 'hey',
            'request': request,
            'flags': ['cached'],
        }
        source = Response(**fields)
        clone = source.copy()

        # plain value fields compare equal
        self.assertEqual(source.url, clone.url)
        self.assertEqual(source.status, clone.status)
        self.assertEqual(source.body, clone.body)
        # the request is shared by identity
        self.assertIs(source.request, clone.request)
        # headers and flags carry equal content
        self.assertIsInstance(clone.headers, Headers)
        self.assertDictEqual(source.headers, clone.headers)
        self.assertListEqual(source.flags, clone.flags)
Exemple #24
0
 def test_clear_slots(self):
     """After 30 sequential requests the slot count stays bounded."""
     requests = [get_request(idx)[0] for idx in xrange(30)]
     for request in requests:
         # push, let the downloader poll its queue, then complete the request
         self.request_queue.push(request)
         self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
         self.handler.call(request, Response(''))
     self.assertLessEqual(
         len(self.dwn.slots), 2 * self.dwn.total_concurrency)
    def test_503(self):
        """A 503 is retried twice (retry_times 1, 2) and then passed through."""
        url = 'http://www.scrapytest.org/503'
        rsp = Response(url, body='', status=503, request=Request(url))

        # first and second retry
        for attempt in (1, 2):
            retry = self.mw.process_response(rsp)
            self.assertIsInstance(retry, Request)
            self.assertEqual(retry.meta['retry_times'], attempt)
            rsp.request = retry

        # third time the response itself is returned
        self.assertIs(self.mw.process_response(rsp), rsp)
Exemple #26
0
    def test_middleware_ignore_schemes(self):
        """Check which request schemes get cached, by default and when ignored."""

        def assert_cached(req, res):
            # the first pass stores the response, the second one is a hit
            with self._middleware() as mw:
                self.assertIs(mw.process_request(req), req)
                mw.process_response(res)

                hit = mw.process_request(req)
                self.assertIsInstance(hit, Response, type(hit))
                self.assertEqualResponse(res, hit)
                self.assertIn('cached', hit.flags)

        def assert_not_cached(req, res, **settings):
            # nothing is stored and the request keeps passing through
            with self._middleware(**settings) as mw:
                self.assertIs(mw.process_request(req), req)
                mw.process_response(res)

                self.assertIsNone(mw.storage.retrieve_response(req))
                self.assertIs(mw.process_request(req), req)

        # http responses are cached by default
        http_req = Request('http://test.com/')
        assert_cached(http_req, Response('http://test.com/', request=http_req))

        # file response is not cached by default
        file_req = Request('file:///tmp/t.txt')
        assert_not_cached(file_req,
                          Response('file:///tmp/t.txt', request=file_req))

        # s3 scheme response is cached by default
        s3_req = Request('s3://bucket/key')
        assert_cached(s3_req, Response('http://bucket/key', request=s3_req))

        # ignore s3 scheme
        ignored_req = Request('s3://bucket/key2')
        assert_not_cached(ignored_req,
                          Response('http://bucket/key2', request=ignored_req),
                          HTTP_CACHE_IGNORE_SCHEMES=['s3'])
Exemple #27
0
 def test_response_cacheability(self):
     """Table-driven check of which (status, headers) responses get stored."""
     # each entry: (expected to be cached, status, response headers)
     samples = [
         # 304 is not cacheable no matter what servers sends
         (False, 304, {}),
         (False, 304, {'Last-Modified': self.yesterday}),
         (False, 304, {'Expires': self.tomorrow}),
         (False, 304, {'Etag': 'bar'}),
         (False, 304, {'Cache-Control': 'max-age=3600'}),
         # Always obey no-store cache control
         (False, 200, {'Cache-Control': 'no-store'}),
         (False, 200, {'Cache-Control': 'no-store, max-age=300'}),  # invalid
         (False, 200, {'Cache-Control': 'no-store', 'Expires': self.tomorrow}),  # invalid
         # Ignore responses missing expiration and/or validation headers
         (False, 200, {}),
         (False, 302, {}),
         (False, 307, {}),
         (False, 404, {}),
         # Cache responses with expiration and/or validation headers
         (True, 200, {'Last-Modified': self.yesterday}),
         (True, 203, {'Last-Modified': self.yesterday}),
         (True, 300, {'Last-Modified': self.yesterday}),
         (True, 301, {'Last-Modified': self.yesterday}),
         (True, 401, {'Last-Modified': self.yesterday}),
         (True, 404, {'Cache-Control': 'public, max-age=600'}),
         (True, 302, {'Expires': self.tomorrow}),
         (True, 200, {'Etag': 'foo'}),
     ]
     with self._middleware() as mw:
         for idx, sample in enumerate(samples):
             shouldcache, status, headers = sample
             request = Request('http://example-%d.com' % idx)
             origin = Response(request.url, status=status, headers=headers)

             first = self._process_requestresponse(mw, request, origin)
             # cacheable entries are answered with a 304 the second time
             not_modified = origin.replace(status=304)
             second_origin = not_modified if shouldcache else origin
             second = self._process_requestresponse(
                 mw, request, second_origin)

             self.assertEqualResponse(first, origin)
             self.assertEqualResponse(second, origin)
             stored = mw.storage.retrieve_response(request)
             if not shouldcache:
                 self.assertFalse(stored)
                 self.assertNotIn('cached', second.flags)
             else:
                 self.assertEqualResponse(stored, first)
                 self.assertTrue('cached' in second.flags and second.status != 304)
Exemple #28
0
 def test_different_request_response_urls(self):
     """A response whose URL differs from the request URL is still cached."""
     with self._middleware() as mw:
         request = Request('http://host.com/path')
         response = Response('http://host2.net/test.html', request=request)

         # first pass: nothing cached yet, then store the response
         self.assertIs(mw.process_request(request), request)
         mw.process_response(response)

         # second pass: the stored response is served
         hit = mw.process_request(request)
         self.assertIsInstance(hit, Response)
         self.assertEqualResponse(response, hit)
         self.assertIn('cached', hit.flags)
    def _getresponse(self, coding):
        """Build a Response whose body is the sample file registered for *coding*.

        Raises ValueError when *coding* is not a key of FORMAT.
        """
        if coding not in FORMAT:
            raise ValueError()

        sample_file, content_encoding = FORMAT[coding]
        sample_path = join(SAMPLE_DIR, sample_file)
        with open(sample_path, "rb") as stream:
            payload = stream.read()

        response = Response(
            "http://github.com/",
            body=payload,
            headers={
                "Server": "Yaws/1.49 Yet Another Web Server",
                "Date": "Sun, 08 Mar 2009 00:41:03 GMT",
                "Content-Length": len(payload),
                "Content-Type": "text/html",
                "Content-Encoding": content_encoding,
            })
        # attach the request that advertises gzip/deflate support
        response.request = Request(
            "http://github.com/",
            headers={"Accept-Encoding": "gzip,deflate"})
        return response
    def test_redirect_urls(self):
        """Chained redirects resolve relative locations and extend the history."""
        first_url = 'http://crawlmitest.org/first'
        second_url = 'http://crawlmitest.org/redirected'
        third_url = 'http://crawlmitest.org/redirected2'

        first_req = Request(first_url)
        first_resp = Response(first_url,
                              headers={'Location': '/redirected'},
                              status=302,
                              request=first_req)
        second_req = self.mw.process_response(first_resp)
        second_resp = Response(second_url,
                               headers={'Location': '/redirected2'},
                               status=302,
                               request=second_req)
        third_req = self.mw.process_response(second_resp)

        self.assertEqual(second_req.url, second_url)
        self.assertListEqual(second_req.history, [first_url])
        self.assertEqual(third_req.url, third_url)
        self.assertListEqual(third_req.history, [first_url, second_url])
    def test_header(self):
        """A canonical Link header ends up in meta['canonical_url']."""
        cases = (
            # absolute url
            ('<https://b.sk/hello>; rel="canonical"', 'https://b.sk/hello'),
            # relative url
            ('</hello/world>; rel="canonical"', 'http://a.com/hello/world'),
        )
        for link_header, canonical_url in cases:
            request = Request('http://a.com/pom')
            response = Response(request.url,
                                headers={'Link': link_header},
                                request=request)
            processed = self.mw.process_response(response)
            self.assertIs(response, processed)
            self.assertEqual(response.meta['canonical_url'], canonical_url)
Exemple #32
0
    def test_stop_engine(self):
        """A callback raising StopEngine stops the engine with one response left."""

        def raise_stop(response):
            raise StopEngine()

        def on_engine_stopped():
            # the second response must still be queued when the engine stops
            self.assertEqual(len(self.engine.response_queue), 1)

        queue = self.engine.response_queue
        stopping_req = Request('http://github.com/', callback=raise_stop)
        queue.push(Response('', request=stopping_req))
        leftover_req = Request('http://github.com/')
        queue.push(Response('', request=leftover_req))

        self.engine.signals.connect(on_engine_stopped,
                                    signal=signals.engine_stopped)
        self.engine.start()
        self.assertTrue(self.engine.running)
        self.clock.pump([self.engine.QUEUE_CHECK_FREQUENCY, 0, 0, 0])
        self.assertFalse(self.engine.running)
    def _getresponse(self, coding):
        """Return a Response carrying the sample body for the given *coding*.

        Raises ValueError when *coding* is not a key of FORMAT.
        """
        if coding not in FORMAT:
            raise ValueError()

        url = 'http://github.com/'
        sample_file, content_encoding = FORMAT[coding]
        with open(join(SAMPLE_DIR, sample_file), 'rb') as handle:
            raw = handle.read()

        response_headers = {
            'Server': 'Yaws/1.49 Yet Another Web Server',
            'Date': 'Sun, 08 Mar 2009 00:41:03 GMT',
            'Content-Length': len(raw),
            'Content-Type': 'text/html',
            'Content-Encoding': content_encoding,
        }
        result = Response(url, body=raw, headers=response_headers)
        # the originating request advertises gzip/deflate support
        result.request = Request(url, headers={'Accept-Encoding': 'gzip,deflate'})
        return result
    def _getresponse(self, coding):
        """Create a Response from the sample file that FORMAT maps *coding* to.

        Raises ValueError when *coding* is not a key of FORMAT.
        """
        if coding not in FORMAT:
            raise ValueError()

        file_name, encoding_header = FORMAT[coding]
        full_path = join(SAMPLE_DIR, file_name)
        with open(full_path, 'rb') as source:
            content = source.read()

        resp = Response(
            'http://github.com/',
            body=content,
            headers={
                'Server': 'Yaws/1.49 Yet Another Web Server',
                'Date': 'Sun, 08 Mar 2009 00:41:03 GMT',
                'Content-Length': len(content),
                'Content-Type': 'text/html',
                'Content-Encoding': encoding_header,
            })
        # pair the response with a request that accepts gzip and deflate
        accept = {'Accept-Encoding': 'gzip,deflate'}
        resp.request = Request('http://github.com/', headers=accept)
        return resp
 def test_cached_and_stale(self):
     # Purpose: for each (status, headers) sample below, a stored response
     # is expected to be treated as stale, so a new response is returned
     # instead of the cached one; a later 304 is expected to serve the
     # stored copy when the sample carries an ETag or Last-Modified header.
     sample_data = [
         (200, {'Date': self.today, 'Expires': self.yesterday}),
         (200, {'Date': self.today, 'Expires': self.yesterday, 'Last-Modified': self.yesterday}),
         (200, {'Expires': self.yesterday}),
         (200, {'Expires': self.yesterday, 'ETag': 'foo'}),
         (200, {'Expires': self.yesterday, 'Last-Modified': self.yesterday}),
         (200, {'Expires': self.tomorrow, 'Age': '86405'}),
         (200, {'Cache-Control': 'max-age=86400', 'Age': '86405'}),
         # no-cache forces expiration, also revalidation if validators exists
         (200, {'Cache-Control': 'no-cache'}),
         (200, {'Cache-Control': 'no-cache', 'ETag': 'foo'}),
         (200, {'Cache-Control': 'no-cache', 'Last-Modified': self.yesterday}),
     ]
     with self._middleware() as mw:
         # idx gives every sample its own URL (example-<idx>.com)
         for idx, (status, headers) in enumerate(sample_data):
             req0 = Request('http://example-%d.com' % idx)
             res0a = Response(req0.url, status=status, headers=headers)
             # cache expired response
             res1 = self._process_requestresponse(mw, req0, res0a)
             self.assertEqualResponse(res1, res0a)
             self.assertNotIn('cached', res1.flags)
             # Same request but as cached response is stale a new response must
             # be returned
             res0b = res0a.replace(body='bar')
             res2 = self._process_requestresponse(mw, req0, res0b)
             self.assertEqualResponse(res2, res0b)
             self.assertNotIn('cached', res2.flags)
             # Previous response expired too, subsequent request to same
             # resource must revalidate and succeed on 304 if validators
             # are present
             if 'ETag' in headers or 'Last-Modified' in headers:
                 # the 304 answer must yield the stored 'bar' body, flagged
                 # as cached
                 res0c = res0b.replace(status=304)
                 res3 = self._process_requestresponse(mw, req0, res0c)
                 self.assertEqualResponse(res3, res0b)
                 self.assertIn('cached', res3.flags)