Example #1
    def test_xml_detect(self):
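        # XML detection keys off a '<?xml' byte prefix (ASCII or UTF-16LE),
        # an .xml URL path, or an XML Content-Type header.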
        self.assertTrue(
            XMLDetector.is_file(io.BytesIO(
                '<?xml version='.encode('utf-16le'))))
        self.assertFalse(
            XMLDetector.is_file(
                io.BytesIO('<!DOCTYPE html><html><body>'.encode('utf-16le'))))
        self.assertFalse(XMLDetector.is_file(io.BytesIO(b'<html><body>hello')))
        self.assertTrue(XMLDetector.is_file(io.BytesIO(b'<?xml version')))
        self.assertTrue(
            XMLDetector.is_url(URLInfo.parse('example.com/index.xml')))
        self.assertFalse(
            XMLDetector.is_url(URLInfo.parse('example.com/image.jpg')))
        self.assertTrue(
            XMLDetector.is_request(Request('example.com/index.xml')))
        self.assertFalse(
            XMLDetector.is_request(Request('example.com/image.jpg')))

        response = Response(200, 'OK')
        response.fields['Content-Type'] = 'text/xml'
        self.assertTrue(XMLDetector.is_response(response))

        response = Response(200, 'OK')
        response.fields['Content-Type'] = 'application/xml'
        self.assertTrue(XMLDetector.is_response(response))

        response = Response(200, 'OK')
        response.fields['Content-Type'] = 'image/png'
        self.assertFalse(XMLDetector.is_response(response))
Example #2
    def test_fetch_allow(self):
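        # The session fetches robots.txt before anything else; once the rules
        # allow the crawl, the next request is the originally requested URL.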
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com'))

        self.assertEqual(RobotsState.unknown, session._robots_state)

        request = session.next_request
        self.assertTrue(request.url_info.url.endswith('robots.txt'))

        response = Response('HTTP/1.0', 200, 'OK')
        response.body.content_file = io.StringIO('User-agent:*\nAllow: /\n')

        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.ok, session._robots_state)

        request = session.next_request
        self.assertTrue(request.url_info.url.endswith('/'))

        response = Response('HTTP/1.0', 200, 'OK')
        http_client.response = response
        yield session.fetch()

        self.assertTrue(session.done)
Example #3
    def test_redirect_loop(self):
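        # robots.txt redirects back to itself; once the redirect limit is
        # exhausted, the session gives up on robots.txt and fetches the
        # target URL anyway.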
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            response = Response('HTTP/1.0', 302, 'Found')
            response.url_info = request.url_info
            response.fields['location'] = '/robots.txt'

            http_client.response = response
            yield session.fetch()

        request = session.next_request
        self.assertTrue(request)

        response = Response('HTTP/1.0', 200, 'OK')

        http_client.response = response
        yield session.fetch()

        self.assertEqual(RobotsState.ok, session._robots_state)

        print(session.next_request)
        self.assertTrue(session.done)
Example #4
    def test_to_dict_body(self):
        request = Request()
        request.body = Body()
        request_dict = request.to_dict()

        self.assertTrue(request_dict['body'])
        request.body.close()

        request = Request()
        request.body = NotImplemented
        request_dict = request.to_dict()

        self.assertFalse(request_dict['body'])

        response = Response()
        response.body = Body()
        response_dict = response.to_dict()

        self.assertTrue(response_dict['body'])
        response.body.close()

        response = Response()
        response.body = NotImplemented
        response_dict = response.to_dict()

        self.assertFalse(response_dict['body'])
Example #5
    def test_html_detect(self):
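        # HTML detection mirrors the XML detector above: byte sniffing
        # (including UTF-16LE), known extensions, and the Content-Type header.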
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO('<html><body>hi</body></html>'.encode('utf-16le'))
        ))
        self.assertFalse(HTMLReader.is_file(
            io.BytesIO('hello world!'.encode('utf-16le'))
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(b'<title>hello</title>hi')
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(b'<html><body>hello')
        ))
        self.assertTrue(HTMLReader.is_file(
            io.BytesIO(
                b'The document has moved <a href="somewhere.html">here</a>'
            )
        ))
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.htm'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.html'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.dhtm'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.xhtml'))
        )
        self.assertTrue(
            HTMLReader.is_url(URLInfo.parse('example.com/index.xht'))
        )
        self.assertFalse(
            HTMLReader.is_url(URLInfo.parse('example.com/image.jpg'))
        )
        self.assertTrue(
            HTMLReader.is_request(Request.new('example.com/index.html'))
        )
        self.assertFalse(
            HTMLReader.is_request(Request.new('example.com/image.jpg'))
        )

        response = Response('HTTP/1.0', 200, 'OK')
        response.fields['Content-Type'] = 'text/html'
        self.assertTrue(HTMLReader.is_response(response))

        response = Response('HTTP/1.0', 200, 'OK')
        response.fields['Content-Type'] = 'image/png'
        self.assertFalse(HTMLReader.is_response(response))
Example #6
    def test_response_status_codes(self):
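        # Out-of-range status codes such as 0 and 999 must parse without
        # raising.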
        response = Response()
        response.parse(b'HTTP/1.0 0\r\n')
        response.parse(b'\r\n')

        self.assertEqual(0, response.status_code)

        response = Response()
        response.parse(b'HTTP/1.0 999\r\n')
        response.parse(b'\r\n')

        self.assertEqual(999, response.status_code)

        response = Response(0, '')
        self.assertEqual(0, response.status_code)
Example #7
    def test_javascript_heavy_inline_monstrosity(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/test.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH, 'testing', 'samples',
                                          'twitchplayspokemonfirered.html')
            with open(html_file_path, 'rb') as in_file:
                in_file.seek(0x147)
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertIn(
            'http://cdn.bulbagarden.net/upload/archive/a/a4/'
            '20090718115357%21195Quagsire.png', inline_urls)
        self.assertIn(
            'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
            'user%2FGoldenSandslash15&sa=D&sntz=1&'
            'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A', linked_urls)

        print('\n'.join(inline_urls))
        print('\n'.join(linked_urls))
Example #8
    def test_response(self):
        response = Response(200, 'OK')
        response.fields['Cake'] = 'dolphin'

        self.assertEqual((b'HTTP/1.1 200 OK\r\n'
                          b'Cake: dolphin\r\n'
                          b'\r\n'), response.to_bytes())
Example #9
    def test_html_scraper_links_base_href(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'basehref.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual('utf-8', scrape_info['encoding'])

        self.assertEqual({
            'http://cdn.example.com/stylesheet1.css',
            'http://www.example.com/stylesheet2.css',
            'http://example.com/a/stylesheet3.css',
            'http://example.com/a/dir/image1.png',
            'http://example.com/dir/image2.png',
            'http://example.net/image3.png',
            'http://example.com/dir/image4.png',
            },
            inline_urls
        )
        self.assertEqual({
            'http://example.com/a/'
            },
            linked_urls
        )
Example #10
    def test_server_error(self):
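        # Persistent 500 responses for robots.txt leave no next request, and
        # the final fetch must raise RobotsDenied.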
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            response = Response('HTTP/1.0', 500, 'Oops')

            http_client.response = response
            yield session.fetch()

        request = session.next_request
        self.assertIsNone(request)

        try:
            yield session.fetch()
        except RobotsDenied:
            pass
        else:
            self.fail()

        self.assertTrue(session.done)
Example #11
    def test_xhtml_invalid(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'xhtml_invalid.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual(
            {
                'http://example.com/image.png',
                'http://example.com/script.js',
            },
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/link'
            },
            linked_urls
        )
Example #12
    def test_html_not_quite_charset(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'videogame_top.htm')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertIn(
            'http://example.com/copyright_2001_2006_rtype.gif',
            inline_urls
        )
        self.assertIn(
            'http://www.geocities.jp/gamehouse_grindcrusher/',
            linked_urls
        )
Example #13
    def test_html_soup(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()
        response.fields['Refresh'] = 'yes'

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'soup.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual(
            {'http://example.com/ABOUTM~1.JPG'},
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/BLOG',
                'http://example.com/web ring/Join.htm',
            },
            linked_urls
        )
Example #14
    def test_html_krokozyabry(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()
        response.fields['content-type'] = 'text/html; charset=KOI8-R'

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'krokozyabry.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual('koi8-r', scrape_result.encoding)

        self.assertEqual(
            set(),
            inline_urls
        )
        self.assertEqual(
            {'http://example.com/Кракозябры'},
            linked_urls
        )
Example #15
    def test_sitemap_scraper_xml(self):
        scraper = SitemapScraper()
        request = Request.new('http://example.com/sitemap.xml')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            response.body.content_file.write(
                b'''<?xml version="1.0" encoding="UTF-8"?>
                <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <url>
                      <loc>http://www.example.com/</loc>
                      <lastmod>2005-01-01</lastmod>
                      <changefreq>monthly</changefreq>
                      <priority>0.8</priority>
                   </url>
                </urlset>
            '''
            )

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual({
            'http://www.example.com/',
            },
            linked_urls
        )
        self.assertFalse(inline_urls)
Example #16
    def test_sitemap_scraper_xml_index(self):
        scraper = SitemapScraper(self.get_html_parser())
        request = Request('http://example.com/sitemap.xml')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            response.body.write(b'''<?xml version="1.0" encoding="UTF-8"?>
                <sitemapindex
                xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <sitemap>
                      <loc>http://www.example.com/sitemap1.xml.gz</loc>
                      <lastmod>2004-10-01T18:23:17+00:00</lastmod>
                   </sitemap>
                </sitemapindex>
            ''')

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual({
            'http://www.example.com/sitemap1.xml.gz',
        }, linked_urls)
        self.assertFalse(inline_urls)
Example #17
    def test_rss_as_html(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'application/rss+xml'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'rss.xml')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)

        self.assertTrue(scrape_info)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']
        self.assertFalse(
            inline_urls
        )
        self.assertEqual(
            {
                'http://www.someexamplerssdomain.com/main.html',
                'http://www.wikipedia.org/'
            },
            linked_urls
        )
Example #18
    def test_sitemap_scraper_xml_index(self):
        scraper = SitemapScraper()
        request = Request.new('http://example.com/sitemap.xml')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            response.body.content_file.write(
                b'''<?xml version="1.0" encoding="UTF-8"?>
                <sitemapindex
                xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <sitemap>
                      <loc>http://www.example.com/sitemap1.xml.gz</loc>
                      <lastmod>2004-10-01T18:23:17+00:00</lastmod>
                   </sitemap>
                </sitemapindex>
            '''
            )

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual({
            'http://www.example.com/sitemap1.xml.gz',
            },
            linked_urls
        )
        self.assertFalse(inline_urls)
Example #19
    def get_http_header(self):
        '''Return the HTTP header.

        It attempts to read only the first 4 KiB of the payload.

        Returns:
            Response, None: Returns an instance of
            :class:`.http.request.Response` or None.
        '''
        with wpull.util.reset_file_offset(self.block_file):
            data = self.block_file.read(4096)

        # re.DOTALL lets the lazy match run past line breaks to the blank
        # line that terminates the header.
        match = re.match(br'(.*?\r?\n\r?\n)', data, re.DOTALL)

        if not match:
            return

        status_line, dummy, field_str = match.group(1).partition(b'\n')

        try:
            version, code, reason = Response.parse_status_line(status_line)
        except ValueError:
            return

        response = Response(version, code, reason)

        try:
            response.fields.parse(field_str, strict=False)
        except ValueError:
            return

        return response
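
The header-splitting step above can be exercised on its own. A minimal
sketch, assuming the `re.DOTALL` flag as used above (the `raw_header` bytes
are illustrative, not wpull test data):

    import re

    raw_header = b'HTTP/1.1 200 OK\r\nCake: dolphin\r\n\r\n'
    match = re.match(br'(.*?\r?\n\r?\n)', raw_header, re.DOTALL)
    # group(1) runs up to and including the blank line ending the header.
    status_line, _, field_str = match.group(1).partition(b'\n')
    assert status_line == b'HTTP/1.1 200 OK\r'
    assert field_str == b'Cake: dolphin\r\n\r\n'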
Example #20
    def test_xhtml_invalid(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'xhtml_invalid.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual(
            {
                'http://example.com/image.png',
                'http://example.com/script.js',
            },
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/link'
            },
            linked_urls
        )
Example #21
    def test_html_krokozyabry(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['content-type'] = 'text/html; charset=KOI8-R'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'krokozyabry.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual('koi8-r', scrape_info['encoding'])

        self.assertEqual(
            set(),
            inline_urls
        )
        self.assertEqual(
            {'http://example.com/Кракозябры'},
            linked_urls
        )
Example #22
    def test_html_soup(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, '')
        response.fields['Refresh'] = 'yes'

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'soup.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertEqual(
            {'http://example.com/ABOUTM~1.JPG'},
            inline_urls
        )
        self.assertEqual(
            {
                'http://example.com/BLOG',
                'http://example.com/web ring/Join.htm',
            },
            linked_urls
        )
Example #23
    def test_sitemap_scraper_xml(self):
        scraper = SitemapScraper(self.get_html_parser())
        request = Request('http://example.com/sitemap.xml')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            response.body.write(b'''<?xml version="1.0" encoding="UTF-8"?>
                <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                   <url>
                      <loc>http://www.example.com/</loc>
                      <lastmod>2005-01-01</lastmod>
                      <changefreq>monthly</changefreq>
                      <priority>0.8</priority>
                   </url>
                </urlset>
            ''')

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual({
            'http://www.example.com/',
        }, linked_urls)
        self.assertFalse(inline_urls)
Example #24
    def test_rss_as_html(self):
        element_walker = ElementWalker(
            css_scraper=CSSScraper(), javascript_scraper=JavaScriptScraper())
        scraper = HTMLScraper(self.get_html_parser(), element_walker)
        request = Request('http://example.com/')
        response = Response(200, '')
        response.body = Body()
        response.fields['content-type'] = 'application/rss+xml'

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'rss.xml')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)

        self.assertTrue(scrape_result)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links
        self.assertFalse(
            inline_urls
        )
        self.assertEqual(
            {
                'http://www.someexamplerssdomain.com/main.html',
                'http://www.wikipedia.org/'
            },
            linked_urls
        )
Example #25
    def test_javascript_heavy_inline_monstrosity(self):
        scraper = HTMLScraper()
        request = Request.new('http://example.com/')
        response = Response('HTTP/1.0', 200, 'OK')

        with wpull.util.reset_file_offset(response.body.content_file):
            html_file_path = os.path.join(os.path.dirname(__file__),
                'testing', 'samples', 'twitchplayspokemonfirered.html')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body.content_file)

        scrape_info = scraper.scrape(request, response)
        inline_urls = scrape_info['inline_urls']
        linked_urls = scrape_info['linked_urls']

        self.assertIn(
            'http://cdn.bulbagarden.net/upload/archive/a/a4/'
                '20090718115357%21195Quagsire.png',
            inline_urls
        )
        self.assertIn(
            'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
                'user%2FGoldenSandslash15&sa=D&sntz=1&'
                'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A',
            linked_urls
        )
Example #26
    def test_http_response(self):
        response = Response(200, 'OK', version='HTTP/1.0')
        response.fields['hello'] = 'world'

        new_response = HTTPResponseInfoWrapper(response)
        info = new_response.info()

        self.assertEqual('world', info.get('hello'))
Example #27
    def test_response_empty_reason_line(self):
        response = Response()
        response.parse(b'HTTP/1.0 200\r\n')
        response.parse(b'Cake: dolphin\r\n')
        response.parse(b'\r\n')

        self.assertEqual(200, response.status_code)
        self.assertEqual('', response.reason)
        self.assertEqual('dolphin', response.fields['Cake'])
Example #28
    def test_redirect_tracker(self):
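        # A fresh tracker reports no redirects; a 303 with a Location header
        # counts as one, and a sixth redirect pushes past the limit of 5.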
        tracker = RedirectTracker(5)

        self.assertFalse(tracker.is_redirect())
        self.assertFalse(tracker.is_repeat())
        self.assertFalse(tracker.exceeded())
        self.assertFalse(tracker.next_location(raw=True))
        self.assertEqual(0, tracker.count())

        response = Response(200, 'OK')

        tracker.load(response)

        self.assertFalse(tracker.is_redirect())
        self.assertFalse(tracker.is_repeat())
        self.assertFalse(tracker.exceeded())
        self.assertFalse(tracker.next_location())
        self.assertEqual(0, tracker.count())

        response = Response(303, 'See other')
        response.fields['location'] = '/test'

        tracker.load(response)

        self.assertTrue(tracker.is_redirect())
        self.assertFalse(tracker.is_repeat())
        self.assertFalse(tracker.exceeded())
        self.assertEqual('/test', tracker.next_location(raw=True))
        self.assertEqual(1, tracker.count())

        response = Response(307, 'Temporary redirect')
        response.fields['location'] = '/test'

        tracker.load(response)
        tracker.load(response)
        tracker.load(response)
        tracker.load(response)
        tracker.load(response)

        self.assertTrue(tracker.is_redirect())
        self.assertTrue(tracker.is_repeat())
        self.assertTrue(tracker.exceeded())
        self.assertEqual('/test', tracker.next_location(raw=True))
        self.assertEqual(6, tracker.count())
Example #29
    def test_redirect_tracker(self):
        tracker = RedirectTracker(5)

        self.assertFalse(tracker.is_redirect())
        self.assertFalse(tracker.is_repeat())
        self.assertFalse(tracker.exceeded())
        self.assertFalse(tracker.next_location(raw=True))
        self.assertEqual(0, tracker.count())

        response = Response('HTTP/1.1', 200, '')

        tracker.load(response)

        self.assertFalse(tracker.is_redirect())
        self.assertFalse(tracker.is_repeat())
        self.assertFalse(tracker.exceeded())
        self.assertFalse(tracker.next_location())
        self.assertEqual(0, tracker.count())

        response = Response('HTTP/1.1', 303, '')
        response.fields['location'] = '/test'

        tracker.load(response)

        self.assertTrue(tracker.is_redirect())
        self.assertFalse(tracker.is_repeat())
        self.assertFalse(tracker.exceeded())
        self.assertEqual('/test', tracker.next_location(raw=True))
        self.assertEqual(1, tracker.count())

        response = Response('HTTP/1.1', 307, '')
        response.fields['location'] = '/test'

        tracker.load(response)
        tracker.load(response)
        tracker.load(response)
        tracker.load(response)
        tracker.load(response)

        self.assertTrue(tracker.is_redirect())
        self.assertTrue(tracker.is_repeat())
        self.assertTrue(tracker.exceeded())
        self.assertEqual('/test', tracker.next_location(raw=True))
        self.assertEqual(6, tracker.count())
Example #30
        def factory(*args, **kwargs):
            # TODO: Response should be dependency injected
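            # Back each response body with a temp file under the processor's
            # root path, then give any file writer session a chance to
            # process the response before it is returned.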
            response = Response(*args, **kwargs)
            root = self._processor.root_path
            response.body.content_file = Body.new_temp_file(root)

            if self._file_writer_session:
                self._file_writer_session.process_response(response)

            return response