Example #1
0
    def test_css_links_simple(self):
        '''Check the links and context tags found in a tiny stylesheet.

        The stylesheet holds one ``@import`` rule and one ``url()`` value.
        With ``context=True`` the collected items are compared against the
        pairs ``('wow.css', 'import')`` and ``('cool.png', 'url')``.
        '''
        stylesheet = b'''@import url('wow.css');
            body { background: url('cool.png') }
        '''
        css_reader = CSSReader()

        # A set is used because only membership matters, not the order in
        # which the reader reports the links.
        found = set(
            css_reader.iter_links(
                io.BytesIO(stylesheet), encoding='ascii', context=True
            )
        )

        self.assertEqual({('wow.css', 'import'), ('cool.png', 'url')}, found)
Example #2
0
    def test_css_read_links_huge(self):
        '''Check link extraction on input larger than the reader buffer.

        200000 distinct ``url(...)`` declarations are joined with newlines.
        The test first asserts the payload exceeds ``BUFFER_SIZE`` and then
        that every distinct link is reported.
        '''
        declarations = [
            'url(blah{0});'.format(index).encode('ascii')
            for index in range(200000)
        ]
        css_data = b'\n'.join(declarations)
        reader = CSSReader()

        # Guard the premise of the test: the document must not fit into a
        # single buffer of the reader.
        self.assertGreater(len(css_data), reader.BUFFER_SIZE)

        # Every declaration is distinct, so the set size equals the number
        # of distinct links reported.
        links = set(reader.iter_links(io.BytesIO(css_data), encoding='ascii'))

        self.assertEqual(len(links), 200000)
Example #3
0
    def test_css_read_links_huge(self):
        '''Check that a document bigger than ``BUFFER_SIZE`` loses no links.

        The document is 200000 unique ``url(blahN);`` declarations, one per
        line; the number of distinct links read back must match.
        '''
        css_data = b'\n'.join(
            'url(blah{0});'.format(num).encode('ascii')
            for num in range(200000)
        )
        reader = CSSReader()

        # The scenario only makes sense when the data exceeds the buffer.
        self.assertGreater(len(css_data), reader.BUFFER_SIZE)

        stream = io.BytesIO(css_data)
        unique_links = {
            link for link in reader.iter_links(stream, encoding='ascii')
        }

        self.assertEqual(len(unique_links), 200000)
Example #4
0
    def test_css_links_simple(self):
        '''Check links read from a stylesheet with ``context=True``.

        The input has an ``@import url(...)`` rule and a ``background``
        ``url(...)`` value; the collected items must equal the expected
        ``(link, tag)`` pairs.
        '''
        css_data = b'''@import url('wow.css');
            body { background: url('cool.png') }
        '''
        expected = {
            ('wow.css', 'import'),
            ('cool.png', 'url'),
        }
        reader = CSSReader()

        # Collect into a set so the comparison ignores reporting order.
        seen = set()
        seen.update(
            reader.iter_links(
                io.BytesIO(css_data), encoding='ascii', context=True
            )
        )

        self.assertEqual(expected, seen)
Example #5
0
    def _append_filename_extension(self, response):
        '''Append ``.html`` or ``.css`` to the filename when warranted.

        Nothing happens when no filename is set or when the request URL
        scheme is not ``http``/``https``. Otherwise the HTML rule is tried
        first and the CSS rule second; a rule applies when the filename
        does not already end with that rule's extension and the matching
        reader recognises the response. At most one suffix is appended.
        '''
        if not self._filename:
            return

        if response.request.url_info.scheme not in ('http', 'https'):
            return

        # (pattern for an existing extension, reader class, suffix to add),
        # listed in priority order.
        suffix_rules = (
            (r'\.[hH][tT][mM][lL]?$', HTMLReader, '.html'),
            (r'\.[cC][sS][sS]$', CSSReader, '.css'),
        )

        for existing_pattern, reader_class, suffix in suffix_rules:
            if re.search(existing_pattern, self._filename):
                # Already carries this extension; try the next rule.
                continue

            if reader_class.is_response(response):
                self._filename += suffix
                break
Example #6
0
    def _append_filename_extension(self, response: BaseResponse):
        '''Add a missing ``.html``/``.css`` extension to the filename.

        The filename is left alone when it is empty or when the request
        was not made over ``http``/``https``. ``.html`` is appended if the
        name does not end in ``.htm``/``.html`` (any letter case) and
        ``HTMLReader`` recognises the response. Failing that, ``.css`` is
        appended if the name does not end in ``.css`` and ``CSSReader``
        recognises the response.
        '''
        if (not self._filename
                or response.request.url_info.scheme not in ('http', 'https')):
            return

        has_html_suffix = re.search(r'\.[hH][tT][mM][lL]?$', self._filename)

        if not has_html_suffix and HTMLReader.is_response(response):
            self._filename += '.html'
            return

        # The CSS rule is only considered when the HTML rule did not apply.
        has_css_suffix = re.search(r'\.[cC][sS][sS]$', self._filename)

        if not has_css_suffix and CSSReader.is_response(response):
            self._filename += '.css'
Example #7
0
    def test_css_detect(self):
        '''Check CSS detection on files, URLs, requests and responses.'''
        def sniff(data):
            # Each check gets its own fresh in-memory stream.
            return CSSReader.is_file(io.BytesIO(data))

        # Detection from raw file content.
        self.assertTrue(sniff('body { color: white }'.encode('utf-16le')))
        self.assertFalse(sniff('hello world!'.encode('utf-16le')))
        self.assertFalse(sniff(b'<html><body>hello'))
        # HTML content must yield the VeryFalse object itself, not merely
        # a falsy value.
        self.assertTrue(sniff(b'<html><body>hello') is VeryFalse)
        self.assertTrue(sniff(b'h1 { background-color: red }'))
        self.assertTrue(sniff(b'@import url.css;'))

        # Detection from the URL.
        css_url = URLInfo.parse('example.com/index.css')
        self.assertTrue(CSSReader.is_url(css_url))
        jpg_url = URLInfo.parse('example.com/image.jpg')
        self.assertFalse(CSSReader.is_url(jpg_url))

        # Detection from a request object.
        css_request = Request('example.com/index.css')
        self.assertTrue(CSSReader.is_request(css_request))
        jpg_request = Request('example.com/image.jpg')
        self.assertFalse(CSSReader.is_request(jpg_request))

        # Detection from the response Content-Type field.
        css_response = Response(200, 'OK')
        css_response.fields['Content-Type'] = 'text/css'
        self.assertTrue(CSSReader.is_response(css_response))

        png_response = Response(200, 'OK')
        png_response.fields['Content-Type'] = 'image/png'
        self.assertFalse(CSSReader.is_response(png_response))
Example #8
0
    def test_css_detect(self):
        '''Check ``CSSReader`` detection for each supported input kind.

        Covers ``is_file`` on byte streams, ``is_url`` on parsed URLs,
        ``is_request`` on requests and ``is_response`` on responses with a
        ``Content-Type`` field.
        '''
        # --- is_file: content sniffing on byte streams ---
        utf16_css = io.BytesIO('body { color: white }'.encode('utf-16le'))
        self.assertTrue(CSSReader.is_file(utf16_css))

        utf16_text = io.BytesIO('hello world!'.encode('utf-16le'))
        self.assertFalse(CSSReader.is_file(utf16_text))

        html_stream = io.BytesIO(b'<html><body>hello')
        self.assertFalse(CSSReader.is_file(html_stream))

        # Same HTML input again: the result must be the VeryFalse object
        # itself (identity check), not just a falsy value.
        html_result = CSSReader.is_file(io.BytesIO(b'<html><body>hello'))
        self.assertTrue(html_result is VeryFalse)

        rule_stream = io.BytesIO(b'h1 { background-color: red }')
        self.assertTrue(CSSReader.is_file(rule_stream))

        import_stream = io.BytesIO(b'@import url.css;')
        self.assertTrue(CSSReader.is_file(import_stream))

        # --- is_url / is_request: decided from the address ---
        address_cases = (
            ('example.com/index.css', self.assertTrue),
            ('example.com/image.jpg', self.assertFalse),
        )

        for address, verify in address_cases:
            verify(CSSReader.is_url(URLInfo.parse(address)))

        for address, verify in address_cases:
            verify(CSSReader.is_request(Request(address)))

        # --- is_response: decided from the Content-Type field ---
        content_type_cases = (
            ('text/css', self.assertTrue),
            ('image/png', self.assertFalse),
        )

        for content_type, verify in content_type_cases:
            response = Response(200, 'OK')
            response.fields['Content-Type'] = content_type
            verify(CSSReader.is_response(response))