Beispiel #1
0
 def test_getEncodingInfo(self):
     # Every entry of ``self.fulltests`` maps the expected
     # ``(encoding, mismatch)`` pair to a ``(header, text)`` input pair.
     for expected, (header, text) in self.fulltests.items():
         if header:
             # A header is given: wrap it in a fake response object so it
             # is passed to getEncodingInfo together with the text.
             info = encutils.getEncodingInfo(self._fakeRes(header), text)
         else:
             # No header: only the text is passed.
             info = encutils.getEncodingInfo(text=text)
         self.assertEqual(expected, (info.encoding, info.mismatch))
Beispiel #2
0
    def capture(self, url):
        """
        Collect the stylesheets referenced by the HTML document at ``url``.

        HTTP errors are not caught here, so whatever the request helper
        raises propagates to the caller. If the request helper returns a
        falsy response the process is terminated via ``sys.exit(1)``.

        url
            address of the HTML document to capture CSS from

        Returns the ``cssutils.stylesheets.StyleSheetList`` stored in
        ``self.stylesheetlist``.
        """
        self._log.info('\nCapturing CSS from URL:\n    %s\n', url)

        # start every capture from a clean state
        self._nonparsed = {}
        self.stylesheetlist = cssutils.stylesheets.StyleSheetList()

        # the last path component of the URL is remembered as file name
        # (used to save inline styles)
        self._filename = os.path.basename(urllib.parse.urlsplit(url).path)

        # fetch the document; the helper also hands back the URL to use
        # from here on
        url, response = self._doRequest(url)
        if not response:
            sys.exit(1)

        raw = response.read()

        # detect the document encoding from the response and the raw content
        encoding_info = encutils.getEncodingInfo(response, raw, log=self._log)
        self.docencoding = encoding_info.encoding
        self._log.info('\nUsing Encoding: %s\n', self.docencoding)

        # decode and scan the document for stylesheets (and raw css)
        self._findStyleSheets(url, raw.decode(self.docencoding))

        return self.stylesheetlist
Beispiel #3
0
    def capture(self, url, ua=None):
        """
        Collect the stylesheets for ``url``.

        HTTP errors are not caught here, so whatever the request helper
        raises propagates to the caller. If the request helper returns a
        falsy response the process is terminated via ``sys.exit(1)``.

        url
            address to capture CSS from
        ua
            User-Agent to use for requests; when ``None`` the currently
            stored ``self._ua`` is left untouched

        Returns the StyleSheetList stored in ``self.stylesheetlist``.
        """
        # only override the stored User-Agent when one was passed in
        if ua is not None:
            self._ua = ua

        self._log.info(u'\nCapturing CSS from URL: %s\n', url)
        self.stylesheetlist = cssutils.stylesheets.StyleSheetList()

        # index 2 of the split result is the path component; its last
        # segment is remembered as file name (used to save inline styles)
        self._filename = os.path.basename(urlparse.urlsplit(url)[2])

        # fetch the document; the helper also hands back the URL to use
        # from here on
        response, url = self._doRequest(url)
        if not response:
            sys.exit(1)
        raw = response.read()

        # detect the document encoding from the response and the raw content
        encoding_info = encutils.getEncodingInfo(response, raw, log=self._log)
        self.docencoding = encoding_info.encoding
        self._log.info(u'\nUsing Encoding: %s\n', self.docencoding)

        decoded = unicode(raw, self.docencoding)

        # reset the raw css store, then scan the document for stylesheets
        self._nonparsed = {}
        self._findStyleSheets(url, decoded)

        return self.stylesheetlist