def test_getEncodingInfo(self):
    """Check ``encutils.getEncodingInfo`` against every ``self.fulltests`` case.

    Each key of ``self.fulltests`` is the expected ``(encoding, mismatch)``
    pair; each value is the ``(header, text)`` input for that case.
    """
    for expected, (header, text) in self.fulltests.items():
        if header:
            # A truthy header is wrapped in a fake response object and
            # passed together with the text.
            info = encutils.getEncodingInfo(self._fakeRes(header), text)
        else:
            # No header: detection is driven by the text alone.
            info = encutils.getEncodingInfo(text=text)
        self.assertEqual(expected, (info.encoding, info.mismatch))
def capture(self, url):
    """
    Capture all stylesheets at given URL's HTML document.
    Any HTTPError is raised to caller.

    url
        url to capture CSS from

    Returns ``cssutils.stylesheets.StyleSheetList``.

    If the request yields no (falsy) response, the process is terminated
    via ``sys.exit(1)``.
    """
    self._log.info('\nCapturing CSS from URL:\n %s\n', url)

    # Reset per-capture state before anything is fetched.
    self._nonparsed = {}
    self.stylesheetlist = cssutils.stylesheets.StyleSheetList()

    # Basename of the URL path (per the original note: used to save
    # inline styles).
    self._filename = os.path.basename(urllib.parse.urlsplit(url).path)

    # Fetch the document; the request may hand back a different url,
    # which is the one used from here on.
    url, response = self._doRequest(url)
    if not response:
        sys.exit(1)

    raw = response.read()
    info = encutils.getEncodingInfo(response, raw, log=self._log)
    self.docencoding = info.encoding
    self._log.info('\nUsing Encoding: %s\n', self.docencoding)

    # Fill the list of stylesheets and the list of raw css.
    self._findStyleSheets(url, raw.decode(self.docencoding))
    return self.stylesheetlist
def capture(self, url, ua=None):
    """
    Capture stylesheets for the given url, any HTTPError is raised
    to caller.

    url
        url to capture CSS from
    ua
        User-Agent to use for requests; when not None it is stored on
        ``self._ua`` before the request is made

    Returns StyleSheetList.

    If the request yields no (falsy) response, the process is terminated
    via ``sys.exit(1)``.
    """
    if ua is not None:
        self._ua = ua

    self._log.info(u'\nCapturing CSS from URL: %s\n', url)
    self.stylesheetlist = cssutils.stylesheets.StyleSheetList()

    # Basename of the URL path (per the original note: used to save
    # inline styles).
    self._filename = os.path.basename(urlparse.urlsplit(url).path)

    # Fetch the document; the request may hand back a different url,
    # which is the one used from here on.
    response, url = self._doRequest(url)
    if not response:
        sys.exit(1)

    raw = response.read()
    info = encutils.getEncodingInfo(response, raw, log=self._log)
    self.docencoding = info.encoding
    self._log.info(u'\nUsing Encoding: %s\n', self.docencoding)
    decoded = unicode(raw, self.docencoding)

    # Fill the list of stylesheets and the list of raw css.
    self._nonparsed = {}
    self._findStyleSheets(url, decoded)
    return self.stylesheetlist