def test_styleSheet(self):
    "CSSImportRule.styleSheet"

    def fetcher(url):
        # The level1 sheet itself imports a level2 sheet; any other URL
        # resolves to a trivial rule.
        if url == "/root/level1/anything.css":
            return None, '@import "level2/css.css" "title2";'
        else:
            return None, 'a { color: red }'

    parser = css_parser.CSSParser(fetcher=fetcher)
    sheet = parser.parseString(
        '''@charset "ascii"; @import "level1/anything.css" tv "title";''',
        href='/root/')

    self.assertEqual(sheet.href, '/root/')

    # first-level imported sheet
    ir = sheet.cssRules[1]
    self.assertEqual(ir.href, 'level1/anything.css')
    self.assertEqual(ir.styleSheet.href, '/root/level1/anything.css')
    # inherits ascii as no self charset is set
    self.assertEqual(ir.styleSheet.encoding, 'ascii')
    self.assertEqual(ir.styleSheet.ownerRule, ir)
    self.assertEqual(ir.styleSheet.media.mediaText, 'tv')
    self.assertEqual(ir.styleSheet.parentStyleSheet, None)  # sheet
    self.assertEqual(ir.styleSheet.title, 'title')
    self.assertEqual(ir.styleSheet.cssText,
                     '@charset "ascii";\n@import "level2/css.css" "title2";'.encode())

    # second-level imported sheet
    ir2 = ir.styleSheet.cssRules[1]
    self.assertEqual(ir2.href, 'level2/css.css')
    self.assertEqual(ir2.styleSheet.href, '/root/level1/level2/css.css')
    # inherits ascii as no self charset is set
    self.assertEqual(ir2.styleSheet.encoding, 'ascii')
    self.assertEqual(ir2.styleSheet.ownerRule, ir2)
    self.assertEqual(ir2.styleSheet.media.mediaText, 'all')
    self.assertEqual(ir2.styleSheet.parentStyleSheet, None)  # ir.styleSheet
    self.assertEqual(ir2.styleSheet.title, 'title2')
    self.assertEqual(ir2.styleSheet.cssText,
                     '@charset "ascii";\na {\n    color: red\n    }'.encode())

    # a failing fetch still yields an (empty) CSSStyleSheet object
    sheet = css_parser.parseString('@import "CANNOT-FIND.css";')
    ir = sheet.cssRules[0]
    self.assertEqual(ir.href, "CANNOT-FIND.css")
    self.assertEqual(type(ir.styleSheet), css_parser.css.CSSStyleSheet)

    def fetcher(url):
        # level1 declares its own @charset; level2 has none
        if url.endswith('level1.css'):
            return None, '@charset "ascii"; @import "level2.css";'.encode()
        else:
            return None, 'a { color: red }'.encode()

    parser = css_parser.CSSParser(fetcher=fetcher)
    sheet = parser.parseString('@charset "iso-8859-1";@import "level1.css";')
    self.assertEqual(sheet.encoding, 'iso-8859-1')
    # level1 overrides with its own @charset ...
    sheet = sheet.cssRules[1].styleSheet
    self.assertEqual(sheet.encoding, 'ascii')
    # ... and level2 inherits level1's encoding
    sheet = sheet.cssRules[1].styleSheet
    self.assertEqual(sheet.encoding, 'ascii')
def test_parseComments(self):
    "css_parser.CSSParser(parseComments=False)"
    source = '/*1*/ a { color: /*2*/ red; }'
    # (parseComments flag, expected serialization)
    expectations = [
        (False, 'a {\n    color: red\n    }'),
        (True, '/*1*/\na {\n    color: /*2*/ red\n    }'),
    ]
    for keep_comments, expected in expectations:
        parser = css_parser.CSSParser(parseComments=keep_comments)
        self.assertEqual(parser.parseString(source).cssText,
                         expected.encode())
def __init__(self, accept_invalid_tokens=True) -> None:
    """Set up a raising, non-validating CSS parser and pick the
    ident-token grammar up front."""
    self.cssparser = css_parser.CSSParser(raiseExceptions=True,
                                          validate=False)
    # Lenient grammar by default; the strict one only on request.
    self.ident_token = (self.simpler_ident_token if accept_invalid_tokens
                        else self.full_ident_token)
def __init__(self, ua=None, log=None, defaultloglevel=logging.INFO):
    """
    initialize a new Capture object

    ua
        init User-Agent to use for requests
    log
        supply a log object which is used instead of the default
        log which writes to sys.stderr
    defaultloglevel
        constant of logging package which defines the level of the
        default log if no explicit log given
    """
    self._ua = ua

    if log:
        # a caller-supplied logger wins over the default one
        self._log = log
    else:
        # default: plain messages to stderr at the requested level
        self._log = logging.getLogger('CSSCapture')
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(logging.Formatter('%(message)s'))
        self._log.addHandler(handler)
        self._log.setLevel(defaultloglevel)
        self._log.debug('Using default log')

    self._htmlparser = CSSCaptureHTMLParser()
    self._cssparser = css_parser.CSSParser(log=self._log)
def do_raise_p(self, tests, debug=False, raising=True):
    """Parse every test string and assert the mapped exception is raised."""
    # parses with self.p and expects raise
    parser = css_parser.CSSParser(raiseExceptions=raising)
    for css, expected_exc in tests.items():
        if debug:
            print(('"%s"' % css))
        self.assertRaises(expected_exc, parser.parseString, css)
def test_import_from_above(self):
    """Relative @import/url() references that climb above the sheet's href."""

    def fetch_root(url):
        # 'test.css' has no directory, so '../test2.css' stays as-is
        self.assertEqual(url, '../test2.css')
        return None, ''

    parser = css_parser.CSSParser(fetcher=fetch_root)
    sheet = parser.parseString(
        "@import url('../test2.css'); a { background-image: url(../test.jpg); }",
        href='test.css')
    uri = sheet.cssRules[1].style.getPropertyCSSValue(
        'background-image')[0].absoluteUri
    self.assertEqual(uri, '../test.jpg')

    def fetch_nested(url):
        # from 'a/b/test.css', '../test2.css' resolves to 'a/test2.css'
        self.assertEqual(url, 'a/test2.css')
        return None, ''

    parser = css_parser.CSSParser(fetcher=fetch_nested)
    sheet = parser.parseString(
        "@import url('../test2.css'); a { background-image: url(../test.jpg); }",
        href='a/b/test.css')
    uri = sheet.cssRules[1].style.getPropertyCSSValue(
        'background-image')[0].absoluteUri
    self.assertEqual(uri, 'a/test.jpg')

    # chained imports: each fetch consumes the next (expected url, css) pair
    results = [('a/test.css', '@import url(../test.css);'),
               ('test.css', 'p { background-image: url(a/test.jpg) }')]

    def fetch_chain(url):
        expected_url, text = results.pop(0)
        self.assertEqual(expected_url, url)
        return None, text

    parser = css_parser.CSSParser(fetcher=fetch_chain)
    sheet = parser.parseString(
        "@import url('../test.css'); a { background-image: url(../test.jpg); }",
        href='a/b/test.css')
    style = sheet.cssRules[0].styleSheet.cssRules[0].styleSheet.cssRules[0].style
    self.assertEqual(
        style.getPropertyCSSValue('background-image')[0].absoluteUri,
        'a/test.jpg')
def check_css_parsing(name, raw, line_offset=0, is_declaration=False):
    """Parse *raw* CSS (a whole sheet or a bare declaration) and return
    the collected parse errors, with line numbers shifted by *line_offset*
    into the embedding document's coordinates."""
    handler = ErrorHandler(name)
    # never fetch imports; route all parser messages into our handler
    parser = css_parser.CSSParser(fetcher=lambda x: (None, None), log=handler)
    if is_declaration:
        parser.parseStyle(raw, validate=True)
    else:
        try:
            parser.parseString(raw, validate=True)
        except UnicodeDecodeError:
            return [DecodeError(name)]
    errors = handler.errors
    for err in errors:
        err.line += line_offset
    return errors
def csscombine(path=None, url=None, cssText=None, href=None,
               sourceencoding=None, targetencoding=None,
               minify=True, resolveVariables=True):
    """Combine sheets referred to by @import rules in given CSS proxy sheet
    into a single new sheet.

    :returns: combined cssText, normal or minified
    :Parameters:
        `path` or `url` or `cssText` + `href`
            path or URL to a CSSStyleSheet or a cssText of a sheet which
            imports other sheets which are then combined into one sheet.
            `cssText` normally needs `href` to be able to resolve relative
            imports.
        `sourceencoding` = 'utf-8'
            explicit encoding of the source proxysheet
        `targetencoding`
            encoding of the combined stylesheet
        `minify` = True
            defines if the combined sheet should be minified, in this case
            comments are not parsed at all!
        `resolveVariables` = True
            defines if variables in combined sheet should be resolved
    """
    # NOTE(review): this logs `url` even when `path`/`cssText` was given
    css_parser.log.info('Combining files from %r' % url, neverraise=True)
    if sourceencoding is not None:
        css_parser.log.info('Using source encoding %r' % sourceencoding,
                            neverraise=True)

    # when minifying, comments are skipped during parsing entirely
    parser = css_parser.CSSParser(parseComments=not minify)

    if path and not cssText:
        src = parser.parseFile(path, encoding=sourceencoding)
    elif url:
        src = parser.parseUrl(url, encoding=sourceencoding)
    elif cssText:
        src = parser.parseString(cssText, href=href, encoding=sourceencoding)
    else:
        # exits the interpreter (SystemExit) when no source was given
        sys.exit('Path or URL must be given')

    result = css_parser.resolveImports(src)
    result.encoding = targetencoding
    css_parser.log.info('Using target encoding: %r' % targetencoding,
                        neverraise=True)

    # Serialize with a fresh serializer so the module-global serializer's
    # prefs are untouched, then restore the previous one.
    oldser = css_parser.ser
    css_parser.setSerializer(css_parser.serialize.CSSSerializer())
    if minify:
        css_parser.ser.prefs.useMinified()
    css_parser.ser.prefs.resolveVariables = resolveVariables
    cssText = result.cssText
    css_parser.setSerializer(oldser)

    return cssText
def test_init(self):
    "CSSParser.__init__()"
    # module-level log raises by default
    self.assertEqual(True, css_parser.log.raiseExceptions)

    # also the default:
    css_parser.log.raiseExceptions = True

    # default non raising parser
    p = css_parser.CSSParser()
    s = p.parseString('$')
    self.assertEqual(s.cssText, ''.encode())

    # explicit raiseExceptions=False
    p = css_parser.CSSParser(raiseExceptions=False)
    s = p.parseString('$')
    self.assertEqual(s.cssText, ''.encode())
    # working with sheet does raise though!
    self.assertRaises(xml.dom.DOMException, s.__setattr__, 'cssText', '$')

    # ----
    # raiseExceptions=True
    p = css_parser.CSSParser(raiseExceptions=True)
    self.assertRaises(xml.dom.SyntaxErr, p.parseString, '$')
    # working with a sheet does raise too
    s = css_parser.css.CSSStyleSheet()
    self.assertRaises(xml.dom.DOMException, s.__setattr__, 'cssText', '$')

    # RESET css_parser.log.raiseExceptions
    css_parser.log.raiseExceptions = False
    s = css_parser.css.CSSStyleSheet()
    # does not raise!
    s.__setattr__('cssText', '$')
    self.assertEqual(s.cssText, ''.encode())
def test_import_from_above(self):
    """Relative @import/url() references that climb above the sheet's href."""

    def fetch_root(url):
        # 'test.css' has no directory, so '../test2.css' stays as-is
        self.assertEqual(url, '../test2.css')
        return None, ''

    parser = css_parser.CSSParser(fetcher=fetch_root)
    sheet = parser.parseString(
        "@import url('../test2.css'); a { background-image: url(../test.jpg); }",
        href='test.css')
    uri = sheet.cssRules[1].style.getPropertyCSSValue(
        'background-image')[0].absoluteUri
    self.assertEqual(uri, '../test.jpg')

    def fetch_nested(url):
        # from 'a/b/test.css', '../test2.css' resolves to 'a/test2.css'
        self.assertEqual(url, 'a/test2.css')
        return None, ''

    parser = css_parser.CSSParser(fetcher=fetch_nested)
    sheet = parser.parseString(
        "@import url('../test2.css'); a { background-image: url(../test.jpg); }",
        href='a/b/test.css')
    uri = sheet.cssRules[1].style.getPropertyCSSValue(
        'background-image')[0].absoluteUri
    self.assertEqual(uri, 'a/test.jpg')
def do_equal_p(self, tests, att='cssText', debug=False, raising=True):
    """
    if raising self.p is used for parsing, else self.pf
    """
    # parses with self.p and checks att of result
    parser = css_parser.CSSParser(raiseExceptions=raising)
    for css, expected in tests.items():
        if debug:
            print(('"%s"' % css))
        sheet = parser.parseString(css)
        if expected is None:
            # None means "round-trips unchanged"
            expected = css
        actual = sheet.__getattribute__(att)
        if isinstance(actual, bytes):
            actual = actual.decode('utf-8')
        self.assertEqual(expected, actual)
def loads(styles: str) -> MutableMapping[str, MutableMapping[str, Any]]:
    r"""
    Parse a style sheet and return its dictionary representation.

    .. versionadded:: 0.2.0

    :param styles:

    :return: The style sheet as a dictionary.
    """
    parser = css_parser.CSSParser(validate=False)
    stylesheet: css_parser.css.CSSStyleSheet = parser.parseString(styles)

    def declaration_to_dict(
            declaration: css_parser.css.CSSStyleDeclaration
            ) -> MutableMapping[str, Property]:
        # A property carrying a priority (e.g. !important) becomes a
        # (value, priority) tuple, otherwise just the value.
        converted: Dict[str, Property] = {}
        prop: css_parser.css.Property
        for prop in declaration.children():
            converted[prop.name] = ((prop.value, prop.priority)
                                    if prop.priority else prop.value)
        return converted

    styles_dict: MutableMapping[str, MutableMapping[str, Any]] = {}

    rule: css_parser.css.CSSRule
    for rule in stylesheet.cssRules:
        if isinstance(rule, css_parser.css.CSSStyleRule):
            styles_dict[rule.selectorText] = declaration_to_dict(rule.style)
        elif isinstance(rule, css_parser.css.CSSMediaRule):
            media_key = f"@media {rule.media.mediaText}"
            styles_dict[media_key] = {}
            for child in rule.cssRules:
                styles_dict[media_key][child.selectorText] = \
                    declaration_to_dict(child.style)
        else:
            # anything else (comments, @import, ...) is unsupported
            raise NotImplementedError(rule)

    return styles_dict
def __call__(self, oeb, log, opts):
    # Entry point of the transform: record context, collect style rules and
    # already-embedded fonts, then process every spine document that links
    # stylesheets. NOTE(review): presumably this embeds fonts referenced by
    # the book's CSS — confirm against find_embedded_fonts/process_item.
    self.oeb, self.log, self.opts = oeb, log, opts
    self.sheet_cache = {}
    self.find_style_rules()
    self.find_embedded_fonts()
    # Quiet parser: only CRITICAL messages, routed to calibre's css logger
    self.parser = css_parser.CSSParser(loglevel=logging.CRITICAL,
                                       log=logging.getLogger('calibre.css'))
    self.warned = set()
    self.warned2 = set()
    self.newly_embedded_fonts = set()
    for item in oeb.spine:
        # skip spine items without parsed XML content (no xpath support)
        if not hasattr(item.data, 'xpath'):
            continue
        sheets = []
        # gather all <link type="text/css"> stylesheets of this document
        for href in XPath('//h:link[@href and @type="text/css"]/@href')(item.data):
            sheet = self.oeb.manifest.hrefs.get(item.abshref(href), None)
            if sheet is not None:
                sheets.append(sheet)
        if sheets:
            self.process_item(item, sheets)
def test_hrefFound(self):
    "CSSImportRule.hrefFound"

    def fetcher(url):
        # exactly one URL is fetchable; everything else is "not found"
        if url == 'http://example.com/yes':
            return None, '/**/'
        else:
            return None, None

    parser = css_parser.CSSParser(fetcher=fetcher)
    sheet = parser.parseString('@import "http://example.com/yes" "name"')

    rule = sheet.cssRules[0]
    self.assertEqual('/**/'.encode(), rule.styleSheet.cssText)
    self.assertEqual(True, rule.hrefFound)
    self.assertEqual('name', rule.name)

    # re-pointing the rule at an unfetchable URL empties the sheet
    rule.cssText = '@import url(http://example.com/none) "name2";'
    self.assertEqual(''.encode(), rule.styleSheet.cssText)
    self.assertEqual(False, rule.hrefFound)
    self.assertEqual('name2', rule.name)

    # replacing the whole sheet's text creates a brand-new rule object
    sheet.cssText = '@import url(http://example.com/none);'
    self.assertNotEqual(rule, sheet.cssRules[0])
def reformat_css(css_string, useoneline):
    """Reparse and reserialize *css_string* with the project serializer.

    :param css_string: CSS source text (unicode).
    :param useoneline: serialize each rule onto a single line when True.
    :returns: tuple ``(new_css_string, css_errors, css_warnings)`` where
        ``css_errors`` is non-empty (and the input returned unchanged) if
        parsing failed, and ``css_warnings`` lists unknown at-rules.
    """
    new_css_string = ""
    css_errors = ""
    css_warnings = ""
    prefs = get_prefs()
    if useoneline:
        # compact single-line serialization preferences
        prefs.indent = ''
        prefs.lineSeparator = ''
        prefs.listItemSpacer = ' '
        prefs.paranthesisSpacer = ''
        prefs.propertyNameSpacer = ''
        prefs.selectorCombinatorSpacer = ''
        prefs.spacer = ' '
        prefs.validOnly = False
        prefs.linesAfterRules = 1 * '\n'
    css_parser.setSerializer(MyCSSSerializer(prefs))
    aparser = css_parser.CSSParser(raiseExceptions=True, validate=False,
                                   fetcher=nofetch)
    try:
        parsed_css = aparser.parseString(css_string)
    except Exception as E:  # css_parser.xml.dom.HierarchyRequestErr as E:
        # parsing error - make no changes
        css_errors = str(E)
        new_css_string = css_string
    else:
        # 0 means UNKNOWN_RULE, as from cssparser.css.cssrule.CSSRule
        for unknown_rule in parsed_css.cssRules.rulesOfType(0):
            line = css_string[:css_string.find(
                unknown_rule.atkeyword)].count('\n') + 1
            # BUG FIX: `line` is an int; concatenating it directly raised
            # TypeError whenever an unknown rule was reported.
            css_warnings += ("Unknown rule: " + unknown_rule.atkeyword +
                             " at line: " + str(line) + '\n')
        # we want unicode not a byte string here
        new_css_string = parsed_css.cssText.decode('utf-8', errors='replace')
    return (new_css_string, css_errors, css_warnings)
def test_validate(self):
    """CSSParser(validate)"""
    style = 'color: red'
    t = 'a { %s }' % style  # helper

    # module-level convenience functions: default is validating,
    # explicit validate= overrides per call
    self.assertEqual(css_parser.parseString(t).validating, True)
    self.assertEqual(css_parser.parseString(t, validate=False).validating, False)
    self.assertEqual(css_parser.parseString(t, validate=True).validating, True)

    self.assertEqual(css_parser.parseStyle(style).validating, True)
    self.assertEqual(css_parser.parseStyle(style, validate=True).validating, True)
    self.assertEqual(css_parser.parseStyle(style, validate=False).validating, False)

    # parser objects: the constructor sets the default, the per-call
    # validate= argument always wins
    for ctor_validate, default in ((None, True), (True, True), (False, False)):
        if ctor_validate is None:
            p = css_parser.CSSParser()
        else:
            p = css_parser.CSSParser(validate=ctor_validate)
        self.assertEqual(p.parseString(t).validating, default)
        self.assertEqual(p.parseString(t, validate=False).validating, False)
        self.assertEqual(p.parseString(t, validate=True).validating, True)
        self.assertEqual(p.parseStyle(style).validating, default)

    # url parsing honours the same override rules
    p = css_parser.CSSParser(validate=False)
    p.setFetcher(self._make_fetcher('utf-8', t))
    u = 'url'
    self.assertEqual(p.parseUrl(u).validating, False)
    self.assertEqual(p.parseUrl(u, validate=False).validating, False)
    self.assertEqual(p.parseUrl(u, validate=True).validating, True)
def convert(self, stream, options, file_ext, log, accelerators):
    """Convert an FB2 stream to OEB input.

    Parses (and if needed repairs) the FB2 XML, extracts embedded
    stylesheets and binaries, runs the FB2->XHTML XSLT and writes
    ``index.xhtml``, ``inline-styles.css`` and ``metadata.opf``; returns
    the absolute path of the OPF file.
    """
    from lxml import etree
    from calibre.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
    from calibre.ebooks.metadata.opf2 import OPFCreator
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
    from calibre.ebooks.chardet import xml_to_unicode
    self.log = log
    log.debug('Parsing XML...')
    raw = get_fb2_data(stream)[0]
    raw = raw.replace(b'\0', b'')
    raw = xml_to_unicode(raw, strip_encoding_pats=True,
                         assume_utf8=True, resolve_entities=True)[0]
    try:
        doc = etree.fromstring(raw)
    except etree.XMLSyntaxError:
        try:
            # retry with lxml's recovering parser
            doc = etree.fromstring(raw, parser=RECOVER_PARSER)
            if doc is None:
                raise Exception('parse failed')
        except:
            # BUG FIX: bare ampersands must be escaped as '&amp;' for the
            # repaired document to be well-formed; replacing '& ' with a
            # plain '&' did nothing for validity.
            doc = etree.fromstring(raw.replace('& ', '&amp;'),
                                   parser=RECOVER_PARSER)
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
    doc = ensure_namespace(doc)
    try:
        fb_ns = doc.nsmap[doc.prefix]
    except Exception:
        fb_ns = FB2NS

    NAMESPACES = {'f': fb_ns, 'l': XLINK_NS}
    stylesheets = doc.xpath(
        '//*[local-name() = "stylesheet" and @type="text/css"]')
    css = ''
    for s in stylesheets:
        css += etree.tostring(
            s, encoding=unicode_type, method='text', with_tail=False) + '\n\n'
    if css:
        import css_parser, logging
        parser = css_parser.CSSParser(fetcher=None,
                                      log=logging.getLogger('calibre.css'))
        # scope selectors to the XHTML namespace
        XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
        text = XHTML_CSS_NAMESPACE + css
        log.debug('Parsing stylesheet...')
        stylesheet = parser.parseString(text)
        stylesheet.namespaces['h'] = XHTML_NS
        css = stylesheet.cssText
        if isinstance(css, bytes):
            css = css.decode('utf-8', 'replace')
        # FB2 styles <style name="..."> elements; map them to spans/classes
        css = css.replace('h|style', 'h|span')
        css = re.sub(r'name\s*=\s*', 'class=', css)
    self.extract_embedded_content(doc)
    log.debug('Converting XML to HTML...')
    # BUG FIX: the template is read as bytes but manipulated with str
    # operations below — decode it (and close the handle deterministically).
    with open(P('templates/fb2.xsl'), 'rb') as f:
        ss = f.read().decode('utf-8')
    ss = ss.replace("__FB_NS__", fb_ns)
    if options.no_inline_fb2_toc:
        log('Disabling generation of inline FB2 TOC')
        ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
                        re.DOTALL).sub('', ss)

    styledoc = etree.fromstring(ss)
    transform = etree.XSLT(styledoc)
    result = transform(doc)

    # Handle links of type note and cite
    notes = {
        a.get('href')[1:]: a
        for a in result.xpath('//a[@link_note and @href]')
        if a.get('href').startswith('#')
    }
    cites = {
        a.get('link_cite'): a
        for a in result.xpath('//a[@link_cite]')
        if not a.get('href', '')
    }
    all_ids = {x for x in result.xpath('//*/@id')}
    for cite, a in iteritems(cites):
        note = notes.get(cite, None)
        if note:
            c = 1
            while 'cite%d' % c in all_ids:
                c += 1
            if not note.get('id', None):
                note.set('id', 'cite%d' % c)
            all_ids.add(note.get('id'))
            a.set('href', '#%s' % note.get('id'))
    for x in result.xpath('//*[@link_note or @link_cite]'):
        x.attrib.pop('link_note', None)
        x.attrib.pop('link_cite', None)

    # remap binary references extracted by extract_embedded_content()
    for img in result.xpath('//img[@src]'):
        src = img.get('src')
        img.set('src', self.binary_map.get(src, src))

    index = transform.tostring(result)
    # BUG FIX: files opened in binary mode need encoded content, and the
    # handles were previously leaked.
    with open(u'index.xhtml', 'wb') as f:
        f.write(index.encode('utf-8'))
    with open(u'inline-styles.css', 'wb') as f:
        f.write(css.encode('utf-8'))
    stream.seek(0)
    mi = get_metadata(stream, 'fb2')
    if not mi.title:
        mi.title = _('Unknown')
    if not mi.authors:
        mi.authors = [_('Unknown')]
    cpath = None
    if mi.cover_data and mi.cover_data[1]:
        with open(u'fb2_cover_calibre_mi.jpg', 'wb') as f:
            f.write(mi.cover_data[1])
        cpath = os.path.abspath(u'fb2_cover_calibre_mi.jpg')
    else:
        # fall back to the FB2 coverpage image reference
        for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
            href = img.get('{%s}href' % XLINK_NS, img.get('href', None))
            if href is not None:
                if href.startswith('#'):
                    href = href[1:]
                cpath = os.path.abspath(href)
                break

    opf = OPFCreator(getcwd(), mi)
    entries = [(f2, guess_type(f2)[0]) for f2 in os.listdir(u'.')]
    opf.create_manifest(entries)
    opf.create_spine([u'index.xhtml'])
    if cpath:
        opf.guide.set_cover(cpath)
    with open(u'metadata.opf', 'wb') as f:
        opf.render(f)
    return os.path.join(getcwd(), u'metadata.opf')
def test_imports(self):
    "CSSVariables imports"

    def fetcher(url):
        # normalise path separators and keep only the basename: it
        # selects which fake imported sheet to return
        url = url.replace('\\', '/')
        url = url[url.rfind('/') + 1:]
        return (None, {
            '3.css': '''
                @variables {
                    over3-2-1-0: 3;
                    over3-2-1: 3;
                    over3-2: 3;
                    over3-2-0: 3;
                    over3-1: 3;
                    over3-1-0: 3;
                    over3-0: 3;
                    local3: 3;
                }
            ''',
            '2.css': '''
                @variables {
                    over3-2-1-0: 2;
                    over3-2-1: 2;
                    over3-2-0: 2;
                    over3-2: 2;
                    over2-1: 2;
                    over2-1-0: 2;
                    over2-0: 2;
                    local2: 2;
                }
            ''',
            '1.css': '''
                @import "3.css";
                @import "2.css";
                @variables {
                    over3-2-1-0: 1;
                    over3-2-1: 1;
                    over3-1: 1;
                    over3-1-0: 1;
                    over2-1: 1;
                    over2-1-0: 1;
                    over1-0: 1;
                    local1: 1;
                }
            '''
        }[url])

    # the proxy sheet: imports 1.css (which imports 3.css then 2.css)
    # and declares its own variables; later declarations win
    css = '''
        @import "1.css";
        @variables {
            over3-2-1-0: 0;
            over3-2-0: 0;
            over3-1-0: 0;
            over2-1-0: 0;
            over3-0: 0;
            over2-0: 0;
            over1-0: 0;
            local0: 0;
        }
        a {
            local0: var(local0);
            local1: var(local1);
            local2: var(local2);
            local3: var(local3);
            over1-0: var(over1-0);
            over2-0: var(over2-0);
            over3-0: var(over3-0);
            over2-1: var(over2-1);
            over3-1: var(over3-1);
            over3-2: var(over3-2);
            over2-1-0: var(over2-1-0);
            over3-2-0: var(over3-2-0);
            over3-2-1: var(over3-2-1);
            over3-2-1-0: var(over3-2-1-0);
        }
    '''
    p = css_parser.CSSParser(fetcher=fetcher)
    s = p.parseString(css)

    # only these in rule of this sheet
    self.assertEqual(s.cssRules[1].variables.length, 8)
    # but all vars in s available!
    self.assertEqual(s.variables.length, 15)
    self.assertEqual([
        'local0', 'local1', 'local2', 'local3', 'over1-0', 'over2-0',
        'over2-1', 'over2-1-0', 'over3-0', 'over3-1', 'over3-1-0',
        'over3-2', 'over3-2-0', 'over3-2-1', 'over3-2-1-0'
    ], sorted(s.variables.keys()))

    # test with variables rule
    css_parser.ser.prefs.resolveVariables = False
    self.assertEqual(s.cssText, '''@import "1.css";
@variables {
    over3-2-1-0: 0;
    over3-2-0: 0;
    over3-1-0: 0;
    over2-1-0: 0;
    over3-0: 0;
    over2-0: 0;
    over1-0: 0;
    local0: 0
    }
a {
    local0: var(local0);
    local1: var(local1);
    local2: var(local2);
    local3: var(local3);
    over1-0: var(over1-0);
    over2-0: var(over2-0);
    over3-0: var(over3-0);
    over2-1: var(over2-1);
    over3-1: var(over3-1);
    over3-2: var(over3-2);
    over2-1-0: var(over2-1-0);
    over3-2-0: var(over3-2-0);
    over3-2-1: var(over3-2-1);
    over3-2-1-0: var(over3-2-1-0)
    }'''.encode())

    # test with resolved vars
    css_parser.ser.prefs.resolveVariables = True
    self.assertEqual(s.cssText, '''@import "1.css";
a {
    local0: 0;
    local1: 1;
    local2: 2;
    local3: 3;
    over1-0: 0;
    over2-0: 0;
    over3-0: 0;
    over2-1: 1;
    over3-1: 1;
    over3-2: 2;
    over2-1-0: 0;
    over3-2-0: 0;
    over3-2-1: 1;
    over3-2-1-0: 0
    }'''.encode())

    # resolving imports inlines the imported sheets as comments
    s = css_parser.resolveImports(s)
    self.assertEqual(s.cssText, '''/* START @import "1.css" */
/* START @import "3.css" */
/* START @import "2.css" */
a {
    local0: 0;
    local1: 1;
    local2: 2;
    local3: 3;
    over1-0: 0;
    over2-0: 0;
    over3-0: 0;
    over2-1: 1;
    over3-1: 1;
    over3-2: 2;
    over2-1-0: 0;
    over3-2-0: 0;
    over3-2-1: 1;
    over3-2-1-0: 0
    }'''.encode())
def test_parseUrl(self):
    "CSSParser.parseUrl()"
    parser = css_parser.CSSParser()
    with self.patch_default_fetcher((None, '')):
        sheet = parser.parseUrl('http://example.com',
                                media='tv,print',
                                title='test')

    self.assertEqual(sheet.href, 'http://example.com')
    self.assertEqual(sheet.encoding, 'utf-8')
    self.assertEqual(sheet.media.mediaText, 'tv, print')
    self.assertEqual(sheet.title, 'test')

    # URL and content tests
    tests = {
        # (url, content): isSheet, encoding, cssText
        ('', None): (False, None, None),
        ('1', None): (False, None, None),
        ('mailto:[email protected]', None): (False, None, None),
        ('http://cthedot.de/test.css', None): (False, None, None),
        ('http://cthedot.de/test.css', ''): (True, 'utf-8', ''),
        ('http://cthedot.de/test.css', 'a'): (True, 'utf-8', ''),
        # NOTE(review): the next key appears twice; the duplicate entry is
        # harmless (dict keeps one) but is dead code.
        ('http://cthedot.de/test.css', 'a {color: red}'):
            (True, 'utf-8', 'a {\n    color: red\n    }'),
        ('http://cthedot.de/test.css', 'a {color: red}'):
            (True, 'utf-8', 'a {\n    color: red\n    }'),
        ('http://cthedot.de/test.css', '@charset "ascii";a {color: red}'):
            (True, 'ascii', '@charset "ascii";\na {\n    color: red\n    }'),
    }
    override = 'iso-8859-1'
    overrideprefix = '@charset "iso-8859-1";'
    httpencoding = None

    for (url, content), (isSheet, expencoding, cssText) in tests.items():
        parser.setFetcher(self._make_fetcher(httpencoding, content))
        sheet1 = parser.parseUrl(url)
        # second parse with an explicit encoding override
        sheet2 = parser.parseUrl(url, encoding=override)
        if isSheet:
            self.assertEqual(sheet1.encoding, expencoding)
            self.assertEqual(sheet1.cssText, cssText.encode())
            self.assertEqual(sheet2.encoding, override)
            if sheet1.cssText and cssText.startswith('@charset'):
                # existing @charset is replaced by the override
                self.assertEqual(sheet2.cssText,
                                 (cssText.replace('ascii', override).encode()))
            elif sheet1.cssText:
                # a @charset for the override is prepended
                self.assertEqual(sheet2.cssText,
                                 (overrideprefix + '\n' + cssText).encode())
            else:
                self.assertEqual(sheet2.cssText,
                                 (overrideprefix + cssText).encode())
        else:
            self.assertEqual(sheet1, None)
            self.assertEqual(sheet2, None)

    parser.setFetcher(None)

    self.assertRaises(ValueError, parser.parseUrl, '../not-valid-in-urllib')
    # we'll get an URLError if no network connection
    self.assertRaises(
        (HTTPError, URLError, FetchError), parser.parseUrl,
        'https://github.com/ebook-utils/css-parser/not-found.css')
def main(args=None):
    """
    Parses given filename(s) or string or URL (using optional encoding)
    and prints the parsed style sheet to stdout.

    Redirect stdout to save CSS. Redirect stderr to save parser log infos.
    """
    usage = """usage: %prog [options] filename1.css [filename2.css ...] [>filename_combined.css] [2>parserinfo.log] """
    optparser = optparse.OptionParser(usage=usage)
    optparser.add_option('-s', '--string', action='store_true', dest='string',
                         help='parse given string')
    optparser.add_option('-u', '--url', action='store', dest='url',
                         help='parse given url')
    optparser.add_option('-e', '--encoding', action='store', dest='encoding',
                         help='encoding of the file or override encoding found')
    optparser.add_option('-m', '--minify', action='store_true', dest='minify',
                         help='minify parsed CSS', default=False)
    optparser.add_option('-d', '--debug', action='store_true', dest='debug',
                         help='activate debugging output')

    (options, params) = optparser.parse_args(args)

    if not params and not options.url:
        optparser.error("no filename given")

    # FIX: previously the OptionParser variable `p` was rebound to the
    # CSSParser, shadowing it confusingly; use distinct names.
    if options.debug:
        parser = css_parser.CSSParser(loglevel=logging.DEBUG)
    else:
        parser = css_parser.CSSParser()

    if options.minify:
        css_parser.ser.prefs.useMinified()

    if options.string:
        sheet = parser.parseString(''.join(params), encoding=options.encoding)
        print(sheet.cssText)
    elif options.url:
        sheet = parser.parseUrl(options.url, encoding=options.encoding)
        print(sheet.cssText)
    else:
        # each file: banner on stderr, parsed CSS on stdout
        for filename in params:
            sys.stderr.write('=== CSS FILE: "%s" ===\n' % filename)
            sheet = parser.parseFile(filename, encoding=options.encoding)
            print(sheet.cssText)
            print()
            sys.stderr.write('\n')
def test_fetcher(self):
    """CSSParser.fetcher

    order:
       0. explicity given encoding OVERRIDE (css_parser only)

       1. An HTTP "charset" parameter in a "Content-Type" field
          (or similar parameters in other protocols)
       2. BOM and/or @charset (see below)
       3. <link charset=""> or other metadata from the linking mechanism
          (if any)
       4. charset of referring style sheet or document (if any)
       5. Assume UTF-8
    """
    tests = {
        # css, encoding, (mimetype, encoding, importcss):
        #    encoding, importIndex, importEncoding, importText

        # 0/0 override/override => ASCII/ASCII
        ('@charset "utf-16"; @import "x";', 'ASCII',
         ('iso-8859-1', '@charset "latin1";/*t*/')):
            ('ascii', 1, 'ascii', '@charset "ascii";\n/*t*/'.encode()),

        # 1/1 not tested her but same as next

        # 2/1 @charset/HTTP => UTF-16/ISO-8859-1
        ('@charset "UTF-16"; @import "x";', None,
         ('ISO-8859-1', '@charset "latin1";/*t*/')):
            ('utf-16', 1, 'iso-8859-1',
             '@charset "iso-8859-1";\n/*t*/'.encode('iso-8859-1')),

        # 2/2 @charset/@charset => UTF-16/ISO-8859-1
        ('@charset "UTF-16"; @import "x";', None,
         (None, '@charset "ISO-8859-1";/*t*/')):
            ('utf-16', 1, 'iso-8859-1',
             '@charset "iso-8859-1";\n/*t*/'.encode('iso-8859-1')),

        # 2/4 @charset/referrer => ASCII/ASCII
        ('@charset "ASCII"; @import "x";', None,
         (None, '/*t*/')):
            ('ascii', 1, 'ascii', '@charset "ascii";\n/*t*/'.encode()),

        # 5/5 default/default or referrer
        ('@import "x";', None,
         (None, '/*t*/')):
            ('utf-8', 0, 'utf-8', '/*t*/'.encode()),

        # 0/0 override/override+unicode
        ('@charset "utf-16"; @import "x";', 'ASCII',
         (None, '@charset "latin1";/*\u0287*/')):
            ('ascii', 1, 'ascii', '@charset "ascii";\n/*\\287 */'.encode()),

        # 2/1 @charset/HTTP+unicode
        ('@charset "ascii"; @import "x";', None,
         ('iso-8859-1', '/*\u0287*/')):
            ('ascii', 1, 'iso-8859-1',
             '@charset "iso-8859-1";\n/*\\287 */'.encode()),

        # 2/4 @charset/referrer+unicode
        ('@charset "ascii"; @import "x";', None,
         (None, '/*\u0287*/')):
            ('ascii', 1, 'ascii', '@charset "ascii";\n/*\\287 */'.encode()),

        # 5/1 default/HTTP+unicode
        ('@import "x";', None,
         ('ascii', '/*\u0287*/')):
            ('utf-8', 0, 'ascii', '@charset "ascii";\n/*\\287 */'.encode()),

        # 5/5 default+unicode/default+unicode
        ('@import "x";', None,
         (None, '/*\u0287*/')):
            ('utf-8', 0, 'utf-8', '/*\u0287*/'.encode('utf-8'))
    }
    parser = css_parser.CSSParser()
    for test in tests:
        css, encoding, fetchdata = test
        sheetencoding, importIndex, importEncoding, importText = tests[test]

        # use setFetcher
        parser.setFetcher(self._make_fetcher(*fetchdata))
        # use init
        parser2 = css_parser.CSSParser(fetcher=self._make_fetcher(*fetchdata))

        sheet = parser.parseString(css, encoding=encoding)
        sheet2 = parser2.parseString(css, encoding=encoding)

        # both construction paths must behave identically

        # sheet
        self.assertEqual(sheet.encoding, sheetencoding)
        self.assertEqual(sheet2.encoding, sheetencoding)

        # imported sheet
        self.assertEqual(sheet.cssRules[importIndex].styleSheet.encoding,
                         importEncoding)
        self.assertEqual(sheet2.cssRules[importIndex].styleSheet.encoding,
                         importEncoding)
        self.assertEqual(sheet.cssRules[importIndex].styleSheet.cssText,
                         importText)
        self.assertEqual(sheet2.cssRules[importIndex].styleSheet.cssText,
                         importText)
def setUp(self):
    # a raising parser!!!
    # Configure the shared module log so parse errors raise instead of
    # being swallowed, and silence everything below FATAL.
    css_parser.log.raiseExceptions = True
    css_parser.log.setLevel(logging.FATAL)
    # parser shared by this TestCase's helpers
    self.p = css_parser.CSSParser(raiseExceptions=True)