Python CSSParser.CSSParser 예제들, cssutils.CSSParser.CSSParser Python 예제들

예제 #1

0

파일 보기

파일: utils.py 프로젝트: kba/calibre

def parse_css(data,
              fname='<string>',
              is_declaration=False,
              decode=None,
              log_level=None,
              css_preprocessor=None):
    if log_level is None:
        import logging
        log_level = logging.WARNING
    from cssutils import CSSParser, log
    from calibre.ebooks.oeb.base import _css_logger
    log.setLevel(log_level)
    log.raiseExceptions = False
    if isinstance(data, bytes):
        data = data.decode('utf-8') if decode is None else decode(data)
    if css_preprocessor is not None:
        data = css_preprocessor(data)
    parser = CSSParser(
        loglevel=log_level,
        # We dont care about @import rules
        fetcher=lambda x: (None, None),
        log=_css_logger)
    if is_declaration:
        data = parser.parseStyle(data, validate=False)
    else:
        data = parser.parseString(data, href=fname, validate=False)
    return data

예제 #2

0

파일 보기

    def test_finish(self):
        """
        L{StylesheetRewritingRequestWrapper.finish} causes all written bytes to
        be translated with C{_replace} written to the wrapped request.
        """
        stylesheetFormat = """
            .foo {
                background-image: url(%s)
            }
        """
        originalStylesheet = stylesheetFormat % ("/Foo/bar", )
        expectedStylesheet = stylesheetFormat % ("/bar/Foo/bar", )

        request = FakeRequest()
        roots = {request: URL.fromString('/bar/')}
        wrapper = website.StylesheetRewritingRequestWrapper(
            request, [], roots.get)
        wrapper.write(originalStylesheet)
        wrapper.finish()
        # Parse and serialize both versions to normalize whitespace so we can
        # make a comparison.
        parser = CSSParser()
        self.assertEqual(
            parser.parseString(request.accumulator).cssText,
            parser.parseString(expectedStylesheet).cssText)

예제 #3

0

파일 보기

def create_importer(page):
    importer = Importer(page=page, style='')
    resp = urlfetch.fetch(page.url, deadline=10)
    if resp.status_code == 200:
        soup = BeautifulSoup(resp.content)
        parser = CSSParser()
        for tag in soup.findAll(re.compile(r'^(link|style)$')):
            if tag.name == 'link':
                if tag.get('href', None) and tag.get('rel', 'stylesheet').lower() == 'stylesheet':
                    url = urljoin(page.url, tag['href'])
                    if urlparse(url).netloc != urlparse(request.url).netloc:
                        importer.urls.append(url)
            elif tag.name == 'style':
                media = tag.get('media', None)
                sheet = parser.parseString(''.join(tag.contents).strip('\n'), href=url)
                style = sheet.cssText
                if media:
                    style = '@media %s {\n%s\n}' % (media, style)
                style = '/* Imported directly from %s */\n%s\n' % (page.url, style)
                importer.style += style
        # Patch around AppEngine's frame inspection
        del parser

        importer.put()
        queue_import(page)

예제 #4

0

파일 보기

    def __init__(self, container, do_embed=False):
        self.container = container
        self.log = self.logger = container.log
        self.do_embed = do_embed
        must_use_qt()
        self.parser = CSSParser(loglevel=logging.CRITICAL, log=logging.getLogger('calibre.css'))
        self.first_letter_pat = regex.compile(r'^[\p{Ps}\p{Ps}\p{Pe}\p{Pi}\p{Pf}\p{Po}]+', regex.VERSION1 | regex.UNICODE)
        self.capitalize_pat = regex.compile(r'[\p{L}\p{N}]', regex.VERSION1 | regex.UNICODE)

        self.loop = QEventLoop()
        self.view = QWebView()
        self.page = Page(self.log)
        self.view.setPage(self.page)
        self.page.setViewportSize(QSize(1200, 1600))

        self.view.loadFinished.connect(self.collect,
                type=Qt.QueuedConnection)

        self.render_queue = list(container.spine_items)
        self.font_stats = {}
        self.font_usage_map = {}
        self.font_spec_map = {}
        self.font_rule_map = {}
        self.all_font_rules = {}

        QTimer.singleShot(0, self.render_book)

        if self.loop.exec_() == 1:
            raise Exception('Failed to gather statistics from book, see log for details')

예제 #5

0

파일 보기

    def extract_css(self, root, log):
        ans = []
        for s in root.xpath(
                '//*[local-name() = "style" and @type="text/css"]'):
            ans.append(s.text)
            s.getparent().remove(s)

        head = root.xpath('//*[local-name() = "head"]')
        if head:
            head = head[0]
            ns = head.nsmap.get(None, '')
            if ns:
                ns = '{%s}' % ns
            etree.SubElement(head, ns + 'link', {
                'type': 'text/css',
                'rel': 'stylesheet',
                'href': 'odfpy.css'
            })

        css = '\n\n'.join(ans)
        parser = CSSParser(loglevel=logging.WARNING, log=_css_logger)
        self.css = parser.parseString(css, validate=False)

        with open('odfpy.css', 'wb') as f:
            f.write(css.encode('utf-8'))

예제 #6

0

파일 보기

def do_import():
    page = Page.get(request.form.get('page_key', ''))
    if not page or page.import_state != IMPORTING:
        return 'NO_IMPORTER' # We're done
    importer = Importer.gql('WHERE page=:1', page.key()).get()
    if not importer:
        # This requires a request to fetch the page and parse the URLs.
        # It also enqueues the next run.
        create_importer(page)
        return 'CREATED'
    if importer.urls:
        url = importer.urls.pop(0)
        parser = None
        try:
            resp = urlfetch.fetch(url, deadline=10)
            if resp.status_code == 200:
                parser = CSSParser()
                sheet = parser.parseString(resp.content, href=url)
                style = sheet.cssText
                importer.style += '\n\n/* Imported from %s */\n%s' % (url, style)
            else:
                raise Exception('Error fetching %s' % url)
        except Exception, e:
            import traceback
            importer.errors.append('Error importing %s' % url)
            logging.error('Error importing for Page %s from %s:\n%s\n%s', page.key().id(), url, e, traceback.format_exc())
        finally:

예제 #7

0

파일 보기

def beautify_text(raw, syntax):
    from lxml import etree
    from calibre.ebooks.oeb.polish.parsing import parse
    from calibre.ebooks.oeb.polish.pretty import pretty_xml_tree, pretty_html_tree
    from calibre.ebooks.chardet import strip_encoding_declarations
    if syntax == 'xml':
        root = etree.fromstring(strip_encoding_declarations(raw))
        pretty_xml_tree(root)
    elif syntax == 'css':
        import logging
        from calibre.ebooks.oeb.base import serialize, _css_logger
        from calibre.ebooks.oeb.polish.utils import setup_cssutils_serialization
        from cssutils import CSSParser, log
        setup_cssutils_serialization(tprefs['editor_tab_stop_width'])
        log.setLevel(logging.WARN)
        log.raiseExceptions = False
        parser = CSSParser(
            loglevel=logging.WARNING,
            # We dont care about @import rules
            fetcher=lambda x: (None, None),
            log=_css_logger)
        data = parser.parseString(raw, href='<string>', validate=False)
        return serialize(data, 'text/css')
    else:
        root = parse(raw, line_numbers=False)
        pretty_html_tree(None, root)
    return etree.tostring(root, encoding=unicode)

예제 #8

0

파일 보기

def main():
    css = '''
    /* some umlauts äöü and EURO sign € */
    a:before {
       content: "ä";
        }'''

    p = CSSParser()
    sheet = p.parseString(css)

    print("""cssText in different encodings, depending on the console some
     chars may look broken but are actually not""")
    print()

    sheet.encoding = 'ascii'
    print(sheet.cssText)
    print()

    sheet.encoding = 'iso-8859-1'
    print(sheet.cssText)
    print()

    sheet.encoding = 'iso-8859-15'
    print(sheet.cssText)
    print()

    sheet.encoding = 'utf-8'
    print(sheet.cssText)
    print()

    # results in default UTF-8 encoding without @charset rule
    sheet.encoding = None
    print(sheet.cssText)

예제 #9

0

파일 보기

def validate_css(string):
    p = CSSParser(raiseExceptions=True)

    if not string or only_whitespace.match(string):
        return ('', ValidationReport())

    report = ValidationReport(string)

    # avoid a very expensive parse
    max_size_kb = 100
    if len(string) > max_size_kb * 1024:
        report.append(
            ValidationError((msgs['too_big'] % dict(max_size=max_size_kb))))
        return ('', report)

    if '\\' in string:
        report.append(
            ValidationError(
                _("if you need backslashes, you're doing it wrong")))

    try:
        parsed = p.parseString(string)
    except DOMException, e:
        # yuck; xml.dom.DOMException can't give us line-information
        # directly, so we have to parse its error message string to
        # get it
        line = None
        line_match = error_message_extract_re.match(e.message)
        if line_match:
            line = line_match.group(1)
            if line:
                line = int(line)
        error_message = (msgs['syntax_error'] % dict(syntaxerror=e.message))
        report.append(ValidationError(error_message, e, line))
        return (None, report)

예제 #10

0

파일 보기

def validate_css(css):
    try:
        parser = CSSParser(raiseExceptions=True, parseComments=False)
        stylesheet = parser.parseString(css)
    except Exception as exception:
        raise ValidationError(str(exception).split("\n")[0])
    validate_rules(stylesheet.cssRules)
    return css

예제 #11

0

파일 보기

파일: website.py 프로젝트: jonathanj/mantissa

 def finish(self):
     """
     Parse the buffered response body, rewrite its URLs, write the result to
     the wrapped request, and finish the wrapped request.
     """
     stylesheet = ''.join(self._buffer)
     parser = CSSParser()
     css = parser.parseString(stylesheet)
     replaceUrls(css, self._replace)
     self.request.write(css.cssText)
     return self.request.finish()

예제 #12

0

파일 보기

def normalize_filter_css(props):
    import logging
    ans = set()
    p = CSSParser(loglevel=logging.CRITICAL, validate=False)
    for prop in props:
        n = normalizers.get(prop, None)
        ans.add(prop)
        if n is not None and prop in SHORTHAND_DEFAULTS:
            dec = p.parseStyle('%s: %s' % (prop, SHORTHAND_DEFAULTS[prop]))
            cssvalue = dec.getPropertyCSSValue(dec.item(0))
            ans |= set(n(prop, cssvalue))
    return ans

예제 #13

0

파일 보기

파일: container.py 프로젝트: Gondulf/calibre

 def parse_css(self, data, fname='<string>'):
     from cssutils import CSSParser, log
     log.setLevel(logging.WARN)
     log.raiseExceptions = False
     data = self.decode(data)
     if not self.tweak_mode:
         data = self.css_preprocessor(data)
     parser = CSSParser(loglevel=logging.WARNING,
                        # We dont care about @import rules
                        fetcher=lambda x: (None, None), log=_css_logger)
     data = parser.parseString(data, href=fname, validate=False)
     return data

예제 #14

0

파일 보기

def validate_css(css):
    try:
        parser = CSSParser(raiseExceptions=True)
        stylesheet = parser.parseString(css)
    except Exception as exception:
        raise ValidationError(str(exception).split('\n')[0])

    if not stylesheet.valid:
        raise ValidationError(_('This stylesheet is not valid CSS.'))

    validate_rules(stylesheet.cssRules)
    return css

예제 #15

0

파일 보기

파일: transformer.py 프로젝트: hubaimaster/aws-interface

 def _apply_to_style_uri(style_text, func):
     dirty = False
     parser = CSSParser().parseStyle(style_text)
     for prop in parser.getProperties(all=True):
         for value in prop.propertyValue:
             if value.type == 'URI':
                 old_uri = value.uri
                 new_uri = func(old_uri, element=value)
                 if new_uri != old_uri:
                     dirty = True
                     value.uri = new_uri
     if dirty:
         return to_unicode(parser.cssText, 'utf-8')
     else:
         return style_text

예제 #16

0

파일 보기

파일: css.py 프로젝트: erdgeist/pretalx

def validate_css(css):
    try:
        parser = CSSParser(raiseExceptions=True)
        stylesheet = parser.parseString(css)
    except Exception as e:
        raise ValidationError(str(e).split('\n')[0])

    if not stylesheet.valid:
        raise ValidationError(_('This stylesheet is not valid CSS.'))

    for rule in stylesheet.cssRules:
        style = rule.style
        for key in style.keys():
            validate_key(key=key, style=style)
    return css

예제 #17

0

파일 보기

def parse_style(cssText):
    element_dict = defaultdict(lambda: '')
    # inline style not support pseudo-selector, write into <style> tag
    pseudo_selector_list = []
    parser = CSSParser(loglevel=logging.CRITICAL)
    cssStyle = parser.parseString(cssText)

    # init all selector
    for r in cssStyle:
        if isinstance(r, CSSComment):
            continue

        # skip * selector
        if r.selectorText.find('*') >= 0:
            continue

        # skip pseudo-selector like a:hover
        if r.selectorText.find(':') >= 0:
            continue

        for selector in r.selectorList:
            element_dict[selector.selectorText]

    def append_all_selector(style):
        for k in element_dict.keys():
            element_dict[k] += style

    element_list = []
    for r in cssStyle:
        if isinstance(r, CSSComment):
            continue

        if r.selectorText.find('*') >= 0 and r.selectorText.find(':') < 0:
            append_all_selector(to_inline_style(r.style))
            continue

        if r.selectorText.find(':') >= 0:
            pseudo_selector_list.append(r.cssText)
            continue

        for selector in r.selectorList:
            element_list.append(selector.selectorText)
            element_dict[selector.selectorText] += to_inline_style(r.style)

    return element_list, element_dict, pseudo_selector_list

예제 #18

0

파일 보기

    def run(self):
        # Step 0: ensure that the document_root and base_path variables are
        # set. If the file that's being processed was inside a source that has
        # either one or both not set, then this processor can't run.
        if self.document_root is None or self.base_path is None:
            raise DocumentRootAndBasePathRequiredException

        # We don't rename the file, so we can use the default output file.

        parser = CSSParser(log=None, loglevel=logging.critical)
        sheet = parser.parseFile(self.input_file)

        # Step 1: ensure the file has URLs. If it doesn't, we can stop the
        # processing.
        url_count = 0
        for url in getUrls(sheet):
            url_count += 1
            break
        if url_count == 0:
            return self.input_file

        # Step 2: resolve the relative URLs to absolute paths.
        replaceUrls(sheet, self.resolveToAbsolutePath)

        # Step 3: verify that each of these files has been synced.
        synced_files_db = urljoin(sys.path[0] + os.sep, SYNCED_FILES_DB)
        self.dbcon = sqlite3.connect(synced_files_db)
        self.dbcur = self.dbcon.cursor()
        all_synced = True
        for urlstring in getUrls(sheet):
            # Skip absolute URLs.
            if urlstring.startswith("http://") or urlstring.startswith(
                    "https://"):
                continue

            # Skip broken references in the CSS file. This would otherwise
            # prevent this CSS file from ever passing through this processor.
            if not os.path.exists(urlstring):
                continue

            # Get the CDN URL for the given absolute path.
            self.dbcur.execute(
                "SELECT url FROM synced_files WHERE input_file=?",
                (urlstring, ))
            result = self.dbcur.fetchone()

            if result == None:
                raise RequestToRequeueException(
                    "The file '%s' has not yet been synced to the server '%s'"
                    % (urlstring, self.process_for_server))
            else:
                cdn_url = result[0]

        # Step 4: resolve the absolute paths to CDN URLs.
        replaceUrls(sheet, self.resolveToCDNURL)

        # Step 5: write the updated CSS to the output file.
        f = open(self.output_file, 'w')
        f.write(sheet.cssText)
        f.close()

        return self.output_file

예제 #19

0

파일 보기

    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text, add_namespace=True)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    stylesheet.namespaces['h'] = XHTML_NS
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn(
                                    'Ignoring missing stylesheet in @import rule:',
                                    rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn(
                                    'CSS @import of non-CSS file %r' %
                                    rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {
                        rule.media.item(i)
                        for i in xrange(rule.media.length)
                    }
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(
            ur':(first-letter|first-line|link|hover|visited|active|focus|before|after)',
            re.I)
        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            if fl is not None:
                text = text.replace(fl.group(), '')
            selector = get_css_selector(text, self.oeb.log)
            matches = selector(tree, self.logger)
            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                                                    'plumber_output_format',
                                                    '').lower() == u'mobi':
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter():
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)

예제 #20

0

파일 보기

    def __init__(self, tree, path, oeb, opts, profile=None,
            extra_css='', user_css='', base_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'],
                                        profile['props'],
                                        profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if rule.media.mediaText == 'amzn-mobi':
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    for rule in tuple(stylesheet.cssRules.rulesOfType(CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet, item.abshref,
                            ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text, href=cssname,
                            validate=False)
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    media = {rule.media.item(i) for i in
                             xrange(rule.media.length)}
                    if not media.intersection({'all', 'screen', 'amzn-kf8'}):
                        continue
                    for subrule in rule.cssRules:
                        rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                        index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(self.oeb,
                        'plumber_output_format', '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)

예제 #21

0

파일 보기

    def __init__(self, tree, path, oeb, profile, extra_css='', user_css='',
            change_justification='left'):
        assert profile is not None
        # XXX str/bytes hackfix
        if isinstance(path, bytes):
            decoded_path = path.decode('utf-8')
        else:
            decoded_path = path
        self.oeb = oeb
        self.profile = profile
        self.change_justification = change_justification
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(decoded_path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style') and
                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else ''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += '\n\n' + force_unicode(t, 'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += '\n\n' + force_unicode(t, 'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + elem.text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text, href=cssname)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    logging.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    logging.warn(
                    'Stylesheet %r referenced by file %r is not CSS'%(path,
                        item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css':extra_css, 'user_css':user_css}
        for w, x in list(csses.items()):
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text, href=cssname)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    logging.exception('Failed to parse %s, ignoring.'%w)
                    logging.debug('Bad css: ')
                    logging.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        # XXX had to fix crash about unsortable type, so that's why we only sort by first item of tuple
        rules.sort(key=lambda tup: tup[:1])
        self.rules = rules
        self._styles = {}
        class_sel_pat = re.compile(r'\.[a-z]+', re.IGNORECASE)
        capital_sel_pat = re.compile(r'h|[A-Z]+')
        for _, _, cssdict, text, _ in rules:
            fl = ':first-letter' in text
            if fl:
                text = text.replace(':first-letter', '')
            try:
                selector = CSSSelector(text)
            except (AssertionError, ExpressionError, etree.XPathSyntaxError,
                    NameError, # thrown on OS X instead of SelectorSyntaxError
                    SelectorSyntaxError):
                continue
            try:
                matches = selector(tree)
            except etree.XPathEvalError:
                continue

            if not matches:
                ntext = capital_sel_pat.sub(lambda m: m.group().lower(), text)
                if ntext != text:
                    logging.warn('Transformed CSS selector' + text + 'to' + ntext)
                    selector = CSSSelector(ntext)
                    matches = selector(tree)

            if not matches and class_sel_pat.match(text) and text.lower() != text:
                found = False
                ltext = text.lower()
                for x in tree.xpath('//*[@class]'):
                    if ltext.endswith('.'+x.get('class').lower()):
                        matches.append(x)
                        found = True
                if found:
                    logging.warn('Ignoring case mismatches for CSS selector: %s in %s'%(text, item.href))
            if fl:
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            punctuation_chars = []
                            text = str(x.text)
                            while text:
                                if not unicodedata.category(text[0]).startswith('P'):
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = ''.join(punctuation_chars) + \
                                    (text[0] if text else '')
                            span = E.span(special_text)
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr()
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)

예제 #22

0

파일 보기

def safe_parser():
    global _safe_parser
    if _safe_parser is None:
        import logging
        _safe_parser = CSSParser(loglevel=logging.CRITICAL, validate=False)
    return _safe_parser

예제 #23

0

파일 보기

def inline(tree):
    """
    Inlines all of the styles within this stylesheet into their matching HTML
    elements. This modifies the original tree in-place, removing all style tags
    and updating the nodes.

    To prevent a ``<style>`` tag from being inlined, add an ``inline="false"``
    attribute::

        <style type="text/css" inline="false">
            /* Any rules contained within this tag will not be inlined. */
        </style>

    """
    rules = defaultdict(list)
    rule_id_sequence = itertools.count()

    stylesheet_parser = CSSParser(log=logging.getLogger('%s.cssutils' %
                                                        __name__))

    # Get all stylesheets from the document.
    stylesheets = CSSSelector('style')(tree)
    for stylesheet in stylesheets:
        if stylesheet.attrib.get('inline') == 'false':
            del stylesheet.attrib['inline']
            continue

        if not stylesheet.text:
            continue

        for rule in ifilter(is_style_rule,
                            stylesheet_parser.parseString(stylesheet.text)):
            properties = {}
            for property in rule.style:
                properties.update(expand_property(property))

            # XXX: This doesn't handle selectors with odd multiple whitespace.
            for selector in map(text_type.strip, rule.selectorText.split(',')):
                rule = Rule(
                    next(rule_id_sequence),
                    selector,
                    properties,
                )
                rules[rule.selector].append(rule)

        stylesheet.getparent().remove(stylesheet)

    # Collect all nodes matching our style rules.
    nodes = defaultdict(list)
    for selector, rs in rules.items():
        for node in selector(tree):
            nodes[node].extend(rs)

    # Apply all styles to our collected elements.
    for node, rules in nodes.items():
        properties = Rule.combine(rules)

        # If this node already has a style attribute, we need to apply those
        # styles on top of the CSS rules from the stylesheet.
        style_attr = node.attrib.get('style')
        if style_attr is not None:
            properties.update(Properties.from_string(style_attr))

        node.attrib['style'] = '%s' % properties

예제 #24

0

파일 보기

파일: css.py 프로젝트: viromehunter/yabi

    "width",
    "word-spacing",
    "z-index",
)

# Set up some default objects.


# A default fetcher for cssutils that will ensure that no external stylesheets
# included via @import are loaded.
def default_fetcher(url):
    return (None, "")


# A default parser that uses the default fetcher.
default_parser = CSSParser(fetcher=default_fetcher)

# The default CSS ruleset.
default_ruleset = Ruleset(DEFAULT_ALLOWED_PROPERTIES)


def _sanitise(content, parser=default_parser, ruleset=default_ruleset):
    """
    The actual implementation of the CSS sanitiser.
    """

    input = parser.parseString(content)
    output = css.CSSStyleSheet()

    def primitive(value):
        # This is the one place where we'll blacklist rather than whitelist:

예제 #25

0

파일 보기

파일: test_css.py 프로젝트: rajatguptarg/nereid-webshop

 def validate(self, filename):
     """
     Uses cssutils to validate a css file.
     Prints output using a logger.
     """
     CSSParser(raiseExceptions=True).parseFile(filename, validate=True)

예제 #26

0

파일 보기

파일: stylizer.py 프로젝트: syn-gowthamsrungarapu/calibre

    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        head = xpath(tree, '/h:html/h:head')
        if head:
            head = head[0]
        else:
            head = []

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
            elif elem.tag == XHTML('link') and elem.get('href') \
                 and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                 and elem.get('type', CSS_MIME).lower() in OEB_STYLES:
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = XHTML_CSS_NAMESPACE + x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheet.namespaces['h'] = XHTML_NS
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for stylesheet in stylesheets:
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                rules.extend(self.flatten_rule(rule, href, index))
                index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        for _, _, cssdict, text, _ in rules:
            fl = ':first-letter' in text
            if fl:
                text = text.replace(':first-letter', '')
            selector = get_css_selector(text)
            matches = selector(tree, self.logger)
            if fl:
                from lxml.builder import ElementMaker
                E = ElementMaker(namespace=XHTML_NS)
                for elem in matches:
                    for x in elem.iter():
                        if x.text:
                            punctuation_chars = []
                            text = unicode(x.text)
                            while text:
                                if not unicodedata.category(
                                        text[0]).startswith('P'):
                                    break
                                punctuation_chars.append(text[0])
                                text = text[1:]

                            special_text = u''.join(punctuation_chars) + \
                                    (text[0] if text else u'')
                            span = E.span(special_text)
                            span.tail = text[1:]
                            x.text = None
                            x.insert(0, span)
                            self.style(span)._update_cssdict(cssdict)
                            break
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'\d+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)