Example #1
0
def preprocess_stylesheet(device_media_type, base_url, rules, url_fetcher):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    """
    selector_to_xpath = cssselect.HTMLTranslator().selector_to_xpath
    for rule in rules:
        if not rule.at_keyword:
            declarations = list(
                preprocess_declarations(base_url, rule.declarations))
            if declarations:
                selector_string = rule.selector.as_css()
                try:
                    selector_list = []
                    for selector in cssselect.parse(selector_string):
                        xpath = selector_to_xpath(selector)
                        try:
                            lxml_xpath = lxml.etree.XPath(xpath)
                        except ValueError as exc:
                            # TODO: Some characters are not supported by lxml's
                            # XPath implementation (including control
                            # characters), but these characters are valid in
                            # the CSS2.1 specification.
                            raise cssselect.SelectorError(str(exc))
                        selector_list.append(
                            Selector((0, ) + selector.specificity(),
                                     selector.pseudo_element, lxml_xpath))
                    for selector in selector_list:
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            raise cssselect.ExpressionError(
                                'Unknown pseudo-element: %s' %
                                selector.pseudo_element)
                except cssselect.SelectorError as exc:
                    LOGGER.warn("Invalid or unsupported selector '%s', %s",
                                selector_string, exc)
                    continue
                yield rule, selector_list, declarations

        elif rule.at_keyword == '@import':
            if not evaluate_media_query(rule.media, device_media_type):
                continue
            url = url_join(base_url, rule.uri, '@import at %s:%s', rule.line,
                           rule.column)
            if url is not None:
                try:
                    stylesheet = CSS(url=url,
                                     url_fetcher=url_fetcher,
                                     media_type=device_media_type)
                except URLFetchingError as exc:
                    LOGGER.warn('Failed to load stylesheet at %s : %s', url,
                                exc)
                else:
                    for result in stylesheet.rules:
                        yield result

        elif rule.at_keyword == '@media':
            if not evaluate_media_query(rule.media, device_media_type):
                continue
            for result in preprocess_stylesheet(device_media_type, base_url,
                                                rule.rules, url_fetcher):
                yield result

        elif rule.at_keyword == '@page':
            page_name, pseudo_class = rule.selector
            # TODO: support named pages (see CSS3 Paged Media)
            if page_name is not None:
                LOGGER.warn(
                    'Named pages are not supported yet, the whole '
                    '@page %s rule was ignored.',
                    page_name + (':' + pseudo_class if pseudo_class else ''))
                continue
            declarations = list(
                preprocess_declarations(base_url, rule.declarations))

            # Use a double lambda to have a closure that holds page_types
            match = (lambda page_types: lambda _document: page_types)(
                PAGE_PSEUDOCLASS_TARGETS[pseudo_class])
            specificity = rule.specificity

            if declarations:
                selector_list = [Selector(specificity, None, match)]
                yield rule, selector_list, declarations

            for margin_rule in rule.at_rules:
                declarations = list(
                    preprocess_declarations(base_url,
                                            margin_rule.declarations))
                if declarations:
                    selector_list = [
                        Selector(specificity, margin_rule.at_keyword, match)
                    ]
                    yield margin_rule, selector_list, declarations
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, rules, fonts, font_config):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    """
    selector_to_xpath = cssselect.HTMLTranslator().selector_to_xpath
    for rule in stylesheet_rules:
        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                selector_string = tinycss2.serialize(rule.prelude)
                try:
                    selector_list = []
                    for selector in cssselect.parse(selector_string):
                        xpath = selector_to_xpath(selector)
                        try:
                            lxml_xpath = lxml.etree.XPath(xpath)
                        except ValueError as exc:
                            # TODO: Some characters are not supported by lxml's
                            # XPath implementation (including control
                            # characters), but these characters are valid in
                            # the CSS2.1 specification.
                            raise cssselect.SelectorError(str(exc))
                        selector_list.append(
                            Selector((0, ) + selector.specificity(),
                                     selector.pseudo_element, lxml_xpath))
                    for selector in selector_list:
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            raise cssselect.ExpressionError(
                                'Unknown pseudo-element: %s' %
                                selector.pseudo_element)
                except cssselect.SelectorError as exc:
                    LOGGER.warning("Invalid or unsupported selector '%s', %s",
                                   selector_string, exc)
                    continue
                rules.append((rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.at_keyword == 'import':
            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url,
                           url,
                           allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    stylesheet = CSS(url=url,
                                     url_fetcher=url_fetcher,
                                     media_type=device_media_type,
                                     font_config=font_config)
                except URLFetchingError as exc:
                    LOGGER.warning('Failed to load stylesheet at %s : %s', url,
                                   exc)
                else:
                    for result in stylesheet.rules:
                        rules.append(result)

        elif rule.type == 'at-rule' and rule.at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type, base_url, content_rules,
                                  url_fetcher, rules, fonts, font_config)

        elif rule.type == 'at-rule' and rule.at_keyword == 'page':
            tokens = remove_whitespace(rule.prelude)
            # TODO: support named pages (see CSS3 Paged Media)
            if not tokens:
                pseudo_class = None
                specificity = (0, 0)
            elif (len(tokens) == 2 and tokens[0].type == 'literal'
                  and tokens[0].value == ':' and tokens[1].type == 'ident'):
                pseudo_class = tokens[1].lower_value
                specificity = {
                    'first': (1, 0),
                    'blank': (1, 0),
                    'left': (0, 1),
                    'right': (0, 1),
                }.get(pseudo_class)
                if not specificity:
                    LOGGER.warning(
                        'Unknown @page pseudo-class "%s", '
                        'the whole @page rule was ignored '
                        'at %s:%s.', pseudo_class, rule.source_line,
                        rule.source_column)
                    continue
            else:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            content = tinycss2.parse_declaration_list(rule.content)
            declarations = list(preprocess_declarations(base_url, content))

            # Use a double lambda to have a closure that holds page_types
            match = (lambda page_types: lambda _document: page_types)(
                PAGE_PSEUDOCLASS_TARGETS[pseudo_class])

            if declarations:
                selector_list = [Selector(specificity, None, match)]
                rules.append((rule, selector_list, declarations))

            for margin_rule in content:
                if margin_rule.type != 'at-rule':
                    continue
                declarations = list(
                    preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                if declarations:
                    selector_list = [
                        Selector(specificity, '@' + margin_rule.at_keyword,
                                 match)
                    ]
                    rules.append((margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.at_keyword == 'font-face':
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'), rule.source_line,
                        rule.source_column)
                    break
            else:
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)