Exemple #1
0
def _format_css_rules(content: list, indent_level: int) -> str:
	"""
	Helper function for CSS formatting that formats a list of CSS rules.

	Qualified rules and at-rules are emitted as indented blocks followed by a
	blank line; comments are emitted as `/* ... */` lines. Nested at-rule
	content is formatted recursively, one indent level deeper.

	INPUTS
	content: A list of component values generated by the tinycss2 library
	indent_level: The number of tab characters to indent each emitted rule with

	OUTPUTS
	A string of formatted CSS, with trailing whitespace stripped

	RAISES
	se.InvalidCssException: if tinycss2 yields an error token for `content`
	"""

	output = ""
	indent = "\t" * indent_level

	for token in tinycss2.parse_rule_list(content):
		if token.type == "error":
			# Must be an f-string; otherwise the literal text "{token.message}" is reported instead of the parser's message
			raise se.InvalidCssException(f"Couldn’t parse CSS. Exception: {token.message}")

		if token.type == "qualified-rule":
			# Multi-line selector lists keep the block's indent on every continuation line
			output += indent + _format_css_component_list(token.prelude, True).replace("\n", "\n" + indent) + "{\n" + _format_css_declarations(token.content, indent_level + 1) + "\n" + indent + "}\n\n"

		if token.type == "at-rule":
			# At-rule preludes are flattened onto a single line
			output += indent + "@" + token.lower_at_keyword + " " + _format_css_component_list(token.prelude, True).replace("\n", " ") + "{\n" + _format_css_rules(token.content, indent_level + 1) + "\n" + indent + "}\n\n"

		if token.type == "comment":
			comment_text = token.value.strip()
			is_end_comment = comment_text.lower().startswith("end")

			# House style: If the comment starts with /* End, then attach it to the previous block
			if is_end_comment:
				output = output.rstrip() + "\n"

			output += indent + "/* " + comment_text + " */\n"

			# ... and separate it from whatever follows with a blank line
			if is_end_comment:
				output += "\n"

	return output.rstrip()
    def _find_rules_selectors(self, rule):
        """
        Find all selectors for a given CSS rule
        :param rule: rule to find selectors
        :return: list of selectors found
        """
        selectors = []
        # types: QualifiedRule, AtRule, Comment
        if rule.type == 'at-rule' and rule.content:
            ruleList = tinycss2.parse_rule_list(rule.content)
            for r in ruleList:
                selectors += self._find_rules_selectors(r)
            return selectors

        if rule.type == 'qualified-rule':
            rule_selectors = rule.serialize().split('{')[0]
            selectors += rule_selectors.split(',')

        # ignore comments
        if rule.type == 'comment':
            pass

        if rule.type == 'error':
            pass

        # rules not covered (@todo Cover this rules if they showup in any new css library used)
        if rule.type not in ('comment', 'at-rule', 'qualified-rule', 'error',
                             'whitespace'):
            pass

        return selectors
    def _remove_unused_selectors_from_rule(self, rule, unused):

        new_css = ''

        # ignore @media queries
        if rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            rule_list = tinycss2.parse_rule_list(rule.content)
            new_css = ''
            for r in rule_list:
                # recursion to deal with @media elements
                css = self._remove_unused_selectors_from_rule(r, unused)
                new_css += css

            # reconstruct media CSS if any rule is left inside @media
            if new_css:
                media_css = '@media'
                for node in rule.prelude:
                    media_css += node.serialize()
                    # log.debug(node.serialize())

                media_css = media_css + '{' + new_css + '}'

                return media_css
            else:
                return ''

        # check if there are any used selectors
        rule_css = rule.serialize()
        selectors = self._get_all_selectors(rule_css)
        selectors_left = selectors[:]

        for sel in selectors:
            if sel in unused:
                selectors_left.remove(sel)

                # .nav,.sidebar li,.thumbnails{list-style:none}
                # remove class: .thumbnails
                rule_css = rule_css.replace(',' + sel + '{', '{')

                # remove class: .sidebar li or .nav
                rule_css = rule_css.replace(sel + ',', '')

                # remove class: .nav{
                rule_css = rule_css.replace(sel + '{', '{')

        # if any selector left... uses the remaining css
        if selectors_left:
            if self._is_css_valid(rule_css):
                new_css += rule_css
            else:
                logger.error(
                    'Invalid CSS rule detected after removing unused classes!')
                logger.error(rule_css)
                exit()

        return new_css
Exemple #4
0
 def gen_js_style_nodes(self, node_type, styles):
     """Collect the at-rules with a given keyword from several style sources.

     :param node_type: lower-cased at-keyword to match (compared against
         ``lower_at_keyword``)
     :param styles: iterable of objects whose ``text`` attribute holds CSS
     :return: list of matching ``tinycss2.ast.AtRule`` nodes, in source order
     """
     matches = []
     for sheet in styles:
         # Only top-level at-rules of each parsed sheet are inspected.
         matches.extend(
             rule for rule in tinycss2.parse_rule_list(sheet.text)
             if isinstance(rule, tinycss2.ast.AtRule)
             and rule.lower_at_keyword == node_type)
     return matches
Exemple #5
0
    def validate_at_rule(self, rule):
        """Validate an at-rule and return an iterable of validation errors.

        The prelude is always passed to ``validate_component_values``. The
        keyword (after ``strip_vendor_prefix``) then selects the handling:

        * ``media`` / ``keyframes``: the content is parsed as a rule list and
          validated with ``validate_rule_list``.
        * ``page``: the rule is validated with ``validate_qualified_rule``.
        * anything else: a single ``UNKNOWN_AT_RULE`` error is reported.

        Every branch returns an iterable, so callers can loop over the result
        without special-casing unknown at-rules.
        """
        prelude_errors = self.validate_component_values(rule.prelude)

        keyword = strip_vendor_prefix(rule.lower_at_keyword)

        if keyword in ("media", "keyframes"):
            rules = tinycss2.parse_rule_list(rule.content)
            rule_errors = self.validate_rule_list(rules)
        elif keyword == "page":
            rule_errors = self.validate_qualified_rule(rule)
        else:
            # Wrapped in a list: returning the bare error object here made
            # this the only branch whose result could not be iterated.
            return [ValidationError(rule.source_line, "UNKNOWN_AT_RULE", {"keyword": rule.at_keyword})]

        return itertools.chain(prelude_errors, rule_errors)
Exemple #6
0
    def validate_at_rule(self, rule):
        """Validate an at-rule and return an iterable of validation errors.

        The prelude is always passed to ``validate_component_values``. The
        keyword (after ``strip_vendor_prefix``) then selects the handling:

        * ``media`` / ``keyframes``: the content is parsed as a rule list and
          validated with ``validate_rule_list``.
        * ``page``: the rule is validated with ``validate_qualified_rule``.
        * anything else: a single ``UNKNOWN_AT_RULE`` error is reported.

        Every branch returns an iterable, so callers can loop over the result
        without special-casing unknown at-rules.
        """
        prelude_errors = self.validate_component_values(rule.prelude)

        keyword = strip_vendor_prefix(rule.lower_at_keyword)

        if keyword in ("media", "keyframes"):
            rules = tinycss2.parse_rule_list(rule.content)
            rule_errors = self.validate_rule_list(rules)
        elif keyword == "page":
            rule_errors = self.validate_qualified_rule(rule)
        else:
            # Wrapped in a list: returning the bare error object here made
            # this the only branch whose result could not be iterated.
            return [ValidationError(rule.source_line, "UNKNOWN_AT_RULE",
                                    {"keyword": rule.at_keyword})]

        return itertools.chain(prelude_errors, rule_errors)
def find_urls_in_css_rules(name, css_rules):
    """
    Find the url of the given image name in the given style.

    Qualified rules are inspected directly; ``@media`` rules (matched
    case-insensitively) are parsed and searched recursively. All other
    nodes, and ``@media`` rules without a block, are ignored.

    :param name: token name to look for in each qualified rule
    :param css_rules: iterable of parsed tinycss2 rules
    :return: list of formatted urls, in source order

    >>> stylesheet = get_embedded_styles(get_html_tree('https://www.apple.com/retail/fifthavenue/'), 'image-hero')[0]
    >>> css_rules = tinycss2.parse_stylesheet(stylesheet, skip_comments=True, skip_whitespace=True)
    >>> find_urls_in_css_rules('image-hero', css_rules)[1]
    'https://www.apple.com/retail/fifthavenue/images/hero_large_2x.jpg'
    """
    urls = []
    for rule in css_rules:
        if rule.type == 'qualified-rule':
            url = _get_url_if_contains_token_name(name, rule)
            if url:
                urls.append(_format_url(url))
        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            # At-keywords are case-insensitive, hence lower_at_keyword.
            # A block-less at-rule has content None: nothing to descend into.
            if rule.content is None:
                continue
            nested_rules = tinycss2.parse_rule_list(rule.content,
                                                    skip_whitespace=True)
            urls.extend(find_urls_in_css_rules(name, nested_rules))
    return urls
Exemple #8
0
    def extract_inline_style_source(self, style):
        """
        Extracts sources for an inline style tag

        Parses inline style tags in order to extract sources that are relevant
        to the style-src directive. Only top-level ``@import`` and
        ``@font-face`` rules are handed on, to ``find_import_source`` and
        ``find_fontface_source`` respectively; every other node is ignored.

        :param style: style tag to extract the source from
        :return: None
        """
        for node in tinycss2.parse_rule_list(style.text):
            if not isinstance(node, tinycss2.ast.AtRule):
                continue

            # The leftover debugging print() calls were removed so that
            # extracting sources no longer writes to stdout.
            keyword = node.lower_at_keyword
            if keyword == 'import':
                self.find_import_source(node)
            elif keyword == 'font-face':
                self.find_fontface_source(node)
Exemple #9
0
    def __init__(self, rule, parent_style_sheet=None, parent_rule=None):
        """Builds a CSSMediaRule from a parsed ``@media`` at-rule.

        The media list text is taken from the serialized prelude, and the
        rule's content is parsed (comments and whitespace skipped) into
        child rules that are appended to ``css_rules`` with this rule as
        their parent.

        Arguments:
            rule: A parsed CSS at-rule object.
            parent_style_sheet (CSSStyleSheet, optional): The parent CSS style
                sheet.
            parent_rule (CSSRule, optional): The parent CSS rule.
        """
        super().__init__(rule,
                         CSSRule.MEDIA_RULE,
                         parent_style_sheet=parent_style_sheet,
                         parent_rule=parent_rule)

        # Media query list, initialised from the at-rule's prelude text.
        self._media = MediaList()
        prelude_text = tinycss2.serialize(rule.prelude)
        self._media.media_text = prelude_text

        # Nested rules of the @media block.
        nested_nodes = tinycss2.parse_rule_list(
            rule.content, skip_comments=True, skip_whitespace=True)
        child_rules = CSSParser.parse_rules(
            nested_nodes,
            parent_style_sheet=parent_style_sheet,
            parent_rule=self)
        self.css_rules.extend(child_rules)
Exemple #10
0
 def _parse_content(self, content):
     """Parse feature-value blocks and store them on the instance.

     ``content`` is parsed as a rule list. For each at-rule whose keyword is
     one of ``annotation``, ``character-variant``, ``ornaments``,
     ``styleset``, ``stylistic`` or ``swash``, every ``name: int int ...``
     declaration in its block is stored in the matching instance mapping
     (``self._annotation``, ``self._character_variant``, ...) as
     ``name -> [non-negative ints]``. Declarations with no name or no valid
     integer are ignored.

     NOTE(review): presumably ``content`` is the body of an
     ``@font-feature-values`` rule; confirm against the caller.

     Compared with the previous version, a final declaration that is not
     terminated by ``;`` is now stored, and at-rules without a block
     (``content is None``) are skipped instead of raising ``TypeError``.
     """
     # Maps each supported at-keyword to the instance mapping that holds it.
     targets = {
         'annotation': '_annotation',
         'character-variant': '_character_variant',
         'ornaments': '_ornaments',
         'styleset': '_styleset',
         'stylistic': '_stylistic',
         'swash': '_swash',
     }

     def store(feature_type, name, feature_values):
         # Record one finished declaration, if it is complete and supported.
         attr = targets.get(feature_type)
         if attr is not None and name is not None and feature_values:
             getattr(self, attr)[name] = feature_values

     nodes = tinycss2.parse_rule_list(content,
                                      skip_comments=True,
                                      skip_whitespace=True)
     for node in nodes:
         # Block-less at-rules have no content to scan.
         if node.type != 'at-rule' or node.content is None:
             continue

         # tinycss2.parse_one_component_value() returns ParseError
         feature_type = node.lower_at_keyword
         name = None
         feature_values = None
         for token in node.content:
             if token.type == 'ident':
                 name = token.value
             elif token.type == 'literal' and token.value == ':':
                 # Values are only collected once a name has been seen.
                 if name is not None:
                     feature_values = []
             elif token.type == 'number' and token.is_integer:
                 if feature_values is not None and token.int_value >= 0:
                     feature_values.append(token.int_value)
             elif token.type == 'literal' and token.value == ';':
                 store(feature_type, name, feature_values)
                 name = None
                 feature_values = None

         # The last declaration of a block may omit its trailing ';'.
         store(feature_type, name, feature_values)
Exemple #11
0
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config, ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Iterates over ``stylesheet_rules`` and dispatches on the kind of rule:

    * qualified rules: declarations are preprocessed and registered on
      ``matcher`` for each compiled selector;
    * ``@import``: the target stylesheet is loaded through ``CSS(...)``,
      unless an earlier rule already set ``ignore_imports``;
    * ``@media``: nested rules are processed recursively when the media
      query evaluates true for ``device_media_type``;
    * ``@page``: page and margin-rule declarations are appended to
      ``page_rules``;
    * ``@font-face``: descriptors are passed to
      ``font_config.add_font_face`` and returned filenames are appended to
      ``fonts``.

    Results are delivered by mutating ``matcher``, ``page_rules`` and
    ``fonts``; the function itself returns ``None``.

    :param device_media_type: media type the media queries are evaluated for
    :param base_url: base used to resolve urls in declarations and imports
    :param stylesheet_rules: iterable of parsed tinycss2 rules
    :param url_fetcher: forwarded to ``CSS`` and ``add_font_face``
    :param matcher: object receiving ``add_selector(selector, declarations)``
    :param page_rules: list receiving ``(rule, selector_list, declarations)``
    :param fonts: list receiving font filenames
    :param font_config: font configuration, or ``None`` to skip font loading
    :param ignore_imports: when true, ``@import`` rules are rejected

    """
    for rule in stylesheet_rules:
        # Skip anything without a content block, except @import, which is
        # handled from its prelude alone.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(preprocess_declarations(
                base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                # Lowered to DEBUG below for vendor-prefixed pseudo-elements.
                logger_level = WARNING
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    for selector in selectors:
                        # Note: the selector is registered before its
                        # pseudo-element is checked.
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            if selector.pseudo_element.startswith('-'):
                                logger_level = DEBUG
                                raise cssselect2.SelectorError(
                                    'ignored prefixed pseudo-element: %s'
                                    % selector.pseudo_element)
                            else:
                                raise cssselect2.SelectorError(
                                    'unknown pseudo-element: %s'
                                    % selector.pseudo_element)
                    # A valid style rule was seen: later @import are rejected.
                    ignore_imports = True
                except cssselect2.SelectorError as exc:
                    LOGGER.log(
                        logger_level,
                        "Invalid or unsupported selector '%s', %s",
                        tinycss2.serialize(rule.prelude), exc)
                    continue
            else:
                ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                # NOTE(review): the message reads "beginning of the the whole
                # rule"; a word seems to be missing. Confirm before changing.
                LOGGER.warning('@import rule "%s" not at the beginning of the '
                               'the whole rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue

            # The first non-whitespace prelude token must be the target url;
            # the remaining tokens are parsed as a media query.
            tokens = remove_whitespace(rule.prelude)
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            media = media_queries.parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning('Invalid media type "%s" '
                               'the whole @import rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue
            if not media_queries.evaluate_media_query(
                    media, device_media_type):
                continue
            url = url_join(
                base_url, url, allow_relative=False,
                context='@import at %s:%s',
                context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    # The instance is discarded; it is handed the shared
                    # matcher and page_rules.
                    CSS(
                        url=url, url_fetcher=url_fetcher,
                        media_type=device_media_type, font_config=font_config,
                        matcher=matcher, page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error(
                        'Failed to load stylesheet at %s : %s', url, exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = media_queries.parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning('Invalid media type "%s" '
                               'the whole @media rule was ignored at %s:%s.',
                               tinycss2.serialize(rule.prelude),
                               rule.source_line, rule.source_column)
                continue
            ignore_imports = True
            if not media_queries.evaluate_media_query(
                    media, device_media_type):
                continue
            # The recursive call passes ignore_imports=True, so @import rules
            # nested inside @media are always rejected.
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(
                device_media_type, base_url, content_rules, url_fetcher,
                matcher, page_rules, fonts, font_config, ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude),
                    rule.source_line, rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                # 'specificity' is removed from the dict before the rest is
                # used as PageType keyword arguments.
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, page_type)]
                    page_rules.append((rule, selector_list, declarations))

                # At-rules nested in the @page block that have their own
                # content are registered as margin rules.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (
                            margin_rule.content is None):
                        continue
                    declarations = list(preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                    if declarations:
                        selector_list = [(
                            specificity, '@' + margin_rule.lower_at_keyword,
                            page_type)]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'),
                        rule.source_line, rule.source_column)
                    break
            else:
                # for/else: reached only when no required descriptor is
                # missing.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
Exemple #12
0
def preprocess_stylesheet(device_media_type,
                          base_url,
                          stylesheet_rules,
                          url_fetcher,
                          matcher,
                          page_rules,
                          fonts,
                          font_config,
                          counter_style,
                          ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Iterates over ``stylesheet_rules`` and dispatches on the kind of rule:

    * qualified rules: declarations are preprocessed and registered on
      ``matcher`` for each compiled selector;
    * ``@import``: the target stylesheet is loaded through ``CSS(...)``,
      unless an earlier rule already set ``ignore_imports``;
    * ``@media``: nested rules are processed recursively when the media
      query evaluates true for ``device_media_type``;
    * ``@page``: page and margin-rule declarations are appended to
      ``page_rules``;
    * ``@font-face``: descriptors are passed to
      ``font_config.add_font_face`` and returned filenames are appended to
      ``fonts``;
    * ``@counter-style``: a descriptor dict is stored in
      ``counter_style[name]`` once its symbol count has been checked.

    Results are delivered by mutating ``matcher``, ``page_rules``, ``fonts``
    and ``counter_style``; the function itself returns ``None``.

    :param device_media_type: media type the media queries are evaluated for
    :param base_url: base used to resolve urls in declarations and imports
    :param stylesheet_rules: iterable of parsed tinycss2 rules
    :param url_fetcher: forwarded to ``CSS`` and ``add_font_face``
    :param matcher: object receiving ``add_selector(selector, declarations)``
    :param page_rules: list receiving ``(rule, selector_list, declarations)``
    :param fonts: list receiving font filenames
    :param font_config: font configuration, or ``None`` to skip font loading
    :param counter_style: mapping receiving one entry per valid counter style
    :param ignore_imports: when true, ``@import`` rules are rejected

    """
    for rule in stylesheet_rules:
        # Skip anything without a content block, except @import, which is
        # handled from its prelude alone.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                # Lowered to DEBUG below for vendor-prefixed pseudo-elements.
                logger_level = WARNING
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    for selector in selectors:
                        # Note: the selector is registered before its
                        # pseudo-element is checked.
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            if selector.pseudo_element.startswith('-'):
                                logger_level = DEBUG
                                raise cssselect2.SelectorError(
                                    'ignored prefixed pseudo-element: '
                                    f'{selector.pseudo_element}')
                            else:
                                raise cssselect2.SelectorError(
                                    'unknown pseudo-element: '
                                    f'{selector.pseudo_element}')
                    # A valid style rule was seen: later @import are rejected.
                    ignore_imports = True
                except cssselect2.SelectorError as exc:
                    LOGGER.log(logger_level,
                               "Invalid or unsupported selector '%s', %s",
                               tinycss2.serialize(rule.prelude), exc)
                    continue
            else:
                ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                # NOTE(review): the message reads "beginning of the the whole
                # rule"; a word seems to be missing. Confirm before changing.
                LOGGER.warning(
                    '@import rule %r not at the beginning of the '
                    'the whole rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue

            # The first non-whitespace prelude token gives the target: a
            # string is joined against base_url, anything else goes through
            # get_url and is only accepted when reported as 'external'.
            tokens = remove_whitespace(rule.prelude)
            url = None
            if tokens:
                if tokens[0].type == 'string':
                    url = url_join(base_url,
                                   tokens[0].value,
                                   allow_relative=False,
                                   context='@import at %s:%s',
                                   context_args=(rule.source_line,
                                                 rule.source_column))
                else:
                    url_tuple = get_url(tokens[0], base_url)
                    if url_tuple and url_tuple[1][0] == 'external':
                        url = url_tuple[1][1]
            if url is None:
                continue
            # The remaining prelude tokens are parsed as a media query.
            media = media_queries.parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type %r '
                    'the whole @import rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            if not media_queries.evaluate_media_query(media,
                                                      device_media_type):
                continue
            # NOTE(review): url cannot be None here (checked above), so this
            # guard is redundant.
            if url is not None:
                try:
                    # The instance is discarded; it is handed the shared
                    # matcher, page_rules and counter_style.
                    CSS(url=url,
                        url_fetcher=url_fetcher,
                        media_type=device_media_type,
                        font_config=font_config,
                        counter_style=counter_style,
                        matcher=matcher,
                        page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error('Failed to load stylesheet at %s : %s', url,
                                 exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = media_queries.parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type %r '
                    'the whole @media rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            ignore_imports = True
            if not media_queries.evaluate_media_query(media,
                                                      device_media_type):
                continue
            # The recursive call passes ignore_imports=True, so @import rules
            # nested inside @media are always rejected.
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type,
                                  base_url,
                                  content_rules,
                                  url_fetcher,
                                  matcher,
                                  page_rules,
                                  fonts,
                                  font_config,
                                  counter_style,
                                  ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector %r, '
                    'the whole @page rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                # 'specificity' is removed from the dict before the rest is
                # used as PageType keyword arguments.
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, page_type)]
                    page_rules.append((rule, selector_list, declarations))

                # At-rules nested in the @page block that have their own
                # content are registered as margin rules.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (margin_rule.content is
                                                         None):
                        continue
                    declarations = list(
                        preprocess_declarations(
                            base_url,
                            tinycss2.parse_declaration_list(
                                margin_rule.content)))
                    if declarations:
                        selector_list = [
                            (specificity, f'@{margin_rule.lower_at_keyword}',
                             page_type)
                        ]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(
                preprocess_descriptors('font-face', base_url, content))
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %d:%d",
                        key.replace('_', '-'), rule.source_line,
                        rule.source_column)
                    break
            else:
                # for/else: reached only when no required descriptor is
                # missing.
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)

        elif (rule.type == 'at-rule'
              and rule.lower_at_keyword == 'counter-style'):
            name = counters.parse_counter_style_name(rule.prelude,
                                                     counter_style)
            if name is None:
                LOGGER.warning(
                    'Invalid counter style name %r, the whole '
                    '@counter-style rule was ignored at %d:%d.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue

            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            # Every known descriptor starts as None and is overwritten by
            # whatever the rule actually declares.
            counter = {
                'system': None,
                'negative': None,
                'prefix': None,
                'suffix': None,
                'range': None,
                'pad': None,
                'fallback': None,
                'symbols': None,
                'additive_symbols': None,
            }
            rule_descriptors = dict(
                preprocess_descriptors('counter-style', base_url, content))

            for descriptor_name, descriptor_value in rule_descriptors.items():
                counter[descriptor_name] = descriptor_value

            # With no 'system' descriptor, fall back to a 'symbolic' triple.
            if counter['system'] is None:
                system = (None, 'symbolic', None)
            else:
                system = counter['system']

            # Minimum symbol counts are only enforced when the first element
            # of the system triple is None.
            # NOTE(review): presumably a non-None first element marks an
            # 'extends' system; confirm against the descriptor parser.
            if system[0] is None:
                if system[1] in ('cyclic', 'fixed', 'symbolic'):
                    if len(counter['symbols'] or []) < 1:
                        LOGGER.warning(
                            'In counter style %r at %d:%d, '
                            'counter style %r needs at least one symbol', name,
                            rule.source_line, rule.source_column, system[1])
                        continue
                elif system[1] in ('alphabetic', 'numeric'):
                    if len(counter['symbols'] or []) < 2:
                        LOGGER.warning(
                            'In counter style %r at %d:%d, '
                            'counter style %r needs at least two symbols',
                            name, rule.source_line, rule.source_column,
                            system[1])
                        continue
                elif system[1] == 'additive':
                    if len(counter['additive_symbols'] or []) < 2:
                        LOGGER.warning(
                            'In counter style %r at %d:%d, '
                            'counter style "additive" '
                            'needs at least two additive symbols', name,
                            rule.source_line, rule.source_column)
                        continue

            counter_style[name] = counter
Exemple #13
0
def test_bad_unicode():
    """Smoke test: parsing input with a lone surrogate must not raise."""
    declaration_source = 'background:\udca9'
    rule_list_source = '@\udca9'

    # Results are discarded on purpose; only the absence of an exception
    # is being checked.
    parse_one_declaration(declaration_source)
    parse_rule_list(rule_list_source)
Exemple #14
0
def test_serialize_rules():
    """Round-trip check: serializing a parsed rule list gives the input."""
    css_text = '@import "a.css"; foo#bar.baz { color: red } /**/ @media print{}'
    round_tripped = serialize(parse_rule_list(css_text))
    assert round_tripped == css_text
Exemple #15
0
def test_rule_list(input):
    """Parse ``input`` as a rule list using the shared ``SKIP`` options."""
    parsed_rules = parse_rule_list(input, **SKIP)
    return parsed_rules
Exemple #16
0
def preprocess_stylesheet(device_media_type,
                          base_url,
                          stylesheet_rules,
                          url_fetcher,
                          matcher,
                          page_rules,
                          fonts,
                          font_config,
                          ignore_imports=False):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walk ``stylesheet_rules`` and dispatch on the kind of rule: qualified
    rules are registered on ``matcher``, ``@import`` loads another stylesheet
    through ``CSS``, ``@media`` recurses into its content, ``@page`` (and its
    nested margin at-rules) is appended to ``page_rules`` and ``@font-face``
    is handed to ``font_config``. Invalid rules are logged and skipped.

    :param device_media_type: media type the media queries are evaluated
        against; also forwarded to imported stylesheets.
    :param base_url: base URL used to resolve ``@import`` targets and passed
        to the declaration/descriptor preprocessors.
    :param stylesheet_rules: iterable of parsed rules (the recursive call
        passes the result of ``tinycss2.parse_rule_list``).
    :param url_fetcher: forwarded to ``CSS`` and
        ``font_config.add_font_face``.
    :param matcher: object whose ``add_selector(selector, declarations)`` is
        called for every compiled selector.
    :param page_rules: list extended in place with
        ``(rule, selector_list, declarations)`` tuples.
    :param fonts: list extended in place with the truthy values returned by
        ``font_config.add_font_face``.
    :param font_config: font configuration, or ``None`` to ignore
        ``@font-face`` rules.
    :param ignore_imports: when true, ``@import`` rules are rejected with a
        warning. It is switched on as soon as another rule was accepted.

    """
    for rule in stylesheet_rules:
        # Skip everything that has no content (or no ``content`` attribute at
        # all). ``@import`` is the only rule handled below that is allowed to
        # come without a block.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                logger_level = WARNING
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    # NOTE(review): each selector is registered before its
                    # pseudo-element is validated, so selectors preceding an
                    # invalid one stay registered - confirm this is intended.
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            if selector.pseudo_element.startswith('-'):
                                # Prefixed pseudo-elements are only reported
                                # at DEBUG level.
                                logger_level = DEBUG
                                raise cssselect2.SelectorError(
                                    'ignored prefixed pseudo-element: %s' %
                                    selector.pseudo_element)
                            else:
                                raise cssselect2.SelectorError(
                                    'unknown pseudo-element: %s' %
                                    selector.pseudo_element)
                    ignore_imports = True
                except cssselect2.SelectorError as exc:
                    LOGGER.log(logger_level,
                               "Invalid or unsupported selector '%s', %s",
                               tinycss2.serialize(rule.prelude), exc)
                    continue
            else:
                ignore_imports = True

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            if ignore_imports:
                # The original message was missing "stylesheet," and read
                # "not at the beginning of the the whole rule was ignored".
                LOGGER.warning(
                    '@import rule "%s" not at the beginning of the '
                    'stylesheet, the whole rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue

            tokens = remove_whitespace(rule.prelude)
            # The first prelude token must carry the URL to import.
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Whatever follows the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url,
                           url,
                           allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    # The instance is discarded: the imported stylesheet is
                    # given the same ``matcher`` and ``page_rules`` objects.
                    CSS(url=url,
                        url_fetcher=url_fetcher,
                        media_type=device_media_type,
                        font_config=font_config,
                        matcher=matcher,
                        page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error('Failed to load stylesheet at %s : %s', url,
                                 exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            ignore_imports = True
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            # Nested rules are never allowed to import.
            preprocess_stylesheet(device_media_type,
                                  base_url,
                                  content_rules,
                                  url_fetcher,
                                  matcher,
                                  page_rules,
                                  fonts,
                                  font_config,
                                  ignore_imports=True)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            data = parse_page_selectors(rule)

            if data is None:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue

            ignore_imports = True
            for page_type in data:
                # 'specificity' is removed so that the remaining keys can be
                # passed to ``PageType`` as keyword arguments.
                specificity = page_type.pop('specificity')
                page_type = PageType(**page_type)
                # Use a double lambda to have a closure that holds page_types
                match = (lambda page_type: lambda page_names: list(
                    matching_page_types(page_type, names=page_names))
                         )(page_type)
                content = tinycss2.parse_declaration_list(rule.content)
                declarations = list(preprocess_declarations(base_url, content))

                if declarations:
                    selector_list = [(specificity, None, match)]
                    page_rules.append((rule, selector_list, declarations))

                # At-rules nested in the @page block are margin rules; the
                # ones without a block are skipped.
                for margin_rule in content:
                    if margin_rule.type != 'at-rule' or (margin_rule.content is
                                                         None):
                        continue
                    declarations = list(
                        preprocess_declarations(
                            base_url,
                            tinycss2.parse_declaration_list(
                                margin_rule.content)))
                    if declarations:
                        selector_list = [
                            (specificity, '@' + margin_rule.lower_at_keyword,
                             match)
                        ]
                        page_rules.append(
                            (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            ignore_imports = True
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            # Both descriptors are mandatory; the ``else`` clause only runs
            # when neither of them is missing.
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'), rule.source_line,
                        rule.source_column)
                    break
            else:
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, rules, fonts, font_config):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walk ``stylesheet_rules`` and dispatch on the kind of rule: qualified
    rules are compiled to XPath selectors, ``@import`` loads another
    stylesheet through ``CSS`` and copies its rules, ``@media`` recurses into
    its content, ``@page`` handles the page rule and its nested margin
    at-rules, and ``@font-face`` is handed to ``font_config``. Invalid rules
    are logged and skipped.

    :param device_media_type: media type the media queries are evaluated
        against; also forwarded to imported stylesheets.
    :param base_url: base URL used to resolve ``@import`` targets and passed
        to the declaration/descriptor preprocessors.
    :param stylesheet_rules: iterable of parsed rules (the recursive call
        passes the result of ``tinycss2.parse_rule_list``).
    :param url_fetcher: forwarded to ``CSS`` and
        ``font_config.add_font_face``.
    :param rules: list extended in place with
        ``(rule, selector_list, declarations)`` tuples.
    :param fonts: list extended in place with the truthy values returned by
        ``font_config.add_font_face``.
    :param font_config: font configuration, or ``None`` to ignore
        ``@font-face`` rules.

    """
    selector_to_xpath = cssselect.HTMLTranslator().selector_to_xpath
    for rule in stylesheet_rules:
        # Skip everything that has no content (or no ``content`` attribute at
        # all): the parsers called below cannot take ``None``. ``@import`` is
        # the only rule handled here that is allowed to come without a block.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                selector_string = tinycss2.serialize(rule.prelude)
                try:
                    selector_list = []
                    for selector in cssselect.parse(selector_string):
                        xpath = selector_to_xpath(selector)
                        try:
                            lxml_xpath = lxml.etree.XPath(xpath)
                        except ValueError as exc:
                            # TODO: Some characters are not supported by lxml's
                            # XPath implementation (including control
                            # characters), but these characters are valid in
                            # the CSS2.1 specification.
                            raise cssselect.SelectorError(str(exc)) from exc
                        selector_list.append(
                            Selector((0, ) + selector.specificity(),
                                     selector.pseudo_element, lxml_xpath))
                    for selector in selector_list:
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            raise cssselect.ExpressionError(
                                'Unknown pseudo-element: %s' %
                                selector.pseudo_element)
                except cssselect.SelectorError as exc:
                    LOGGER.warning("Invalid or unsupported selector '%s', %s",
                                   selector_string, exc)
                    continue
                rules.append((rule, selector_list, declarations))

        # At-keywords are matched through ``lower_at_keyword`` so that
        # spellings such as ``@IMPORT`` or ``@Media`` are recognised too.
        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            tokens = remove_whitespace(rule.prelude)
            # The first prelude token must carry the URL to import.
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Whatever follows the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                # Actually ignore the rule, as the message says, instead of
                # handing ``None`` to ``evaluate_media_query``.
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url,
                           url,
                           allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    stylesheet = CSS(url=url,
                                     url_fetcher=url_fetcher,
                                     media_type=device_media_type,
                                     font_config=font_config)
                except URLFetchingError as exc:
                    LOGGER.warning('Failed to load stylesheet at %s : %s', url,
                                   exc)
                else:
                    rules.extend(stylesheet.rules)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type, base_url, content_rules,
                                  url_fetcher, rules, fonts, font_config)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            tokens = remove_whitespace(rule.prelude)
            # TODO: support named pages (see CSS3 Paged Media)
            if not tokens:
                pseudo_class = None
                specificity = (0, 0)
            elif (len(tokens) == 2 and tokens[0].type == 'literal'
                  and tokens[0].value == ':' and tokens[1].type == 'ident'):
                pseudo_class = tokens[1].lower_value
                specificity = {
                    'first': (1, 0),
                    'blank': (1, 0),
                    'left': (0, 1),
                    'right': (0, 1),
                }.get(pseudo_class)
                if not specificity:
                    LOGGER.warning(
                        'Unknown @page pseudo-class "%s", '
                        'the whole @page rule was ignored '
                        'at %s:%s.', pseudo_class, rule.source_line,
                        rule.source_column)
                    continue
            else:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            content = tinycss2.parse_declaration_list(rule.content)
            declarations = list(preprocess_declarations(base_url, content))

            # Use a double lambda to have a closure that holds page_types
            match = (lambda page_types: lambda _document: page_types)(
                PAGE_PSEUDOCLASS_TARGETS[pseudo_class])

            if declarations:
                selector_list = [Selector(specificity, None, match)]
                rules.append((rule, selector_list, declarations))

            # At-rules nested in the @page block are margin rules; the ones
            # without a block have nothing to parse and are skipped.
            for margin_rule in content:
                if margin_rule.type != 'at-rule' or (
                        margin_rule.content is None):
                    continue
                declarations = list(
                    preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                if declarations:
                    selector_list = [
                        Selector(specificity,
                                 '@' + margin_rule.lower_at_keyword, match)
                    ]
                    rules.append((margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            # Both descriptors are mandatory; the ``else`` clause only runs
            # when neither of them is missing.
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'), rule.source_line,
                        rule.source_column)
                    break
            else:
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)
Exemple #18
0
def preprocess_stylesheet(device_media_type, base_url, stylesheet_rules,
                          url_fetcher, matcher, page_rules, fonts,
                          font_config):
    """Do the work that can be done early on stylesheet, before they are
    in a document.

    Walk ``stylesheet_rules`` and dispatch on the kind of rule: qualified
    rules are registered on ``matcher``, ``@import`` loads another stylesheet
    through ``CSS``, ``@media`` recurses into its content, ``@page`` (and its
    nested margin at-rules) is appended to ``page_rules`` and ``@font-face``
    is handed to ``font_config``. Invalid rules are logged and skipped.

    :param device_media_type: media type the media queries are evaluated
        against; also forwarded to imported stylesheets.
    :param base_url: base URL used to resolve ``@import`` targets and passed
        to the declaration/descriptor preprocessors.
    :param stylesheet_rules: iterable of parsed rules (the recursive call
        passes the result of ``tinycss2.parse_rule_list``).
    :param url_fetcher: forwarded to ``CSS`` and
        ``font_config.add_font_face``.
    :param matcher: object whose ``add_selector(selector, declarations)`` is
        called for every compiled selector.
    :param page_rules: list extended in place with
        ``(rule, selector_list, declarations)`` tuples.
    :param fonts: list extended in place with the truthy values returned by
        ``font_config.add_font_face``.
    :param font_config: font configuration, or ``None`` to ignore
        ``@font-face`` rules.

    """
    for rule in stylesheet_rules:
        # Skip everything that has no content (or no ``content`` attribute at
        # all): the parsers called below cannot take ``None``. ``@import`` is
        # the only rule handled here that is allowed to come without a block.
        if getattr(rule, 'content', None) is None and (
                rule.type != 'at-rule' or rule.lower_at_keyword != 'import'):
            continue

        if rule.type == 'qualified-rule':
            declarations = list(
                preprocess_declarations(
                    base_url, tinycss2.parse_declaration_list(rule.content)))
            if declarations:
                try:
                    selectors = cssselect2.compile_selector_list(rule.prelude)
                    # NOTE(review): each selector is registered before its
                    # pseudo-element is validated, so selectors preceding an
                    # invalid one stay registered - confirm this is intended.
                    for selector in selectors:
                        matcher.add_selector(selector, declarations)
                        if selector.pseudo_element not in PSEUDO_ELEMENTS:
                            raise cssselect2.SelectorError(
                                'Unknown pseudo-element: %s' %
                                selector.pseudo_element)
                except cssselect2.SelectorError as exc:
                    LOGGER.warning("Invalid or unsupported selector '%s', %s",
                                   tinycss2.serialize(rule.prelude), exc)
                    continue

        # At-keywords are matched through ``lower_at_keyword`` so that
        # spellings such as ``@IMPORT`` or ``@Media`` are recognised too.
        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'import':
            tokens = remove_whitespace(rule.prelude)
            # The first prelude token must carry the URL to import.
            if tokens and tokens[0].type in ('url', 'string'):
                url = tokens[0].value
            else:
                continue
            # Whatever follows the URL is the media query of the import.
            media = parse_media_query(tokens[1:])
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @import rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                # Actually ignore the rule, as the message says, instead of
                # handing ``None`` to ``evaluate_media_query``.
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            url = url_join(base_url,
                           url,
                           allow_relative=False,
                           context='@import at %s:%s',
                           context_args=(rule.source_line, rule.source_column))
            if url is not None:
                try:
                    # The instance is discarded: the imported stylesheet is
                    # given the same ``matcher`` and ``page_rules`` objects.
                    CSS(url=url,
                        url_fetcher=url_fetcher,
                        media_type=device_media_type,
                        font_config=font_config,
                        matcher=matcher,
                        page_rules=page_rules)
                except URLFetchingError as exc:
                    LOGGER.error('Failed to load stylesheet at %s : %s', url,
                                 exc)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'media':
            media = parse_media_query(rule.prelude)
            if media is None:
                LOGGER.warning(
                    'Invalid media type "%s" '
                    'the whole @media rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            if not evaluate_media_query(media, device_media_type):
                continue
            content_rules = tinycss2.parse_rule_list(rule.content)
            preprocess_stylesheet(device_media_type, base_url, content_rules,
                                  url_fetcher, matcher, page_rules, fonts,
                                  font_config)

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'page':
            tokens = remove_whitespace(rule.prelude)
            # Keyword arguments for ``PageType``; the defaults describe a
            # selector-less ``@page`` rule.
            types = {
                'side': None,
                'blank': False,
                'first': False,
                'name': None
            }
            # TODO: Specificity is probably wrong, should clean and test that.
            if not tokens:
                specificity = (0, 0, 0)
            elif (len(tokens) == 2 and tokens[0].type == 'literal'
                  and tokens[0].value == ':' and tokens[1].type == 'ident'):
                pseudo_class = tokens[1].lower_value
                if pseudo_class in ('left', 'right'):
                    types['side'] = pseudo_class
                    specificity = (0, 0, 1)
                elif pseudo_class in ('blank', 'first'):
                    types[pseudo_class] = True
                    specificity = (0, 1, 0)
                else:
                    LOGGER.warning(
                        'Unknown @page pseudo-class "%s", '
                        'the whole @page rule was ignored '
                        'at %s:%s.', pseudo_class, rule.source_line,
                        rule.source_column)
                    continue
            elif len(tokens) == 1 and tokens[0].type == 'ident':
                types['name'] = tokens[0].value
                specificity = (1, 0, 0)
            else:
                LOGGER.warning(
                    'Unsupported @page selector "%s", '
                    'the whole @page rule was ignored at %s:%s.',
                    tinycss2.serialize(rule.prelude), rule.source_line,
                    rule.source_column)
                continue
            page_type = PageType(**types)
            # Use a double lambda to have a closure that holds page_types
            match = (lambda page_type: lambda page_names: list(
                matching_page_types(page_type, names=page_names)))(page_type)
            content = tinycss2.parse_declaration_list(rule.content)
            declarations = list(preprocess_declarations(base_url, content))

            if declarations:
                selector_list = [(specificity, None, match)]
                page_rules.append((rule, selector_list, declarations))

            # At-rules nested in the @page block are margin rules; the ones
            # without a block have nothing to parse and are skipped.
            for margin_rule in content:
                if margin_rule.type != 'at-rule' or (
                        margin_rule.content is None):
                    continue
                declarations = list(
                    preprocess_declarations(
                        base_url,
                        tinycss2.parse_declaration_list(margin_rule.content)))
                if declarations:
                    selector_list = [(specificity,
                                      '@' + margin_rule.lower_at_keyword,
                                      match)]
                    page_rules.append(
                        (margin_rule, selector_list, declarations))

        elif rule.type == 'at-rule' and rule.lower_at_keyword == 'font-face':
            content = tinycss2.parse_declaration_list(rule.content)
            rule_descriptors = dict(preprocess_descriptors(base_url, content))
            # Both descriptors are mandatory; the ``else`` clause only runs
            # when neither of them is missing.
            for key in ('src', 'font_family'):
                if key not in rule_descriptors:
                    LOGGER.warning(
                        "Missing %s descriptor in '@font-face' rule at %s:%s",
                        key.replace('_', '-'), rule.source_line,
                        rule.source_column)
                    break
            else:
                if font_config is not None:
                    font_filename = font_config.add_font_face(
                        rule_descriptors, url_fetcher)
                    if font_filename:
                        fonts.append(font_filename)