예제 #1
0
def parse_stylesheets(tree, url, session=None):
    """Find and parse the stylesheets in ``tree``.

    Each rule yielded by ``find_stylesheets_rules`` is split by
    ``parse_declarations`` into its normal and ``!important`` declarations.
    Every selector of the rule is then registered in the corresponding
    matcher, with the declarations as payload.

    Selectors that target a pseudo-element or that can never match are left
    out.  A rule whose selector list cannot be compiled is dropped on its own
    instead of aborting the parsing of every remaining rule.

    Return two :class:`cssselect2.Matcher` objects,
    for normal and !important declarations.

    """
    normal_matcher = cssselect2.Matcher()
    important_matcher = cssselect2.Matcher()
    for stylesheet in find_stylesheets(tree):
        rules = find_stylesheets_rules(
            tree, stylesheet, url, session=session)
        for rule in rules:
            normal_declarations, important_declarations = parse_declarations(
                rule.content)
            try:
                selectors = cssselect2.compile_selector_list(rule.prelude)
            except cssselect2.SelectorError:
                # Invalid or unsupported selector: ignore this rule only,
                # the other rules of the stylesheet are still usable.
                continue
            for selector in selectors:
                if (selector.pseudo_element is not None
                        or selector.never_matches):
                    continue
                # Empty declaration lists are not worth registering.
                if normal_declarations:
                    normal_matcher.add_selector(selector,
                                                normal_declarations)
                if important_declarations:
                    important_matcher.add_selector(selector,
                                                   important_declarations)
    return normal_matcher, important_matcher
예제 #2
0
def parse_stylesheets(tree, url):
    """Find stylesheets and return rule matchers in given tree.

    The text of every SVG ``<style>`` element whose ``type`` is ``text/css``
    (the default when the attribute is missing) is parsed with tinycss2.
    Each rule yielded by ``find_stylesheets_rules`` is then split into its
    normal and ``!important`` declarations, and every selector of the rule
    is registered in the corresponding matcher.

    Selectors that target a pseudo-element or that can never match are left
    out.  A rule whose selector list cannot be compiled is dropped on its own
    instead of aborting the parsing of every remaining rule.

    Return a ``(normal_matcher, important_matcher)`` tuple of
    :class:`cssselect2.Matcher` objects.

    """
    normal_matcher = cssselect2.Matcher()
    important_matcher = cssselect2.Matcher()

    # Find stylesheets
    # TODO: support contentStyleType on <svg>
    # TODO: pass href for relative URLs
    # TODO: support media types
    # TODO: what if <style> has children elements?
    # http://www.w3.org/TR/SVG/styling.html#StyleElement
    stylesheets = [
        tinycss2.parse_stylesheet(
            element.text, skip_comments=True, skip_whitespace=True)
        for element in tree.etree_element.iter()
        if (element.tag == '{http://www.w3.org/2000/svg}style' and
            element.get('type', 'text/css') == 'text/css' and
            element.text)
    ]

    # Parse rules and fill matchers
    for stylesheet in stylesheets:
        for rule in find_stylesheets_rules(tree, stylesheet, url):
            normal_declarations, important_declarations = parse_declarations(
                rule.content)
            try:
                selectors = cssselect2.compile_selector_list(rule.prelude)
            except cssselect2.SelectorError:
                # Invalid or unsupported selector: ignore this rule only,
                # the other rules of the stylesheet are still usable.
                continue
            for selector in selectors:
                if (selector.pseudo_element is not None or
                        selector.never_matches):
                    continue
                # Empty declaration lists are not worth registering.
                if normal_declarations:
                    normal_matcher.add_selector(
                        selector, normal_declarations)
                if important_declarations:
                    important_matcher.add_selector(
                        selector, important_declarations)

    return normal_matcher, important_matcher
예제 #3
0
 def __init__(self, guess=None, filename=None, url=None, file_obj=None,
              string=None, encoding=None, base_url=None,
              url_fetcher=default_url_fetcher, _check_mime_type=False,
              media_type='print', font_config=None, counter_style=None,
              matcher=None, page_rules=None):
     """Fetch a CSS source, parse it and preprocess its rules.

     The source is chosen by ``_select_source`` from ``guess``,
     ``filename``, ``url``, ``file_obj`` or ``string``.  Text strings are
     parsed directly; anything else is parsed as bytes, with ``encoding``
     passed as the environment encoding and the encoding reported by the
     source passed as the protocol encoding.

     The parsed stylesheet is handed to ``preprocess_stylesheet`` together
     with ``self.matcher``, ``self.page_rules`` and ``self.fonts``.  A
     fresh matcher is created when ``matcher`` is falsy, and a fresh list
     when ``page_rules`` is ``None``.

     """
     source_label = (
         filename or url or getattr(file_obj, 'name', 'CSS string'))
     PROGRESS_LOGGER.info(
         'Step 2 - Fetching and parsing CSS - %s', source_label)
     selected_source = _select_source(
         guess, filename, url, file_obj, string,
         base_url=base_url, url_fetcher=url_fetcher,
         check_css_mime_type=_check_mime_type)
     # ``base_url`` is rebound here to the value given by the source.
     with selected_source as (
             source_type, source, base_url, protocol_encoding):
         if source_type == 'file_obj':
             # File objects never take the text branch below.
             source = source.read()
         if source_type == 'string' and not isinstance(source, bytes):
             # unicode, no encoding
             stylesheet = tinycss2.parse_stylesheet(source)
         else:
             stylesheet, encoding = tinycss2.parse_stylesheet_bytes(
                 source, environment_encoding=encoding,
                 protocol_encoding=protocol_encoding)

     self.base_url = base_url
     if matcher:
         self.matcher = matcher
     else:
         self.matcher = cssselect2.Matcher()
     if page_rules is None:
         self.page_rules = []
     else:
         self.page_rules = page_rules
     self.fonts = []
     preprocess_stylesheet(
         media_type, base_url, stylesheet, url_fetcher, self.matcher,
         self.page_rules, self.fonts, font_config, counter_style)
예제 #4
0
def select(rule: str, element: et.Element) -> 'PQuery':
    """Return a ``PQuery`` of the elements in ``element`` matching ``rule``.

    ``rule`` is a CSS selector list.  It is completed with an empty
    declaration block so that tinycss2 parses it as a stylesheet rule, and
    its compiled selectors are registered in a matcher.  The subtree rooted
    at ``element`` is then walked, and every element with at least one
    match is collected, in iteration order.

    """
    parsed_rules = tinycss2.parse_stylesheet(
        rule + ' {}', skip_whitespace=True)

    matcher = cssselect2.Matcher()
    for parsed_rule in parsed_rules:
        for compiled in cssselect2.compile_selector_list(
                parsed_rule.prelude):
            # No payload is needed, only whether something matches.
            matcher.add_selector(compiled, None)

    root = cssselect2.ElementWrapper.from_html_root(element)
    matching_elements: List[et.Element] = [
        node.etree_element
        for node in root.iter_subtree()
        if matcher.match(node)
    ]
    return PQuery(matching_elements)
예제 #5
0
파일: example.py 프로젝트: Kozea/cssselect2
from xml.etree import ElementTree

import cssselect2
import tinycss2

# Parse CSS and add rules to the matcher

# Matcher that collects every compiled selector with its payload.
matcher = cssselect2.Matcher()

# Parse a small inline stylesheet made of three rules.
rules = tinycss2.parse_stylesheet('''
  body { font-size: 2em }
  body p { background: red }
  p { color: blue }
''', skip_whitespace=True)

for rule in rules:
    # A rule prelude may hold several comma-separated selectors.
    selectors = cssselect2.compile_selector_list(rule.prelude)
    # The payload keeps the serialized selector and declaration text of the
    # rule, and is shared by every selector compiled from that rule.
    selector_string = tinycss2.serialize(rule.prelude)
    content_string = tinycss2.serialize(rule.content)
    payload = (selector_string, content_string)
    for selector in selectors:
        matcher.add_selector(selector, payload)


# Parse HTML and find CSS rules applying to each tag

html_tree = ElementTree.fromstring('''
  <html>
    <body>
      <p>Test</p>
    </body>