Beispiel #1
0
def _load_css(user):
    """
    Build the effective style sheet for ``user``.

    Reads ``users/default.css`` and ``users/<user>.css`` (either may be
    missing or unreadable) and merges their qualified rules, keyed by the
    string form of each rule's prelude:
    - a rule whose key occurs in both sheets is taken from the user sheet
    - a rule whose key occurs in only one sheet is taken as is

    Returns the serialized rules, each preceded by a newline, or an empty
    string when neither sheet has any content.
    """
    def read_sheet(file_name):
        # A sheet that cannot be opened or read simply counts as absent.
        try:
            with open(os.path.join("users", file_name), "rb") as handle:
                return handle.read()
        except OSError:
            return None

    default_bytes = read_sheet("default.css")
    user_bytes = read_sheet(user + ".css")

    if not (default_bytes or user_bytes):
        return ""

    merged = {}
    # Default sheet first, so user rules overwrite entries with the same key.
    for sheet_bytes in (default_bytes, user_bytes):
        if not sheet_bytes:
            continue
        parsed, _ = tinycss2.parse_stylesheet_bytes(
            sheet_bytes, skip_whitespace=True, skip_comments=True)
        merged.update(
            (str(node.prelude), node)
            for node in parsed
            if node.type == "qualified-rule")

    return "".join(f"\n{node.serialize()}" for node in merged.values())
Beispiel #2
0
    def _process_nbconvert_css(self, css):
        """
        Trim and namespace the downloaded notebook CSS (bytes in, bytes out).

        ``css`` is split on the encoded IPYTHON_NOTEBOOK_DECLARE_STR; only the
        piece following the first occurrence is kept (up to the next
        occurrence, if any) and the marker is put back in front of it.  When
        REPLACE_HIGHLIGHT_WITH_CODEHILITE is set, the highlight class followed
        by a space is replaced with the codehilite class.  Finally the
        prelude of every qualified rule is prefixed with
        ``.relate-notebook-container `` and the sheet is serialized using the
        encoding reported by tinycss2.

        Raises ValueError when the marker does not occur in ``css``.
        """
        print("Processing downloaded ipython notebook CSS.")
        marker = IPYTHON_NOTEBOOK_DECLARE_STR.encode()
        try:
            css = marker + css.split(marker)[1]
        except IndexError:
            raise ValueError("Bad splitter for notebook css %s" %
                             IPYTHON_NOTEBOOK_DECLARE_STR)
        print("Done.")

        if REPLACE_HIGHLIGHT_WITH_CODEHILITE:
            old_class = HIGHLIGHT_CSS_CLASS.encode() + b" "
            new_class = CODEHILITE_CSS_CLASS.encode() + b" "
            css = css.replace(old_class, new_class)

        import tinycss2
        ast = tinycss2.ast
        css_parsed, encoding = tinycss2.parse_stylesheet_bytes(css)
        qualified_rules = (
            node for node in css_parsed
            if isinstance(node, ast.QualifiedRule))
        for node in qualified_rules:
            # Fresh tokens per rule, inserted in front of the selector.
            node.prelude[:0] = [
                ast.LiteralToken(None, None, "."),
                ast.IdentToken(None, None, "relate-notebook-container"),
                ast.WhitespaceToken(None, None, " "),
            ]
        return tinycss2.serialize(css_parsed).encode(encoding.name)
Beispiel #3
0
 def __init__(self, guess=None, filename=None, url=None, file_obj=None,
              string=None, encoding=None, base_url=None,
              url_fetcher=default_url_fetcher, _check_mime_type=False,
              media_type='print', font_config=None, matcher=None,
              page_rules=None):
     """Select, parse and preprocess a CSS source.

     The source is resolved by ``_select_source`` from ``guess``,
     ``filename``, ``url``, ``file_obj`` or ``string``.  A non-bytes string
     source is parsed with ``tinycss2.parse_stylesheet``; anything else is
     parsed as bytes, with ``encoding`` as the environment encoding and the
     protocol encoding reported for the source.  The parsed stylesheet is
     then passed to ``preprocess_stylesheet`` along with ``self.matcher``,
     ``self.page_rules`` and ``self.fonts``.
     """
     source_label = (
         filename or url or getattr(file_obj, 'name', 'CSS string'))
     PROGRESS_LOGGER.info(
         'Step 2 - Fetching and parsing CSS - %s', source_label)
     selected = _select_source(
         guess, filename, url, file_obj, string,
         base_url=base_url, url_fetcher=url_fetcher,
         check_css_mime_type=_check_mime_type)
     with selected as (source_type, source, base_url, protocol_encoding):
         if isinstance(source, bytes) or source_type != 'string':
             # Byte input: let tinycss2 work out the encoding.
             raw = source.read() if source_type == 'file_obj' else source
             stylesheet, _ = tinycss2.parse_stylesheet_bytes(
                 raw, environment_encoding=encoding,
                 protocol_encoding=protocol_encoding)
         else:
             # unicode, no encoding
             stylesheet = tinycss2.parse_stylesheet(source)
     self.base_url = base_url
     self.matcher = matcher or cssselect2.Matcher()
     if page_rules is None:
         self.page_rules = []
     else:
         self.page_rules = page_rules
     # TODO: fonts are stored here and should be cleaned after rendering
     self.fonts = []
     preprocess_stylesheet(
         media_type, base_url, stylesheet, url_fetcher, self.matcher,
         self.page_rules, self.fonts, font_config)
Beispiel #4
0
def get_urls_from_css_resource(bytes_text):
    # type: (bytes) -> List[Text]
    """Return the URLs that ``_url_from_tags`` finds in a CSS byte string.

    For ``@import`` rules the prelude is searched; for every other rule the
    block content is searched.  Nodes without usable tokens (at-rules with
    no block, parse-error nodes) are skipped.

    Returns an empty list when the stylesheet cannot be parsed.
    """
    def is_import_node(n):
        return n.type == "at-rule" and n.lower_at_keyword == "import"

    def is_font_node(n):
        return n.type == "at-rule" and n.lower_at_keyword == "font-face"

    try:
        rules, _encoding = tinycss2.parse_stylesheet_bytes(
            css_bytes=bytes_text, skip_comments=True, skip_whitespace=True)
    except Exception:
        logger.error("Failed to read CSS string")
        return []

    urls = []
    for rule in rules:
        if is_import_node(rule):
            logger.debug("The node has @import")
            tags = rule.prelude
        else:
            if is_font_node(rule):
                logger.debug("The node has @font-face")
            # Parse-error nodes carry no ``content`` attribute; reading it
            # directly would raise AttributeError outside the try above.
            tags = getattr(rule, "content", None)
        if tags:
            urls.extend(_url_from_tags(tags))
    return urls
Beispiel #5
0
def get_dummy_sel_rule(sel):
    '''
    Parse ``sel`` followed by an empty ``{}`` block and return the first
    node of the parsed result.
    '''
    css_bytes = (sel + '{}').encode('utf-8')
    parsed_rules, _codec = tycss.parse_stylesheet_bytes(css_bytes)
    return parsed_rules[0]
    def _process_nbconvert_css(self, css):
        """
        Cut the notebook CSS down to the declared section and scope it.

        Keeps the piece of ``css`` (bytes) that follows the first occurrence
        of the encoded IPYTHON_NOTEBOOK_DECLARE_STR, with the marker put back
        in front; raises ValueError when the marker is missing.  Optionally
        swaps the highlight class for the codehilite class, then prefixes
        the prelude of each qualified rule with
        ``.relate-notebook-container `` and returns the serialized sheet
        encoded with the encoding tinycss2 reported.
        """
        print("Processing downloaded ipython notebook CSS.")
        declare_marker = IPYTHON_NOTEBOOK_DECLARE_STR.encode()
        try:
            pieces = css.split(declare_marker)
            css = declare_marker + pieces[1]
        except IndexError:
            raise ValueError("Bad splitter for notebook css %s"
                             % IPYTHON_NOTEBOOK_DECLARE_STR)
        print("Done.")

        if REPLACE_HIGHLIGHT_WITH_CODEHILITE:
            css = css.replace(
                HIGHLIGHT_CSS_CLASS.encode() + b" ",
                CODEHILITE_CSS_CLASS.encode() + b" ")

        import tinycss2

        def container_prefix():
            # New token objects on every call so rules never share nodes.
            return [
                tinycss2.ast.LiteralToken(None, None, "."),
                tinycss2.ast.IdentToken(
                    None, None, "relate-notebook-container"),
                tinycss2.ast.WhitespaceToken(None, None, " "),
            ]

        css_parsed, encoding = tinycss2.parse_stylesheet_bytes(css)
        for node in css_parsed:
            if not isinstance(node, tinycss2.ast.QualifiedRule):
                continue
            node.prelude[:0] = container_prefix()
        serialized = tinycss2.serialize(css_parsed)
        return serialized.encode(encoding.name)
Beispiel #7
0
def test_stylesheet_bytes(kwargs):
    """Adapt ``kwargs`` in place and pass them to ``parse_stylesheet_bytes``.

    ``css_bytes`` is encoded as latin1, any ``comment`` entry is removed, a
    truthy ``environment_encoding`` is replaced by ``lookup`` of its value,
    and the entries of ``SKIP`` are merged in before the call.
    """
    raw_css = kwargs['css_bytes']
    kwargs['css_bytes'] = raw_css.encode('latin1')
    kwargs.pop('comment', None)
    encoding_label = kwargs.get('environment_encoding')
    if encoding_label:
        kwargs['environment_encoding'] = lookup(encoding_label)
    kwargs.update(SKIP)
    return parse_stylesheet_bytes(**kwargs)
Beispiel #8
0
 def __init__(self, guess=None, filename=None, url=None, file_obj=None,
              string=None, encoding=None, base_url=None,
              url_fetcher=default_url_fetcher, _check_mime_type=False,
              media_type='print', font_config=None, counter_style=None,
              matcher=None, page_rules=None):
     """Select, parse and preprocess a CSS source.

     ``_select_source`` resolves the source from ``guess``, ``filename``,
     ``url``, ``file_obj`` or ``string``.  A non-bytes string source goes
     through ``tinycss2.parse_stylesheet``; anything else is parsed as
     bytes, with ``encoding`` as the environment encoding and the protocol
     encoding reported for the source.  The result is passed to
     ``preprocess_stylesheet`` together with ``self.matcher``,
     ``self.page_rules``, ``self.fonts``, ``font_config`` and
     ``counter_style``.
     """
     source_label = (
         filename or url or getattr(file_obj, 'name', 'CSS string'))
     PROGRESS_LOGGER.info(
         'Step 2 - Fetching and parsing CSS - %s', source_label)
     selected = _select_source(
         guess, filename, url, file_obj, string,
         base_url=base_url, url_fetcher=url_fetcher,
         check_css_mime_type=_check_mime_type)
     with selected as (source_type, source, base_url, protocol_encoding):
         if isinstance(source, bytes) or source_type != 'string':
             # Byte input: let tinycss2 work out the encoding.
             raw = source.read() if source_type == 'file_obj' else source
             stylesheet, _ = tinycss2.parse_stylesheet_bytes(
                 raw, environment_encoding=encoding,
                 protocol_encoding=protocol_encoding)
         else:
             # unicode, no encoding
             stylesheet = tinycss2.parse_stylesheet(source)
     self.base_url = base_url
     self.matcher = matcher or cssselect2.Matcher()
     if page_rules is None:
         self.page_rules = []
     else:
         self.page_rules = page_rules
     self.fonts = []
     preprocess_stylesheet(
         media_type, base_url, stylesheet, url_fetcher, self.matcher,
         self.page_rules, self.fonts, font_config, counter_style)
Beispiel #9
0
 def get_urls_for_retrieval_from_css(self, data):
     """Collect the URLs found in the rules of a CSS byte string.

     ``data`` is parsed with comments and whitespace skipped; for each
     rule, ``self.check_css_for_urls`` is applied to its prelude and then
     to its content, and the results are concatenated in that order.
     Parse-error nodes are skipped because they have neither attribute.
     """
     parsed_rules, _encoding = tinycss2.parse_stylesheet_bytes(
         data, skip_comments=True, skip_whitespace=True)
     urls = []
     for rule in parsed_rules:
         if rule.type == 'error':
             # tinycss2 reports invalid rules as error nodes without
             # ``prelude``/``content``; reading those raised AttributeError.
             continue
         for part_name in ('prelude', 'content'):
             urls = urls + self.check_css_for_urls(getattr(rule, part_name))
     return urls
Beispiel #10
0
    def parse(cls,
              url,
              owner_node=None,
              parent_style_sheet=None,
              parent_rule=None,
              encoding=None):
        """Load a style sheet from ``url`` and parse it.

        Arguments:
            url (str): The location of the style sheet.
            owner_node (Element, optional): The owner node of the style sheet;
                its 'type', 'href', 'title' and 'media' values are copied
                onto the new sheet.
            parent_style_sheet (CSSStyleSheet, optional): The parent CSS style
                sheet.
            parent_rule (CSSRule, optional): The parent CSS rule.
            encoding (str, optional): An advisory character encoding for the
                referenced style sheet. When omitted, the charset of the
                response content type is used, falling back to 'utf-8'.
        Returns:
            CSSStyleSheet: A new CSSStyleSheet object. It is returned without
                rules when loading raises URLError.
        """
        if owner_node is None:
            type_ = href = title = media = None
        else:
            type_ = owner_node.get('type')
            href = owner_node.get('href')
            title = owner_node.get('title')
            media = owner_node.get('media')
        css_style_sheet = CSSStyleSheet(
            owner_rule=parent_rule,
            type_=type_,
            href=href,
            owner_node=owner_node,
            parent_style_sheet=parent_style_sheet,
            title=title,
            media=media)
        logger = getLogger('.'.join((__name__, cls.__name__)))
        try:
            logger.debug("urlopen '{}'".format(url))
            data, headers = load(url)
            if encoding is None:
                content_type = get_content_type(headers)
                encoding = ('utf-8' if content_type is None
                            else content_type.get('charset', 'utf-8'))
            rules, _ = tinycss2.parse_stylesheet_bytes(
                css_bytes=data,
                protocol_encoding=encoding,
                skip_comments=True,
                skip_whitespace=True)
            css_style_sheet.css_rules.extend(
                CSSParser.parse_rules(
                    rules,
                    parent_style_sheet=css_style_sheet,
                    parent_rule=parent_rule))
        except URLError as exp:
            # Best effort: a sheet that cannot be loaded stays empty.
            logger.info("failed to parse: '{}': {!r}".format(url, exp))
        return css_style_sheet
Beispiel #11
0
 def __create_tynicss_stylesheet(cls, data):
     """Parse ``data`` into a tinycss2 rule list, skipping comments and
     whitespace.

     Objects with a ``read`` attribute are read and parsed as bytes;
     anything else is handed to ``tinycss2.parse_stylesheet`` directly.
     """
     parse_options = {"skip_comments": True, "skip_whitespace": True}
     if not hasattr(data, "read"):
         return tinycss2.parse_stylesheet(data, **parse_options)
     # File-like object: the detected encoding is not needed here.
     rules, _ = tinycss2.parse_stylesheet_bytes(data.read(), **parse_options)
     return rules
Beispiel #12
0
    def css(self):
        """Fetch and parse the site-local style sheets linked from
        ``self.tender_src``.

        Only ``link[rel="stylesheet"]`` hrefs that start with a single ``/``
        are requested through ``self.client``.  For every response with
        status 200, the value returned by ``tinycss2.parse_stylesheet_bytes``
        is appended to ``self.csses``.
        """
        document = PyQuery(self.tender_src)

        for link in document('link[rel="stylesheet"]'):
            href = link.get('href')
            is_local = (
                href and href.startswith('/') and not href.startswith('//'))
            if not is_local:
                continue
            resp = self.client.get(href)
            if resp.status_code != 200:
                continue
            parsed = tinycss2.parse_stylesheet_bytes(
                resp.content, skip_comments=True)
            self.csses.append(parsed)
Beispiel #13
0
def parse_css_file(fname):
    """Parse the CSS file ``fname`` and return its list of rules.

    The file is read as raw bytes and decoded by the parser as UTF-8, with
    comments and whitespace skipped.  Reading bytes avoids decoding the file
    with the platform's default text encoding and then re-encoding it as
    UTF-8, which corrupts non-ASCII content on non-UTF-8 locales.
    """
    with open(fname, 'rb') as f:
        content = f.read()
    rules, _encoding = parse_stylesheet_bytes(css_bytes=content,
                                              protocol_encoding='utf-8',
                                              environment_encoding='utf-8',
                                              skip_comments=True,
                                              skip_whitespace=True)
    return rules
def extract_css_classes_definitions(css_file):
    """Yield every identifier that directly follows a ``.`` literal in the
    selectors of ``css_file``.

    Nodes are processed first-in, first-out: the content of at-rules and the
    prelude of qualified rules are appended to the queue, so their tokens
    are inspected after the nodes already waiting.  The same name is yielded
    once per occurrence.
    """
    # deque gives O(1) pops from the front; list.pop(0) made this quadratic.
    from collections import deque

    with open(css_file, 'rb') as open_file:
        rules, _ = parse_stylesheet_bytes(open_file.read())
    pending = deque(rules)
    next_is_class_name = False
    while pending:
        node = pending.popleft()
        if node.type == 'at-rule' and node.content:
            pending.extend(node.content)
        elif node.type == 'qualified-rule':
            pending.extend(node.prelude)
        elif node.type == 'ident' and next_is_class_name:
            yield node.value
        # Only the token immediately after a '.' counts as a class name.
        next_is_class_name = (node.type == 'literal' and node.value == '.')
def extract_css_classes_definitions(css_file):
    """Yield every identifier that directly follows a ``.`` literal in the
    selectors of ``css_file``.

    Nodes are processed first-in, first-out: the content of at-rules and the
    prelude of qualified rules are appended to the queue, so their tokens
    are inspected after the nodes already waiting.  The same name is yielded
    once per occurrence.
    """
    # deque gives O(1) pops from the front; list.pop(0) made this quadratic.
    from collections import deque

    with open(css_file, 'rb') as open_file:
        rules, _ = parse_stylesheet_bytes(open_file.read())
    queue = deque(rules)
    next_is_class_name = False
    while queue:
        current = queue.popleft()
        if current.type == 'at-rule' and current.content:
            queue.extend(current.content)
        elif current.type == 'qualified-rule':
            queue.extend(current.prelude)
        elif current.type == 'ident' and next_is_class_name:
            yield current.value
        # Only the token immediately after a '.' counts as a class name.
        next_is_class_name = (
            current.type == 'literal' and current.value == '.')
Beispiel #16
0
def fromfile(filename, multiprop=False):
    """Function for parsing the CSS file

    The file is read in binary mode, because the byte-oriented parser
    expects bytes, and it is closed as soon as it has been read.

    :param filename:
        string name of file to parse
    :param multiprop:
        Argument as in CSSFile, default False
    :returns:
        a CSSFile representation of the CSS file
    """
    with open(filename, 'rb') as css_file:
        css_bytes = css_file.read()
    stylesheet, _enc = tinycss2.parse_stylesheet_bytes(css_bytes,
                                                       skip_whitespace=True,
                                                       skip_comments=True)
    return CSSFile(stylesheet, multiprop)
Beispiel #17
0
 def get_font_file(self, html):
     """Return the first ``woff`` URL found in a linked ``font.css`` sheet.

     Every ``<link rel="stylesheet">`` in ``html`` whose href ends with
     ``font.css`` is downloaded and parsed; the value of the first ``url``
     token ending in ``woff`` inside an at-rule block is returned.
     Returns None when no such URL is found.
     """
     soup = BeautifulSoup(html, 'html.parser')
     for link in soup.findAll('link', rel='stylesheet'):
         # A stylesheet link without href is skipped instead of raising.
         url = link.get('href') or ''
         if not url.endswith("font.css"):
             continue
         response = request.urlopen(url)
         try:
             css_bytes = response.read()
         finally:
             response.close()
         rules, _encoding = tinycss2.parse_stylesheet_bytes(
             css_bytes=css_bytes,
         )
         for rule in rules:
             # ``content`` is None for at-rules without a block
             # (e.g. ``@charset``, ``@import``), so guard before iterating.
             if rule.type != "at-rule" or not rule.content:
                 continue
             for token in rule.content:
                 if token.type == "url" and token.value.endswith("woff"):
                     return token.value
     return None
Beispiel #18
0
def get_css_at_rules(css_url, at_class):
    """Get at-rules of type ``at_class`` from CSS ``css_url``.

    The CSS is fetched with :py:func:`.googlefont.read_google_font_css` when
    the URL points to the google fonts api, otherwise with
    :py:func:`urllib.request.urlopen`.

    Both functions deliver the bytes of the URL, which are parsed by
    :py:func:`tinycss2.parse_stylesheet_bytes`.  For each at-rule whose
    keyword equals ``at_class.rule_name`` an ``at_class`` object is created
    and given the rule through its ``parse_css_rule`` method.

    :type css_url:   str
    :param css_url:  URL of the CSS (stylesheet) file

    :type at_class:  css.AtRule
    :param at_class: class of the at-rule

    :rtype: [css.AtRule]
    :return: list of ``at_class`` objects

    """
    if not is_google_font_url(css_url):
        with urlopen(css_url) as handle:
            css_bytes = handle.read()
    else:
        css_bytes = read_google_font_css(css_url)

    parsed_rules, _encoding = tinycss2.parse_stylesheet_bytes(
        css_bytes=css_bytes)

    at_rules = []
    for rule in parsed_rules:
        # keep only at-rules carrying the requested keyword
        if rule.type != 'at-rule' or rule.at_keyword != at_class.rule_name:
            continue
        obj = at_class(css_url=css_url)
        at_rules.append(obj)
        obj.parse_css_rule(rule)

    log.debug("found %s at-rules", len(at_rules))
    return at_rules
    def _extractCss(self, css):
        """
        Collect WOFF font URLs from the at-rules of a CSS byte string.

        The content of each at-rule is split into sections at ``;`` literals
        (via ``isplit``).  Within one rule, a section starting with the ident
        ``font-family`` supplies the name (value of its third token) and a
        section starting with ``src`` supplies every ``url`` token whose
        lower-cased value ends in ``woff``.  Values containing neither
        ``http://`` nor ``https://`` are joined onto ``self.pageUrl``.

        Returns a dict mapping each font name to a list with one URL for
        every at-rule that provided both a name and at least one URL; which
        of a rule's URLs is kept depends on set ordering.
        """
        stylesheet, _coding = tinycss2.parse_stylesheet_bytes(css)

        def is_semicolon(token):
            return token.type == "literal" and token.value.strip() == ";"

        at_rule_bodies = [
            node.content for node in stylesheet if node.type == "at-rule"
        ]
        split_bodies = [isplit(body, is_semicolon) for body in at_rule_bodies]

        fonts = {}
        for sections in split_bodies:
            name = None
            urls = []
            ident_sections = [
                section for section in sections
                if len(section) and section[0].type == "ident"
            ]
            for section in ident_sections:
                prop = section[0].value
                if prop == "font-family":
                    name = section[2].value
                if prop == 'src':
                    for token in section:
                        # Only WOFF sources are of interest.
                        if not (token.type == "url"
                                and token.value.lower().endswith("woff")):
                            continue
                        value = token.value
                        if "http://" in value or "https://" in value:
                            urls.append(value)
                        else:
                            urls.append(
                                urllib.parse.urljoin(self.pageUrl, value))

                        self.log.info(
                            "Found font-family tag: '%s' -> '%s'", name,
                            value)

            if name and urls:
                fonts.setdefault(name, []).append(list(set(urls))[0])
        self.log.info("Found %s font-family tags!", len(fonts))
        return fonts
Beispiel #20
0
def get_urls_from_css_resource(bytes_text):
    # type: (bytes) -> List[Text]
    """Return the URLs that ``_url_from_tags`` finds in a CSS byte string.

    ``@import`` rules with a non-empty prelude are searched for ``url`` and
    ``string`` tokens in that prelude; any other rule with content is
    searched for ``url`` tokens in its content.  Nodes with neither
    (block-less at-rules, parse-error nodes) are skipped.

    Returns an empty list when the stylesheet cannot be parsed.
    """
    def is_import_node(n):
        # Test the node type first: parse-error nodes expose neither
        # ``lower_at_keyword`` nor ``prelude``.
        return (n.type == "at-rule" and n.lower_at_keyword == "import"
                and n.prelude)

    try:
        rules, _encoding = tinycss2.parse_stylesheet_bytes(
            css_bytes=bytes_text, skip_comments=True, skip_whitespace=True)
    except Exception:
        logger.error("Failed to read CSS string")
        return []

    urls = []
    for rule in rules:
        if is_import_node(rule):
            extracted = _url_from_tags(rule.prelude, ("url", "string"))
        elif getattr(rule, "content", None):
            extracted = _url_from_tags(rule.content, ("url", ))
        else:
            continue
        urls.extend(extracted)
    return urls
Beispiel #21
0
 def __init__(self, guess=None, filename=None, url=None, file_obj=None,
              string=None, encoding=None, base_url=None,
              url_fetcher=default_url_fetcher, _check_mime_type=False,
              media_type='print', font_config=None):
     """Select, parse and preprocess a CSS source.

     ``_select_source`` resolves the source from ``guess``, ``filename``,
     ``url``, ``file_obj`` or ``string``.  A non-bytes string source goes
     through ``tinycss2.parse_stylesheet``; anything else is parsed as
     bytes, with ``encoding`` as the environment encoding and the protocol
     encoding reported for the source.  The result is passed to
     ``preprocess_stylesheet`` together with ``self.rules`` and
     ``self.fonts``.
     """
     selected = _select_source(
         guess, filename, url, file_obj, string,
         tree=None,
         base_url=base_url,
         url_fetcher=url_fetcher,
         check_css_mime_type=_check_mime_type)
     with selected as (source_type, source, base_url, protocol_encoding):
         if isinstance(source, bytes) or source_type != 'string':
             # Byte input: let tinycss2 work out the encoding.
             raw = source.read() if source_type == 'file_obj' else source
             stylesheet, _ = tinycss2.parse_stylesheet_bytes(
                 raw,
                 environment_encoding=encoding,
                 protocol_encoding=protocol_encoding)
         else:
             # unicode, no encoding
             stylesheet = tinycss2.parse_stylesheet(source)
     self.base_url = base_url
     self.rules = []
     # TODO: fonts are stored here and should be cleaned after rendering
     self.fonts = []
     preprocess_stylesheet(
         media_type, base_url, stylesheet, url_fetcher,
         self.rules, self.fonts, font_config)
Beispiel #22
0
def CCS_Find_Resources(resource_text):
    """Collect the URL tokens found in the rule blocks of a CSS string.

    ``resource_text`` is encoded and parsed; the content of every qualified
    rule and at-rule is scanned for URL tokens.  Tokens whose value is empty
    after stripping are ignored; the others are resolved with
    ``ResolveURL``.

    Returns a tuple ``(urlDictionary, rawUrlDictionary)``: the first maps
    each resolved URL to itself, the second maps it to the raw token value
    of its first occurrence.
    """
    urlDictionary = {}
    rawUrlDictionary = {}
    rules, _encoding = tinycss2.parse_stylesheet_bytes(
        css_bytes=str.encode(resource_text))

    block_rule_types = (tinycss2.ast.QualifiedRule, tinycss2.ast.AtRule)
    for rule in rules:
        contents = rule.content if isinstance(rule, block_rule_types) else ''
        if contents is None:
            # rule without content: nothing to scan
            continue

        for token in contents:
            if not isinstance(token, tinycss2.ast.URLToken):
                continue
            if not token.value.strip():
                continue
            url = ResolveURL(token.value)
            if url not in urlDictionary:
                urlDictionary[url] = url
                rawUrlDictionary[url] = token.value
    return (urlDictionary, rawUrlDictionary)
def CCS_Find_Resources(ccs_url):
    """Download the stylesheet at ``ccs_url`` and print each distinct URL
    found in its rule blocks.

    URL tokens inside qualified rules and at-rules are normalized: a leading
    ``//`` is removed, and a value without ``:`` is prefixed with the
    module-level ``pageURL``.  Each resulting URL is printed the first time
    it is seen.

    NOTE(review): the collected dictionary is not returned, so callers only
    get the printed output. Confirm whether a return value was intended.
    """
    urlDictionary = {}

    # Close the connection as soon as the body has been read.
    response = urlopen(ccs_url)
    try:
        css_bytes = response.read()
    finally:
        response.close()

    # The response charset is not passed on as protocol_encoding.
    rules, _encoding = tinycss2.parse_stylesheet_bytes(css_bytes=css_bytes)

    block_rule_types = (tinycss2.ast.QualifiedRule, tinycss2.ast.AtRule)
    for rule in rules:
        if not isinstance(rule, block_rule_types):
            continue
        # ``content`` is None for at-rules without a block (``@import``,
        # ``@charset``); iterating None raised TypeError.
        for token in rule.content or ():
            if not isinstance(token, tinycss2.ast.URLToken):
                continue
            url = token.value
            if url.startswith('//'):
                url = url[2:]
            elif ':' not in url:
                url = pageURL + url

            if url not in urlDictionary:
                urlDictionary[url] = url
                print(url)
Beispiel #24
0
import tinycss2 as tinycss


# Create parser object. Can add extra features by overriding the class's methods...
# Read the test stylesheet in binary mode: parse_stylesheet_bytes() takes
# bytes and returns the rules together with the encoding it used.
with open('./inputs/test.css', 'rb') as f:
    rules, encoding = tinycss.parse_stylesheet_bytes(
        f.read(), skip_comments=True, skip_whitespace=True)

print(rules)
print(encoding)
Beispiel #25
0
def get_rules_from_str(input, codec='utf-8'):
    """Parse the CSS text ``input`` and return its non-whitespace rules.

    The text is encoded with ``codec`` before parsing; nodes whose type is
    ``'whitespace'`` are removed from the result with ``elel.filter``.
    """
    parsed, _used_encoding = tycss.parse_stylesheet_bytes(input.encode(codec))

    def is_not_whitespace(rule):
        return rule.type != 'whitespace'

    return elel.filter(parsed, is_not_whitespace)
Beispiel #26
0
def gen_dummy_atrule():
    """Return the first node parsed from a fixed placeholder at-rule."""
    css_bytes = '@dummy (dummy:dummy) {dummy{}}'.encode('utf-8')
    parsed_rules, _codec = tycss.parse_stylesheet_bytes(css_bytes)
    return parsed_rules[0]