def _load_css(user):
    """
    Loads default.css and user.css style sheets, and merges them into a
    complete css like this:

    - if a css node exists in both, the one defined in user.css is taken
      into the resulting style sheet
    - if a css node exists in only one of the sheets, it is taken into the
      resulting style sheet
    """
    css_default = None
    css_user = None
    try:
        with open(os.path.join("users", "default.css"), "rb") as css_file:
            css_default = css_file.read()
    except OSError:
        pass
    try:
        with open(os.path.join("users", user + ".css"), "rb") as css_file:
            css_user = css_file.read()
    except OSError:
        pass

    if not css_default and not css_user:
        return ""

    rules = dict()
    if css_default:
        rules_default, _ = tinycss2.parse_stylesheet_bytes(
            css_default, skip_whitespace=True, skip_comments=True)
        for rule in rules_default:
            if rule.type == "qualified-rule":
                rules[str(rule.prelude)] = rule
    if css_user:
        rules_user, _ = tinycss2.parse_stylesheet_bytes(
            css_user, skip_whitespace=True, skip_comments=True)
        for rule in rules_user:
            if rule.type == "qualified-rule":
                rules[str(rule.prelude)] = rule

    css = ""
    for rule in rules.values():
        css += f"\n{rule.serialize()}"
    return css
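Most of the snippets in this collection unpack two values from tinycss2.parse_stylesheet_bytes: the list of parsed rules and the detected encoding. A minimal, self-contained sketch of that return shape (the stylesheet bytes here are made up for illustration):

import tinycss2

rules, encoding = tinycss2.parse_stylesheet_bytes(
    b"p { color: red }\n@media print { p { color: black } }",
    skip_comments=True, skip_whitespace=True)

for rule in rules:
    # Qualified rules and at-rules both expose .prelude (the tokens before
    # the block); .content holds the tokens inside the braces, if any.
    print(rule.type, tinycss2.serialize(rule.prelude).strip())

print(encoding.name)  # the detected encoding, e.g. 'utf-8'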
def _process_nbconvert_css(self, css):
    print("Processing downloaded ipython notebook CSS.")
    try:
        css = css.split(IPYTHON_NOTEBOOK_DECLARE_STR.encode())[1]
        css = IPYTHON_NOTEBOOK_DECLARE_STR.encode() + css
    except IndexError:
        raise ValueError("Bad splitter for notebook css %s"
                         % IPYTHON_NOTEBOOK_DECLARE_STR)
    print("Done.")

    if REPLACE_HIGHLIGHT_WITH_CODEHILITE:
        css = css.replace(HIGHLIGHT_CSS_CLASS.encode() + b" ",
                          CODEHILITE_CSS_CLASS.encode() + b" ")

    import tinycss2
    css_parsed, encoding = tinycss2.parse_stylesheet_bytes(css)
    for n in css_parsed:
        if isinstance(n, tinycss2.ast.QualifiedRule):
            n.prelude[0:0] = [
                tinycss2.ast.LiteralToken(None, None, "."),
                tinycss2.ast.IdentToken(None, None, "relate-notebook-container"),
                tinycss2.ast.WhitespaceToken(None, None, " "),
            ]
    result = tinycss2.serialize(css_parsed).encode(encoding.name)
    return result
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
             string=None, encoding=None, base_url=None,
             url_fetcher=default_url_fetcher, _check_mime_type=False,
             media_type='print', font_config=None, matcher=None,
             page_rules=None):
    PROGRESS_LOGGER.info(
        'Step 2 - Fetching and parsing CSS - %s',
        filename or url or getattr(file_obj, 'name', 'CSS string'))
    result = _select_source(
        guess, filename, url, file_obj, string,
        base_url=base_url, url_fetcher=url_fetcher,
        check_css_mime_type=_check_mime_type)
    with result as (source_type, source, base_url, protocol_encoding):
        if source_type == 'string' and not isinstance(source, bytes):
            # unicode, no encoding
            stylesheet = tinycss2.parse_stylesheet(source)
        else:
            if source_type == 'file_obj':
                source = source.read()
            stylesheet, encoding = tinycss2.parse_stylesheet_bytes(
                source, environment_encoding=encoding,
                protocol_encoding=protocol_encoding)
        self.base_url = base_url
        self.matcher = matcher or cssselect2.Matcher()
        self.page_rules = [] if page_rules is None else page_rules
        # TODO: fonts are stored here and should be cleaned after rendering
        self.fonts = []
        preprocess_stylesheet(
            media_type, base_url, stylesheet, url_fetcher, self.matcher,
            self.page_rules, self.fonts, font_config)
def get_urls_from_css_resource(bytes_text):
    # type: (bytes) -> List[Text]
    def is_import_node(n):
        return n.type == "at-rule" and n.lower_at_keyword == "import"

    def is_font_node(n):
        return n.type == "at-rule" and n.lower_at_keyword == "font-face"

    try:
        rules, encoding = tinycss2.parse_stylesheet_bytes(
            css_bytes=bytes_text, skip_comments=True, skip_whitespace=True)
    except Exception:
        logger.error("Failed to read CSS string")
        return []

    urls = []
    for rule in rules:
        tags = rule.content
        if is_import_node(rule):
            logger.debug("The node has @import")
            tags = rule.prelude
        if is_font_node(rule):
            logger.debug("The node has @font-face")
            tags = rule.content
        if tags:
            urls.extend(list(_url_from_tags(tags)))
    return urls
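A quick way to see why the function above switches between prelude and content: @import has no { } block, so its URL sits in the rule's prelude, while @font-face keeps its src URLs inside content. A small sketch under that assumption (the input CSS is illustrative only):

import tinycss2

rules, _ = tinycss2.parse_stylesheet_bytes(
    b"@import url(extra.css);\n"
    b"@font-face { font-family: X; src: url(font.woff); }",
    skip_comments=True, skip_whitespace=True)

for rule in rules:
    # Both rules here are at-rules, so lower_at_keyword is always present.
    tokens = rule.prelude if rule.lower_at_keyword == "import" else rule.content
    print(rule.lower_at_keyword, [t.value for t in tokens if t.type == "url"])
    # roughly: import ['extra.css']  /  font-face ['font.woff']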
def get_dummy_sel_rule(sel):
    '''
    '''
    input = sel + '{}'
    cssbyts = input.encode('utf-8')
    rules, codec = tycss.parse_stylesheet_bytes(cssbyts)
    return (rules[0])
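Rough usage of the helper above (the selector string is made up, and tinycss2 is assumed to be imported as tycss, as in the surrounding snippets): appending "{}" turns a bare selector into a complete rule whose prelude can then be inspected or re-serialized.

rule = get_dummy_sel_rule('div > p.note')    # hypothetical selector
print(rule.type)                              # 'qualified-rule'
print(tycss.serialize(rule.prelude).strip())  # 'div > p.note'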
def test_stylesheet_bytes(kwargs):
    kwargs['css_bytes'] = kwargs['css_bytes'].encode('latin1')
    kwargs.pop('comment', None)
    if kwargs.get('environment_encoding'):
        kwargs['environment_encoding'] = lookup(kwargs['environment_encoding'])
    kwargs.update(SKIP)
    return parse_stylesheet_bytes(**kwargs)
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
             string=None, encoding=None, base_url=None,
             url_fetcher=default_url_fetcher, _check_mime_type=False,
             media_type='print', font_config=None, counter_style=None,
             matcher=None, page_rules=None):
    PROGRESS_LOGGER.info(
        'Step 2 - Fetching and parsing CSS - %s',
        filename or url or getattr(file_obj, 'name', 'CSS string'))
    result = _select_source(
        guess, filename, url, file_obj, string,
        base_url=base_url, url_fetcher=url_fetcher,
        check_css_mime_type=_check_mime_type)
    with result as (source_type, source, base_url, protocol_encoding):
        if source_type == 'string' and not isinstance(source, bytes):
            # unicode, no encoding
            stylesheet = tinycss2.parse_stylesheet(source)
        else:
            if source_type == 'file_obj':
                source = source.read()
            stylesheet, encoding = tinycss2.parse_stylesheet_bytes(
                source, environment_encoding=encoding,
                protocol_encoding=protocol_encoding)
        self.base_url = base_url
        self.matcher = matcher or cssselect2.Matcher()
        self.page_rules = [] if page_rules is None else page_rules
        self.fonts = []
        preprocess_stylesheet(
            media_type, base_url, stylesheet, url_fetcher, self.matcher,
            self.page_rules, self.fonts, font_config, counter_style)
def get_urls_for_retrieval_from_css(self, data):
    urls = []
    css_rules, css_encoding = tinycss2.parse_stylesheet_bytes(
        data, skip_comments=True, skip_whitespace=True)
    for rule in css_rules:
        urls = urls + self.check_css_for_urls(rule.prelude)
        urls = urls + self.check_css_for_urls(rule.content)
    return urls
def parse(cls, url, owner_node=None, parent_style_sheet=None,
          parent_rule=None, encoding=None):
    """Parses the CSS style sheet.

    Arguments:
        url (str): The location of the style sheet.
        owner_node (Element, optional): The owner node of the style sheet.
        parent_style_sheet (CSSStyleSheet, optional): The parent CSS style
            sheet.
        parent_rule (CSSRule, optional): The parent CSS rule.
        encoding (str, optional): An advisory character encoding for the
            referenced style sheet.

    Returns:
        CSSStyleSheet: A new CSSStyleSheet object.
    """
    extra = dict({
        'type_': None,
        'href': None,
        'owner_node': owner_node,
        'parent_style_sheet': parent_style_sheet,
        'title': None,
        'media': None,
    })
    if owner_node is not None:
        extra.update({
            'type_': owner_node.get('type'),
            'href': owner_node.get('href'),
            'title': owner_node.get('title'),
            'media': owner_node.get('media'),
        })
    css_style_sheet = CSSStyleSheet(owner_rule=parent_rule, **extra)
    logger = getLogger('{}.{}'.format(__name__, cls.__name__))
    try:
        logger.debug('urlopen \'{}\''.format(url))
        data, headers = load(url)
        if encoding is None:
            content_type = get_content_type(headers)
            if content_type is None:
                encoding = 'utf-8'
            else:
                encoding = content_type.get('charset', 'utf-8')
        rules, encoding = tinycss2.parse_stylesheet_bytes(
            css_bytes=data,
            protocol_encoding=encoding,
            skip_comments=True,
            skip_whitespace=True)
        css_rules = CSSParser.parse_rules(
            rules,
            parent_style_sheet=css_style_sheet,
            parent_rule=parent_rule)
        css_style_sheet.css_rules.extend(css_rules)
    except URLError as exp:
        logger.info('failed to parse: \'{}\': {}'.format(url, repr(exp)))
    return css_style_sheet
def __create_tynicss_stylesheet(cls, data):
    if hasattr(data, "read"):
        # is file like object
        css_parser, _ = tinycss2.parse_stylesheet_bytes(
            data.read(), skip_comments=True, skip_whitespace=True)
    else:
        css_parser = tinycss2.parse_stylesheet(
            data, skip_comments=True, skip_whitespace=True)
    return css_parser
def css(self):
    pq = PyQuery(self.tender_src)
    for style in pq('link[rel="stylesheet"]'):
        href = style.get('href')
        if href and href.startswith('/') and not href.startswith('//'):
            resp = self.client.get(href)
            if resp.status_code == 200:
                css = resp.content
                self.csses.append(
                    tinycss2.parse_stylesheet_bytes(css, skip_comments=True))
def parse_css_file(fname):
    with open(fname, 'r') as f:
        content = f.read()
        f.close()
    content = content.encode('utf-8')
    rules, encoding = parse_stylesheet_bytes(css_bytes=content,
                                             protocol_encoding='utf-8',
                                             environment_encoding='utf-8',
                                             skip_comments=True,
                                             skip_whitespace=True)
    return rules
def extract_css_classes_definitions(css_file):
    with open(css_file, 'rb') as open_file:
        rules, _ = parse_stylesheet_bytes(open_file.read())
    next_is_class_name = False
    while rules:
        rule = rules.pop(0)
        if rule.type == 'at-rule' and rule.content:
            rules.extend(rule.content)
        elif rule.type == 'qualified-rule':
            rules.extend(rule.prelude)
        elif rule.type == 'ident' and next_is_class_name:
            yield rule.value
        next_is_class_name = (rule.type == 'literal' and rule.value == '.')
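A small way to exercise the generator above; the file and class names are made up, and the expected output assumes the usual tinycss2 tokenization (an identifier yielded whenever it follows a "." literal).

import tempfile

with tempfile.NamedTemporaryFile(suffix=".css", delete=False) as tmp:
    tmp.write(b".btn, .wide { color: red }\n@media print { .compact { margin: 0 } }")

print(list(extract_css_classes_definitions(tmp.name)))
# should print something like: ['btn', 'wide', 'compact']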
def fromfile(filename, multiprop=False):
    """Function for parsing the CSS file

    :param filename: string name of file to parse
    :param multiprop: Argument as in CSSFile, default False
    :returns: a CSSFile representation of the CSS file
    """
    # read raw bytes; parse_stylesheet_bytes expects a bytes object
    bytes = open(filename, 'rb').read()
    stylesheet, enc = tinycss2.parse_stylesheet_bytes(bytes,
                                                      skip_whitespace=True,
                                                      skip_comments=True)
    return CSSFile(stylesheet, multiprop)
def get_font_file(self, html):
    soup = BeautifulSoup(html, 'html.parser')
    urls = soup.findAll('link', rel='stylesheet')
    for u in urls:
        url = u['href']
        if url.endswith("font.css"):
            response = request.urlopen(url)
            rules, encoding = tinycss2.parse_stylesheet_bytes(
                css_bytes=response.read(),
            )
            for rule in rules:
                if rule.type == "at-rule":
                    for list in rule.content:
                        if list.type == "url":
                            if list.value.endswith("woff"):
                                return list.value
def get_css_at_rules(css_url, at_class):
    """Get at-rules of type ``at_class`` from CSS ``css_url``.

    The CSS file is read by :py:func:`urllib.request.urlopen`.  If the URL
    points to the google fonts api, the CSS is read by
    :py:func:`.googlefont.read_google_font_css`.

    Both functions return the byte stream from the URL, which is parsed by
    :py:func:`tinycss2.parse_stylesheet_bytes`.  The resulting CSS rules are
    filtered by ``at_class``.

    :type css_url: str
    :param css_url: URL of the CSS (stylesheet) file

    :type at_class: css.AtRule
    :param at_class: class of the at-rule

    :rtype: [css.AtRule]
    :return: list of ``at_class`` objects
    """
    if is_google_font_url(css_url):
        css_bytes = read_google_font_css(css_url)
    else:
        with urlopen(css_url) as handle:
            css_bytes = handle.read()

    # parse css ...
    css_rules, _encoding = tinycss2.parse_stylesheet_bytes(css_bytes=css_bytes)

    # filter @font-face (at rules)
    font_face_rules = [
        rule for rule in css_rules
        if (rule.type == 'at-rule' and rule.at_keyword == at_class.rule_name)
    ]

    # instances of class CSSRule
    css_rules = []
    for rule in font_face_rules:
        obj = at_class(css_url=css_url)
        css_rules.append(obj)
        obj.parse_css_rule(rule)

    log.debug("found %s at-rules", len(css_rules))
    return css_rules
def _extractCss(self, css):
    # Parsing CSS is always a clusterfuck
    ss, coding = tinycss2.parse_stylesheet_bytes(css)
    ssf = [tmp.content for tmp in ss if tmp.type == "at-rule"]
    ssf = [
        isplit(tmp, lambda x: x.type == "literal" and x.value.strip() == ";")
        for tmp in ssf
    ]

    fonts = {}
    for fontdef in ssf:
        name = None
        urls = []
        for subsection in [
                tmp for tmp in fontdef if len(tmp) and tmp[0].type == "ident"
        ]:
            if subsection[0].value == "font-family":
                name = subsection[2].value
            if subsection[0].value == 'src':
                for tmp in subsection:
                    # We want the woffs
                    if tmp.type == "url" and tmp.value.lower().endswith("woff"):
                        value = tmp.value
                        if "http://" in value or "https://" in value:
                            urls.append(value)
                        else:
                            urls.append(
                                urllib.parse.urljoin(self.pageUrl, value))
                        self.log.info(
                            "Found font-family tag: '%s' -> '%s'", name, value)
        if name and urls:
            fonts.setdefault(name, [])
            fonts[name].append(list(set(urls))[0])

    self.log.info("Found %s font-family tags!", len(fonts))
    return fonts
def get_urls_from_css_resource(bytes_text):
    # type: (bytes) -> List[Text]
    def is_import_node(n):
        return n.prelude and n.type == "at-rule" and n.lower_at_keyword == "import"

    try:
        rules, encoding = tinycss2.parse_stylesheet_bytes(
            css_bytes=bytes_text, skip_comments=True, skip_whitespace=True)
    except Exception:
        logger.error("Failed to read CSS string")
        return []

    urls = []
    for rule in rules:
        if is_import_node(rule):
            extracted = _url_from_tags(rule.prelude, ("url", "string"))
        elif rule.content:
            extracted = _url_from_tags(rule.content, ("url", ))
        else:
            continue
        urls.extend(list(extracted))
    return urls
def __init__(self, guess=None, filename=None, url=None, file_obj=None,
             string=None, encoding=None, base_url=None,
             url_fetcher=default_url_fetcher, _check_mime_type=False,
             media_type='print', font_config=None):
    result = _select_source(guess, filename, url, file_obj, string,
                            tree=None, base_url=base_url,
                            url_fetcher=url_fetcher,
                            check_css_mime_type=_check_mime_type)
    with result as (source_type, source, base_url, protocol_encoding):
        if source_type == 'string' and not isinstance(source, bytes):
            # unicode, no encoding
            stylesheet = tinycss2.parse_stylesheet(source)
        else:
            if source_type == 'file_obj':
                source = source.read()
            stylesheet, encoding = tinycss2.parse_stylesheet_bytes(
                source, environment_encoding=encoding,
                protocol_encoding=protocol_encoding)
        self.base_url = base_url
        self.rules = []
        # TODO: fonts are stored here and should be cleaned after rendering
        self.fonts = []
        preprocess_stylesheet(media_type, base_url, stylesheet, url_fetcher,
                              self.rules, self.fonts, font_config)
def CCS_Find_Resources(resource_text):
    urlDictionary = {}
    rawUrlDictionary = {}
    global pageURL
    #print ("CCS pageURL2 = " + pageURL)

    ##resource_resolver = deepcopy(markup_resolver)
    ##resource_resolver.resource_url = ccs_url
    ##resource_text = ResourceLoader.download(resource_resolver.resource_url)

    #response = urlopen(ccs_url)
    # print(response)
    # print(response.info())
    # print(response.info().get_content_type())
    #print(resource_text)

    rules, encoding = tinycss2.parse_stylesheet_bytes(
        css_bytes=str.encode(resource_text))

    for rule in rules:
        contents = ''
        if (isinstance(rule, tinycss2.ast.QualifiedRule)
                or isinstance(rule, tinycss2.ast.AtRule)):
            contents = rule.content
        if contents == None:
            continue
        for token in contents:
            if (isinstance(token, tinycss2.ast.URLToken)):
                url = token.value
                url = url.strip()
                if (len(url) == 0):
                    continue
                url = ResolveURL(token.value)
                if (url not in urlDictionary):
                    urlDictionary[url] = url
                    rawUrlDictionary[url] = token.value
                    #print(url)

    return (urlDictionary, rawUrlDictionary)
def CCS_Find_Resources(ccs_url):
    urlDictionary = {}
    response = urlopen(ccs_url)
    # print(response)
    # print(response.info())
    # print(response.info().get_content_type())
    rules, encoding = tinycss2.parse_stylesheet_bytes(
        css_bytes=response.read()  #,
        # Python 3.x
        #protocol_encoding=response.info().get_content_type().get_param('charset'),
        # Python 2.x
        #protocol_encoding=response.info().gettype().getparam('charset'),
    )
    for rule in rules:
        contents = ''
        if (isinstance(rule, tinycss2.ast.QualifiedRule)
                or isinstance(rule, tinycss2.ast.AtRule)):
            contents = rule.content
        for token in contents:
            if (isinstance(token, tinycss2.ast.URLToken)):
                url = token.value
                if (len(url) >= 2 and url[0] == '/' and url[1] == '/'):
                    temp = url[2:]
                    url = temp
                elif (url.find(':') == -1):
                    temp = pageURL + url
                    url = temp
                if (url not in urlDictionary):
                    urlDictionary[url] = url
                    print(url)
import tinycss2 as tinycss

# Create parser object. Can add extra features by overriding the class's methods...
# Import the test stylesheet; parse_stylesheet_bytes expects raw bytes, so the
# file is opened in binary mode.
with open('./inputs/test.css', 'rb') as f:
    rules, encoding = tinycss.parse_stylesheet_bytes(
        f.read(),
        skip_comments=True,
        skip_whitespace=True
    )

print(rules)
print(encoding)
def get_rules_from_str(input, codec='utf-8'):
    cssbyts = input.encode(codec)
    rules, codec = tycss.parse_stylesheet_bytes(cssbyts)
    rules = elel.filter(rules, lambda rule: (rule.type != 'whitespace'))
    return (rules)
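Illustrative call for the helper above (assuming the same tycss/elel imports as the neighbouring snippets): the whitespace nodes between rules are filtered out, leaving only the qualified rules.

rules = get_rules_from_str("p { color: red }\n\nh1 { margin: 0 }")
print([r.type for r in rules])  # roughly: ['qualified-rule', 'qualified-rule']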
def gen_dummy_atrule():
    input = '''@dummy (dummy:dummy) {dummy{}}'''
    cssbyts = input.encode('utf-8')
    rules, codec = tycss.parse_stylesheet_bytes(cssbyts)
    return (rules[0])