def test_content_preserved():
    """
    Check that escape_cdata keeps the expected text of the first two
    sample documents and swaps the comparison expression in the first
    one for its placeholder form.
    """
    first = escape_cdata(docs[0])
    assert "Success!" in first
    # The raw expression must be gone, replaced by the placeholder tokens.
    assert "1 > 0 && 2 < 3" not in first
    assert "1 __GT__ 0 __AMP____AMP__ 2 __LT__ 3" in first

    second = escape_cdata(docs[1])
    assert "Success!" in second
    assert "A second success!" in second
def test_lxml_output():
    """
    Round-trip each escaped sample document through fromstring/tostring
    (method="xml") and check how many markup characters remain.
    """
    # The first two documents must serialize without any of these characters.
    for index in (0, 1):
        serialized = tostring(
            fromstring(escape_cdata(docs[index])), method="xml")
        for char in (">", "<", "&"):
            assert char not in serialized

    # The third document keeps exactly these occurrences.
    serialized = tostring(fromstring(escape_cdata(docs[2])), method="xml")
    assert serialized.count(">") == 1
    assert serialized.count("<") == 1
    assert serialized.count("&") == 2
def get_theme_doc(self, resp, url, should_escape_cdata=False,
                  should_fix_meta_charset_position=False):
    """
    Parse the theme response into a document and make its links absolute.

    The text is taken from ``resp.unicode_body``.  When
    ``should_escape_cdata`` is true it is first passed through
    ``escape_cdata``; when ``should_fix_meta_charset_position`` is true it
    is then passed through ``fix_meta_charset_position``.  The result is
    parsed with ``self.parse_document(text, url)``, handed to
    ``self.make_links_absolute`` and returned.
    """
    markup = resp.unicode_body
    if should_escape_cdata:
        markup = escape_cdata(markup)
    if should_fix_meta_charset_position:
        markup = fix_meta_charset_position(markup)

    parsed = self.parse_document(markup, url)
    self.make_links_absolute(parsed)
    return parsed
def test_no_escape_outside_cdata():
    """
    Check that the third sample document still contains its spaced
    ``>``, ``&&`` and ``<`` operators after escape_cdata.
    """
    escaped = escape_cdata(docs[2])
    for operator in (" > ", " && ", " < "):
        assert operator in escaped
def test_symmetry():
    """
    Check that unescape_cdata undoes escape_cdata for every sample document.
    """
    for original in docs:
        round_tripped = unescape_cdata(escape_cdata(original))
        assert_equals(round_tripped, original)
def apply_rules(self, req, resp, resource_fetcher, log, default_theme=None):
    """
    Apply whichever rules are appropriate to the request/response.

    Page classes are gathered from ``run_matches``, from the
    ``X-Deliverance-Page-Class`` response header and from the
    ``deliverance.page_classes`` WSGI environ key (falling back to
    ``['default']``).  The rules registered for those classes are applied
    to the theme document, followed by ``standard_rule`` unless an applied
    rule has ``suppress_standard`` set.  The themed document is then
    serialized into ``resp.body``.

    ``resp`` is returned early if ``AbortTheme`` is raised (unchanged
    during matching; possibly charset-forced during rule application), or
    unchanged if no theme can be determined.
    """
    # Headers declared in the body are merged with the real ones.
    meta_headers = parse_meta_headers(resp.body)
    if meta_headers:
        response_headers = HeaderDict(resp.headerlist + meta_headers)
    else:
        response_headers = resp.headers

    try:
        classes = run_matches(self.matchers, req, resp, response_headers, log)
    except AbortTheme:
        return resp

    if 'X-Deliverance-Page-Class' in response_headers:
        header_classes = response_headers['X-Deliverance-Page-Class'].strip()
        log.debug(self, "Found page class %s in headers", header_classes)
        classes.extend(header_classes.split())
    if 'deliverance.page_classes' in req.environ:
        environ_classes = req.environ['deliverance.page_classes']
        log.debug(self, "Found page class in WSGI environ: %s",
                  ' '.join(environ_classes))
        classes.extend(environ_classes)
    if not classes:
        classes = ['default']

    # Collect each rule once, in class order; the last newly added rule
    # that carries a theme decides the theme.
    rules = []
    theme = None
    for class_name in classes:
        ## FIXME: handle case of unknown classes
        ## Or do that during compilation?
        for candidate in self.rules_by_class.get(class_name, []):
            if candidate in rules:
                continue
            rules.append(candidate)
            if candidate.theme:
                theme = candidate.theme

    # Fall back to the ruleset default, then to the caller-supplied default.
    if theme is None:
        theme = self.default_theme
        if theme is None:
            if default_theme is None:
                log.error(self, "No theme has been defined for the request")
                return resp
            theme = Theme(href=default_theme,
                          source_location=self.source_location)

    try:
        theme_href = theme.resolve_href(req, resp, log)
        original_theme_resp = self.get_theme_response(
            theme_href, resource_fetcher, log)
        theme_doc = self.get_theme_doc(
            original_theme_resp, theme_href,
            should_escape_cdata=True,
            should_fix_meta_charset_position=True)

        resp = force_charset(resp)
        content_text = resp.unicode_body
        content_text = escape_cdata(content_text)
        content_text = fix_meta_charset_position(content_text)
        content_doc = self.parse_document(content_text, req.url)

        run_standard = True
        for rule in rules:
            if rule.match is not None and not rule.match(
                    req, resp, response_headers, log):
                log.debug(rule, "Skipping <rule>")
                continue
            rule.apply(content_doc, theme_doc, resource_fetcher, log)
            if rule.suppress_standard:
                run_standard = False
        if run_standard:
            ## FIXME: should it be possible to put the standard rule in the ruleset?
            standard_rule.apply(content_doc, theme_doc, resource_fetcher, log)
    except AbortTheme:
        return resp

    remove_content_attribs(theme_doc)
    ## FIXME: handle caching?

    # The doctype comes from the theme when the theme response starts with
    # one, otherwise from the content document.
    if original_theme_resp.body.strip().startswith("<!DOCTYPE"):
        doctype_source = theme_doc
    else:
        doctype_source = content_doc
    doctype = doctype_source.getroottree().docinfo.doctype
    method = "xml" if "XHTML" in doctype else "html"

    # Re-parse the themed markup with the chosen doctype prepended.
    themed_markup = doctype + tostring(theme_doc, include_meta_content_type=True)
    final_tree = document_fromstring(themed_markup).getroottree()

    resp.body = tostring(final_tree, method=method,
                         include_meta_content_type=True)
    resp.body = unescape_cdata(resp.body)
    return resp
def apply_rules(self, req, resp, resource_fetcher, log, default_theme=None):
    """
    Apply whichever rules are appropriate to the request/response.

    Page classes are gathered from ``run_matches``, from the
    ``X-Deliverance-Page-Class`` response header and from the
    ``deliverance.page_classes`` WSGI environ key (falling back to
    ``['default']``).  The rules registered for those classes are applied
    to the theme document, followed by ``standard_rule`` unless an applied
    rule has ``suppress_standard`` set.  The themed document is then
    serialized into ``resp.body``.

    ``resp`` is returned early if ``AbortTheme`` is raised (unchanged
    during matching; possibly charset-forced during rule application), or
    unchanged if no theme can be determined.
    """
    # Headers declared in the body are merged with the real ones.
    meta_headers = parse_meta_headers(resp.body)
    if meta_headers:
        response_headers = ResponseHeaders(resp.headerlist + meta_headers)
    else:
        response_headers = resp.headers

    try:
        classes = run_matches(self.matchers, req, resp, response_headers, log)
    except AbortTheme:
        return resp

    if 'X-Deliverance-Page-Class' in response_headers:
        header_classes = response_headers['X-Deliverance-Page-Class'].strip()
        log.debug(self, "Found page class %s in headers", header_classes)
        classes.extend(header_classes.split())
    if 'deliverance.page_classes' in req.environ:
        environ_classes = req.environ['deliverance.page_classes']
        log.debug(self, "Found page class in WSGI environ: %s",
                  ' '.join(environ_classes))
        classes.extend(environ_classes)
    if not classes:
        classes = ['default']

    # Collect each rule once, in class order; the last newly added rule
    # that carries a theme decides the theme.
    rules = []
    theme = None
    for class_name in classes:
        ## FIXME: handle case of unknown classes
        ## Or do that during compilation?
        for candidate in self.rules_by_class.get(class_name, []):
            if candidate in rules:
                continue
            rules.append(candidate)
            if candidate.theme:
                theme = candidate.theme

    # Fall back to the ruleset default, then to the caller-supplied default.
    if theme is None:
        theme = self.default_theme
        if theme is None:
            if default_theme is None:
                log.error(self, "No theme has been defined for the request")
                return resp
            theme = Theme(href=default_theme,
                          source_location=self.source_location)

    try:
        theme_href = theme.resolve_href(req, resp, log)
        original_theme_resp = self.get_theme_response(
            theme_href, resource_fetcher, log)
        theme_doc = self.get_theme_doc(
            original_theme_resp, theme_href,
            should_escape_cdata=True,
            should_fix_meta_charset_position=True)

        resp = force_charset(resp)
        content_text = resp.unicode_body
        content_text = escape_cdata(content_text)
        content_text = fix_meta_charset_position(content_text)
        content_doc = self.parse_document(content_text, req.url)

        run_standard = True
        for rule in rules:
            if rule.match is not None and not rule.match(
                    req, resp, response_headers, log):
                log.debug(rule, "Skipping <rule>")
                continue
            rule.apply(content_doc, theme_doc, resource_fetcher, log)
            if rule.suppress_standard:
                run_standard = False
        if run_standard:
            ## FIXME: should it be possible to put the standard rule in the ruleset?
            standard_rule.apply(content_doc, theme_doc, resource_fetcher, log)
    except AbortTheme:
        return resp

    remove_content_attribs(theme_doc)
    ## FIXME: handle caching?

    # The doctype comes from the theme when the theme response starts with
    # one, otherwise from the content document.
    if original_theme_resp.body.strip().startswith("<!DOCTYPE"):
        doctype_source = theme_doc
    else:
        doctype_source = content_doc
    doctype = doctype_source.getroottree().docinfo.doctype
    method = "xml" if "XHTML" in doctype else "html"

    # Re-parse the themed markup with the chosen doctype prepended.
    themed_markup = doctype + tostring(theme_doc, include_meta_content_type=True)
    final_tree = document_fromstring(themed_markup).getroottree()

    resp.body = tostring(final_tree, method=method,
                         include_meta_content_type=True)
    resp.body = unescape_cdata(resp.body)
    return resp
def apply(self, content_doc, theme_doc, resource_fetcher, log):
    """
    Applies this action to the theme_doc.

    When ``self.content_href`` is set, the content is fetched through
    ``resource_fetcher`` and parsed, replacing ``content_doc``; a non-200
    status logs a warning and skips the rule.  The rule is also skipped
    when ``self.if_content_matches`` is false, or when the content or
    theme selection is empty, in which case ``self.nocontent`` /
    ``self.notheme`` choose between raising ``AbortTheme`` ('abort'),
    logging at debug level ('ignore') and logging a warning (anything
    else).  When several theme elements match, ``self.manytheme`` decides
    whether to abort and which element ('first', otherwise the last) to
    use.  Finally the transformation is delegated to
    ``self.apply_transformation``.
    """
    if self.content_href:
        ## FIXME: Is this a weird way to resolve the href?
        href = urlparse.urljoin(log.request.url, self.content_href)
        content_resp = resource_fetcher(href)
        log.debug(
            self, 'Fetching resource from href="%s": %s',
            href, content_resp.status)
        if content_resp.status_int != 200:
            log.warn(
                self, 'Resource %s returned the status %s; skipping rule',
                href, content_resp.status)
            return
        fetched = fix_meta_charset_position(escape_cdata(content_resp.body))
        content_doc = document_fromstring(
            fetched, base_url=self.content_href)

    if not self.if_content_matches(content_doc, log):
        return

    content_type, content_els, content_attributes = self.select_elements(
        self.content, content_doc, theme=False)
    if not content_els:
        if self.nocontent == 'abort':
            log.debug(
                self, 'aborting theming because no content matches rule content="%s"',
                self.content)
            raise AbortTheme('No content matches content="%s"' % self.content)
        report = log.debug if self.nocontent == 'ignore' else log.warn
        report(
            self, 'skipping rule because no content matches rule content="%s"',
            self.content)
        return

    theme_type, theme_els, theme_attributes = self.select_elements(
        self.theme, theme_doc, theme=True)
    attributes = self.join_attributes(content_attributes, theme_attributes)
    if not theme_els:
        if self.notheme == 'abort':
            log.debug(
                self, 'aborting theming because no theme elements match rule theme="%s"',
                self.theme)
            raise AbortTheme('No theme element matches theme="%s"' % self.theme)
        report = log.debug if self.notheme == 'ignore' else log.warn
        report(
            self, 'skipping rule because no theme element matches rule theme="%s"',
            self.theme)
        return

    if len(theme_els) > 1:
        # manytheme[0] is the reporting policy, manytheme[1] the pick.
        policy = self.manytheme[0]
        if policy == 'abort':
            log.debug(
                self, 'aborting theming because %i elements (%s) match theme="%s"',
                len(theme_els),
                self.format_tags(theme_els, include_name=False),
                self.theme)
            raise AbortTheme('Many elements match theme="%s"' % self.theme)
        report = log.warn if policy == 'warn' else log.debug
        pick = self.manytheme[1]
        theme_el = theme_els[0] if pick == 'first' else theme_els[-1]
        report(
            self, '%s elements match theme="%s", using the %s match',
            len(theme_els), self.theme, pick)
    else:
        theme_el = theme_els[0]

    # Work on a deep copy unless this rule moves the content elements.
    if not self.move and theme_type in ('children', 'elements'):
        content_els = copy.deepcopy(content_els)
    mark_content_els(content_els)
    self.apply_transformation(content_type, content_els, attributes,
                              theme_type, theme_el, log)