def test_escape_xml_without_bleach(): fh = io.StringIO() w = writer.XMLWriter(fh) with pytest.raises(ValueError) as err: with w.xml_cleaning_method('bleach_clean'): pass assert 'bleach package is required when HTML escaping is disabled' in str(err.value)
def test_writer(): fh = io.StringIO() w = writer.XMLWriter(fh) with w.tag("html"): with w.tag("body"): w.data("This is the content") w.comment("comment") value = ''.join(fh.getvalue().split()) assert value == '<html><body>Thisisthecontent<!--comment--></body></html>'
def test_escape_xml_with_bleach(): fh = io.StringIO() w = writer.XMLWriter(fh) # Turn off XML escaping, but still sanitize unsafe tags like <script> with w.xml_cleaning_method('bleach_clean'): w.start('td') w.data('<script>x</script> <em>OK</em>') w.end(indent=False) assert fh.getvalue() == '<td><script>x</script> <em>OK</em></td>\n' fh = io.StringIO() w = writer.XMLWriter(fh) # Default is True (all XML tags escaped) with w.xml_cleaning_method(): w.start('td') w.data('<script>x</script> <em>OK</em>') w.end(indent=False) assert fh.getvalue() == '<td><script>x</script> <em>OK</em></td>\n'
def write(self, table): """ Return data in ``table`` converted to HTML as a list of strings. """ cols = list(table.columns.values()) self.data.header.cols = cols if isinstance(self.data.fill_values, tuple): self.data.fill_values = [self.data.fill_values] self.data._set_fill_values(cols) lines = [] # Set HTML escaping to False for any column in the raw_html_cols input raw_html_cols = self.html.get('raw_html_cols', []) if isinstance(raw_html_cols, str): raw_html_cols = [raw_html_cols ] # Allow for a single string as input cols_escaped = [col.info.name not in raw_html_cols for col in cols] # Kwargs that get passed on to bleach.clean() if that is available. raw_html_clean_kwargs = self.html.get('raw_html_clean_kwargs', {}) # Use XMLWriter to output HTML to lines w = writer.XMLWriter(ListWriter(lines)) with w.tag('html'): with w.tag('head'): # Declare encoding and set CSS style for table with w.tag('meta', attrib={'charset': 'utf-8'}): pass with w.tag('meta', attrib={ 'http-equiv': 'Content-type', 'content': 'text/html;charset=UTF-8' }): pass if 'css' in self.html: with w.tag('style'): w.data(self.html['css']) if 'cssfiles' in self.html: for filename in self.html['cssfiles']: with w.tag('link', rel="stylesheet", href=filename, type='text/css'): pass if 'jsfiles' in self.html: for filename in self.html['jsfiles']: with w.tag('script', src=filename): w.data( '' ) # need this instead of pass to get <script></script> with w.tag('body'): if 'js' in self.html: with w.xml_cleaning_method('none'): with w.tag('script'): w.data(self.html['js']) if isinstance(self.html['table_id'], str): html_table_id = self.html['table_id'] else: html_table_id = None if 'table_class' in self.html: html_table_class = self.html['table_class'] attrib = {"class": html_table_class} else: attrib = {} with w.tag('table', id=html_table_id, attrib=attrib): with w.tag('thead'): with w.tag('tr'): for col in cols: if len(col.shape ) > 1 and self.html['multicol']: # Set colspan attribute for multicolumns w.start('th', colspan=col.shape[1]) else: w.start('th') w.data(col.info.name.strip()) w.end(indent=False) col_str_iters = [] new_cols_escaped = [] # Make a container to hold any new_col objects created # below for multicolumn elements. This is purely to # maintain a reference for these objects during # subsequent iteration to format column values. This # requires that the weakref info._parent be maintained. new_cols = [] for col, col_escaped in zip(cols, cols_escaped): if len(col.shape) > 1 and self.html['multicol']: span = col.shape[1] for i in range(span): # Split up multicolumns into separate columns new_col = Column([el[i] for el in col]) new_col_iter_str_vals = self.fill_values( col, new_col.info.iter_str_vals()) col_str_iters.append(new_col_iter_str_vals) new_cols_escaped.append(col_escaped) new_cols.append(new_col) else: col_iter_str_vals = self.fill_values( col, col.info.iter_str_vals()) col_str_iters.append(col_iter_str_vals) new_cols_escaped.append(col_escaped) for row in zip(*col_str_iters): with w.tag('tr'): for el, col_escaped in zip(row, new_cols_escaped): # Potentially disable HTML escaping for column method = ('escape_xml' if col_escaped else 'bleach_clean') with w.xml_cleaning_method( method, **raw_html_clean_kwargs): w.start('td') w.data(el.strip()) w.end(indent=False) # Fixes XMLWriter's insertion of unwanted line breaks return [''.join(lines)]