Ejemplo n.º 1
0
def test_escape_xml_without_bleach():
    fh = io.StringIO()
    w = writer.XMLWriter(fh)

    with pytest.raises(ValueError) as err:
        with w.xml_cleaning_method('bleach_clean'):
            pass
    assert 'bleach package is required when HTML escaping is disabled' in str(err.value)
Ejemplo n.º 2
0
def test_writer():
    fh = io.StringIO()
    w = writer.XMLWriter(fh)
    with w.tag("html"):
        with w.tag("body"):
            w.data("This is the content")
            w.comment("comment")

    value = ''.join(fh.getvalue().split())
    assert value == '<html><body>Thisisthecontent<!--comment--></body></html>'
Ejemplo n.º 3
0
def test_escape_xml_with_bleach():
    fh = io.StringIO()
    w = writer.XMLWriter(fh)

    # Turn off XML escaping, but still sanitize unsafe tags like <script>
    with w.xml_cleaning_method('bleach_clean'):
        w.start('td')
        w.data('<script>x</script> <em>OK</em>')
        w.end(indent=False)
    assert fh.getvalue() == '<td>&lt;script&gt;x&lt;/script&gt; <em>OK</em></td>\n'

    fh = io.StringIO()
    w = writer.XMLWriter(fh)

    # Default is True (all XML tags escaped)
    with w.xml_cleaning_method():
        w.start('td')
        w.data('<script>x</script> <em>OK</em>')
        w.end(indent=False)
    assert fh.getvalue() == '<td>&lt;script&gt;x&lt;/script&gt; &lt;em&gt;OK&lt;/em&gt;</td>\n'
Ejemplo n.º 4
0
    def write(self, table):
        """
        Return data in ``table`` converted to HTML as a list of strings.
        """
        cols = list(table.columns.values())

        self.data.header.cols = cols

        if isinstance(self.data.fill_values, tuple):
            self.data.fill_values = [self.data.fill_values]

        self.data._set_fill_values(cols)

        lines = []

        # Set HTML escaping to False for any column in the raw_html_cols input
        raw_html_cols = self.html.get('raw_html_cols', [])
        if isinstance(raw_html_cols, str):
            raw_html_cols = [raw_html_cols
                             ]  # Allow for a single string as input
        cols_escaped = [col.info.name not in raw_html_cols for col in cols]

        # Kwargs that get passed on to bleach.clean() if that is available.
        raw_html_clean_kwargs = self.html.get('raw_html_clean_kwargs', {})

        # Use XMLWriter to output HTML to lines
        w = writer.XMLWriter(ListWriter(lines))

        with w.tag('html'):
            with w.tag('head'):
                # Declare encoding and set CSS style for table
                with w.tag('meta', attrib={'charset': 'utf-8'}):
                    pass
                with w.tag('meta',
                           attrib={
                               'http-equiv': 'Content-type',
                               'content': 'text/html;charset=UTF-8'
                           }):
                    pass
                if 'css' in self.html:
                    with w.tag('style'):
                        w.data(self.html['css'])
                if 'cssfiles' in self.html:
                    for filename in self.html['cssfiles']:
                        with w.tag('link',
                                   rel="stylesheet",
                                   href=filename,
                                   type='text/css'):
                            pass
                if 'jsfiles' in self.html:
                    for filename in self.html['jsfiles']:
                        with w.tag('script', src=filename):
                            w.data(
                                ''
                            )  # need this instead of pass to get <script></script>
            with w.tag('body'):
                if 'js' in self.html:
                    with w.xml_cleaning_method('none'):
                        with w.tag('script'):
                            w.data(self.html['js'])
                if isinstance(self.html['table_id'], str):
                    html_table_id = self.html['table_id']
                else:
                    html_table_id = None
                if 'table_class' in self.html:
                    html_table_class = self.html['table_class']
                    attrib = {"class": html_table_class}
                else:
                    attrib = {}
                with w.tag('table', id=html_table_id, attrib=attrib):
                    with w.tag('thead'):
                        with w.tag('tr'):
                            for col in cols:
                                if len(col.shape
                                       ) > 1 and self.html['multicol']:
                                    # Set colspan attribute for multicolumns
                                    w.start('th', colspan=col.shape[1])
                                else:
                                    w.start('th')
                                w.data(col.info.name.strip())
                                w.end(indent=False)
                        col_str_iters = []
                        new_cols_escaped = []

                        # Make a container to hold any new_col objects created
                        # below for multicolumn elements.  This is purely to
                        # maintain a reference for these objects during
                        # subsequent iteration to format column values.  This
                        # requires that the weakref info._parent be maintained.
                        new_cols = []

                        for col, col_escaped in zip(cols, cols_escaped):
                            if len(col.shape) > 1 and self.html['multicol']:
                                span = col.shape[1]
                                for i in range(span):
                                    # Split up multicolumns into separate columns
                                    new_col = Column([el[i] for el in col])

                                    new_col_iter_str_vals = self.fill_values(
                                        col, new_col.info.iter_str_vals())
                                    col_str_iters.append(new_col_iter_str_vals)
                                    new_cols_escaped.append(col_escaped)
                                    new_cols.append(new_col)
                            else:

                                col_iter_str_vals = self.fill_values(
                                    col, col.info.iter_str_vals())
                                col_str_iters.append(col_iter_str_vals)

                                new_cols_escaped.append(col_escaped)

                    for row in zip(*col_str_iters):
                        with w.tag('tr'):
                            for el, col_escaped in zip(row, new_cols_escaped):
                                # Potentially disable HTML escaping for column
                                method = ('escape_xml'
                                          if col_escaped else 'bleach_clean')
                                with w.xml_cleaning_method(
                                        method, **raw_html_clean_kwargs):
                                    w.start('td')
                                    w.data(el.strip())
                                    w.end(indent=False)

        # Fixes XMLWriter's insertion of unwanted line breaks
        return [''.join(lines)]