def test_clean_tag_node_with_data_linktype(self):
     """Check how ``clean_tag_node`` rewrites an anchor carrying ``data-linktype``.

     The cleaned anchor is expected to serialise with only ``id`` and
     ``linktype`` attributes; the ``irrelevant`` attribute must be gone.
     """
     markup = '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>'
     soup = BeautifulSoup(markup, 'html5lib')
     anchor = soup.a
     DbWhitelister.clean_tag_node(soup, anchor)
     self.assertEqual(str(anchor), '<a id="1" linktype="document">foo</a>')
Beispiel #2
0
def editor_js():
    """Return the script tags and inline config for the hallo DOMPurify plugin.

    The result is the ``<script src=...>`` includes for the bundled JS files
    (resolved against ``settings.STATIC_URL``) followed by an inline script
    that registers the ``dompurify`` hallo plugin with an ``ALLOWED_TAGS``
    list derived from ``DbWhitelister.element_rules``.
    """
    script_names = (
        'waf/purify.1.0.2.min.js',
        'waf/rangy-core.1.3.0.js',
        'waf/rangy-selectionsaverestore.1.3.0.js',
        'waf/hallo-dompurify.js',
    )
    script_tags = format_html_join(
        '\n',
        '<script src="{0}{1}"></script>',
        [(settings.STATIC_URL, script_name) for script_name in script_names],
    )

    # fake a clean to ensure construct_whitelister_element_rules hooks are hooked
    DbWhitelister.clean('')

    # '#text' is always allowed; rule keys are kept only when they begin
    # with a lowercase letter.
    tag_names = ['#text']
    tag_names.extend(
        rule_key for rule_key in DbWhitelister.element_rules
        if re.match('[a-z]+', rule_key)
    )
    quoted_tags = format_html_join(', ', "'{}'", [(tag_name,) for tag_name in tag_names])

    # DOMpurify will be more forgiving than DbWhitelister as it's not filtering attributes
    # but it's not practical to extrapolate from DbWhitelister which ones to retain
    inline_config = format_html(
        """
            <script>
                (function() {{
                    var config = {{
                        ALLOWED_TAGS: [{allowed_tags}],
                        KEEP_CONTENT: true
                    }};
                    registerHalloPlugin('dompurify', config);
                }})();
            </script>
        """,
        allowed_tags=quoted_tags,
    )
    return script_tags + inline_config
Beispiel #3
0
 def test_clean_tag_node_with_data_linktype(self):
     """Check how ``clean_tag_node`` rewrites an anchor carrying ``data-linktype``.

     The cleaned anchor must serialise with only ``id`` and ``linktype``
     attributes; the ``irrelevant`` attribute must be dropped.
     """
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup(
         '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
         'html5lib'
     )
     tag = soup.a
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(str(tag), '<a id="1" linktype="document">foo</a>')
Beispiel #4
0
 def test_clean_tag_node(self):
     """Check that ``clean_tag_node`` strips an unrecognised attribute from ``<a>``."""
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup(
         '<a irrelevant="baz">foo</a>',
         'html5lib'
     )
     tag = soup.a
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(str(tag), '<a>foo</a>')
Beispiel #5
0
 def test_clean_tag_node_div(self):
     """Check that ``clean_tag_node`` renames a ``<div>`` node to ``<p>``."""
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup(
         '<div>foo</div>',
         'html5lib'
     )
     tag = soup.div
     # Sanity check: the node really starts out as a div.
     self.assertEqual(tag.name, 'div')
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(tag.name, 'p')
Beispiel #6
0
 def test_clean_tag_node_with_data_embedtype(self):
     """Check that a child carrying ``data-embedtype`` becomes an ``<embed/>`` tag.

     The paragraph is cleaned as a whole; its anchor child is expected to be
     replaced by an ``<embed>`` with ``alt``, ``embedtype``, ``format`` and
     ``id`` attributes.
     """
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup(
         '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
         'html5lib'
     )
     tag = soup.p
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(str(tag),
                      '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>')
Beispiel #7
0
 def test_clean_tag_node_with_data_embedtype(self):
     """Check that a child carrying ``data-embedtype`` becomes an ``<embed/>`` tag.

     The paragraph is cleaned as a whole; its anchor child is expected to be
     replaced by an ``<embed>`` with ``alt``, ``embedtype``, ``format`` and
     ``id`` attributes.
     """
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup(
         '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
         'html5lib'
     )
     tag = soup.p
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(
         str(tag),
         '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>')
    def test_whitelist_hooks(self):
        """Compare ``DbWhitelister`` and ``Whitelister`` output on the same markup.

        ``DbWhitelister`` is expected to keep ``<blockquote>`` and the
        ``target`` attribute, while the base ``Whitelister`` is expected to
        drop both.
        """
        # The same markup is fed to both whitelisters.
        markup = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank" tea="darjeeling">'
            'stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )

        # wagtail.tests.wagtail_hooks overrides the whitelist to permit <blockquote> and <a target="...">
        db_cleaned = DbWhitelister.clean(markup)
        self.assertHtmlEqual(
            (
                '<blockquote>I would put a tax on all people who'
                ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
                ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
            ),
            db_cleaned,
        )

        # check that the base Whitelister class is unaffected by these custom whitelist rules
        base_cleaned = Whitelister.clean(markup)
        self.assertHtmlEqual(
            (
                'I would put a tax on all people who'
                ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
                'stand in water</a>.<p>- Gumby</p>'
            ),
            base_cleaned,
        )
Beispiel #9
0
 def test_document_link_is_rewritten(self):
     """Check that a document link is stored with only ``linktype`` and ``id``."""
     editor_html = (
         '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
         'horribly oversized brochure</a></p>'
     )
     cleaned = DbWhitelister.clean(editor_html)
     self.assertHtmlEqual(
         '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>',
         cleaned,
     )
Beispiel #10
0
    def test_whitelist_hooks(self):
        """Test that DbWhitelister does not strip new elements and attributes.

        The new allowed elements and attributes are added in v1.wagtail_hooks.
        """
        input_html = '''
<span class="schema-container"
      itemprop="step"
      itemscope=""
      itemtype="http://schema.org/HowToSection">
    <h4 itemprop="name">Step 1: Learn about the debt</h4>
    <span class="schema-container" itemprop="itemListElement">
        <table>
            <thead>
                <tr>
                    <th>Col 1 header</th>
                    <th>Col 2 header</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Row 1 Col 1</td>
                    <td>Row 1 Col 2</td>
                </tr>
                <tr>
                    <td>Row 2 Col 1</td>
                    <td>Row 2 Col 2</td>
                </tr>
            </tbody>
        </table>
    </span>
</span>
        '''
        output_html = DbWhitelister.clean(input_html)
        self.assertHTMLEqual(input_html, output_html)
Beispiel #11
0
 def test_document_link_is_rewritten(self):
     """Check that cleaning drops ``href`` and un-prefixes the ``data-`` attributes of a document link."""
     expected_html = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'
     actual_html = DbWhitelister.clean(
         '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
         'horribly oversized brochure</a></p>'
     )
     self.assertHtmlEqual(expected_html, actual_html)
Beispiel #12
0
 def value_from_datadict(self, data, files, name):
     """Return the submitted rich text in its database representation.

     Returns ``None`` when the parent widget reports no value. On Wagtail
     2.0 and later the value is passed through
     ``self.converter.to_database_format``; on older versions it is cleaned
     with ``DbWhitelister``.
     """
     original_value = super(FroalaRichTextArea, self).value_from_datadict(data, files, name)
     if original_value is None:
         return None

     # Compare the numeric major version. Comparing version strings directly
     # is lexicographic, so e.g. "10.0" >= "2.0" would evaluate to False.
     try:
         is_wagtail_2_or_later = int(str(WAGTAIL_VERSION).split(".", 1)[0]) >= 2
     except ValueError:
         # Version does not start with an integer component: keep the
         # original string comparison rather than guessing.
         is_wagtail_2_or_later = WAGTAIL_VERSION >= "2.0"

     if is_wagtail_2_or_later:
         return self.converter.to_database_format(original_value)
     return DbWhitelister.clean(original_value)
Beispiel #13
0
 def test_page_link_is_rewritten(self):
     """Check that an internal page link is rewritten while an external link is kept as-is."""
     editor_html = (
         '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
         ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
     )
     cleaned = DbWhitelister.clean(editor_html)
     self.assertHtmlEqual(
         (
             '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
             ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
         ),
         cleaned,
     )
 def value_from_datadict(self, data, files, name):
     """Return the submitted rich text in its database representation.

     Returns ``None`` when the parent widget reports no value. When
     ``wagtail_version`` equals 2 the value is passed through
     ``self.converter``; otherwise it is cleaned with ``DbWhitelister``.
     """
     submitted = super().value_from_datadict(data, files, name)
     if submitted is None:
         return None
     if wagtail_version == 2:
         return self.converter.to_database_format(submitted)
     return DbWhitelister.clean(submitted)
Beispiel #15
0
 def test_image_embed_is_rewritten(self):
     """Check that an image ``<figure>`` (with its contents) collapses to a single ``<embed>`` tag."""
     editor_html = (
         '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
         ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
         '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
         '<figcaption>A kitten, yesterday.</figcaption></figure>'
     )
     cleaned = DbWhitelister.clean(editor_html)
     self.assertHtmlEqual(
         (
             '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
             ' format="image-with-caption" alt="A cute kitten" />'
         ),
         cleaned,
     )
Beispiel #16
0
 def test_page_link_is_rewritten(self):
     """Check that a page link loses ``href``/``data-`` prefixes and a plain link is untouched."""
     expected_html = (
         '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
         ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
     )
     actual_html = DbWhitelister.clean(
         '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
         ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
     )
     self.assertHtmlEqual(expected_html, actual_html)
 def test_media_embed_is_rewritten(self):
     """Check that a media ``<iframe>`` is replaced by an ``<embed>`` carrying only ``embedtype`` and ``url``."""
     editor_html = (
         '<p>OMG look at this video of a kitten: '
         '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
         ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
     )
     cleaned = DbWhitelister.clean(editor_html)
     self.assertHtmlEqual(
         (
             '<p>OMG look at this video of a kitten:'
             ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>'
         ),
         cleaned,
     )
    def value_from_datadict(self, data, files, name):  # #W7MnV#
        """
        Hook called by Wagtail when the page is saved; returns the submitted
        value cleaned by DbWhitelister, or None when nothing was submitted.
        Cf https://github.com/torchbox/wagtail/blob/master/wagtail/wagtailadmin/rich_text.py#L29
        """
        parent_widget = super(MediumRichTextArea, self)
        submitted = parent_widget.value_from_datadict(data, files, name)
        return None if submitted is None else DbWhitelister.clean(submitted)
Beispiel #19
0
 def test_image_embed_is_rewritten(self):
     """Check that the editor's image ``<figure>`` markup is stored as one ``<embed>`` tag."""
     expected_html = (
         '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
         ' format="image-with-caption" alt="A cute kitten" />'
     )
     actual_html = DbWhitelister.clean(
         '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
         ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
         '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
         '<figcaption>A kitten, yesterday.</figcaption></figure>'
     )
     self.assertHtmlEqual(expected_html, actual_html)
 def test_div_conversion(self):
     """Check ``<div>`` to ``<p>`` conversion together with the other cleaning rules."""
     # DIVs should be converted to P, and all whitelist / conversion rules still applied
     editor_html = (
         '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
         '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
         ' width="640" height="480"'
         ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
     )
     cleaned = DbWhitelister.clean(editor_html)
     self.assertHtmlEqual(
         (
             '<p>before</p><p>OMG <b>look</b> at this video of a kitten:'
             ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>'
         ),
         cleaned,
     )
Beispiel #21
0
    def __update_page(self, file_id, title, date):
        """
        Sync the remote file ``file_id`` into a page.

        If ``SyncMeta`` already holds a value other than ``False`` for this
        file, that value is used as the primary key of the existing page,
        whose ``date`` and ``body`` are refreshed. Otherwise a new page is
        built through the page's edit form and added under
        ``self.parent_page``, and the file -> page id mapping is recorded.
        """
        # Fetch the file contents first, then the page model and parent.
        content = self.__get_resource(file_id)
        model = self.page_class
        parent = self.parent_page

        meta_key = 'file:' + file_id
        linked_pk = SyncMeta.get_value(meta_key)

        if linked_pk is not False:
            # Already synced: update the stored page in place.
            # NOTE(review): ``title`` is not written in this branch - confirm
            # that is intentional.
            page = model.objects.get(pk=linked_pk)
            page.date = date
            page.body = DbWhitelister.clean(content)
            page.save()
            return

        # Not synced yet: validate a fresh page through its edit form.
        page = model()
        form_class = model.get_edit_handler().get_form_class(model)
        form_data = {
            'title': title,
            'date': date,
            'body': content,
            'slug': self.__generate_slug(title, parent),
        }
        form = form_class(data=form_data, instance=page, parent_page=parent)

        # An invalid form leaves everything untouched.
        if not form.is_valid():
            return

        page = form.save(commit=False)
        parent.add_child(instance=page)

        # Remember which page this file maps to for later syncs.
        SyncMeta.set_value(meta_key, page.id)
    def test_whitelist_hooks(self):
        """Test that DbWhitelister does not strip new elements and attributes.

        The new allowed elements and attributes are added in v1.wagtail_hooks.
        """

        input_html = '''
<span class="schema-container"
      itemprop="step"
      itemscope=""
      itemtype="http://schema.org/HowToSection">
    <h4 itemprop="name">Step 1: Learn about the debt</h4>
    <span class="schema-container" itemprop="itemListElement">
        <table>
            <thead>
                <tr>
                    <th>Col 1 header</th>
                    <th>Col 2 header</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Row 1 Col 1</td>
                    <td>Row 1 Col 2</td>
                </tr>
                <tr>
                    <td>Row 2 Col 1</td>
                    <td>Row 2 Col 2</td>
                </tr>
            </tbody>
        </table>
    </span>
</span>
        '''
        output_html = DbWhitelister.clean(input_html)
        self.assertHTMLEqual(input_html, output_html)
Beispiel #23
0
 def test_clean_tag_node_div(self):
     """Check that ``clean_tag_node`` renames a ``<div>`` node to ``<p>``."""
     markup = "<div>foo</div>"
     soup = BeautifulSoup(markup, "html5lib")
     node = soup.div
     # Sanity check: the node really starts out as a div.
     self.assertEqual(node.name, "div")
     DbWhitelister.clean_tag_node(soup, node)
     self.assertEqual(node.name, "p")
Beispiel #24
0
 def value_from_datadict(self, data, files, name):
     """Return the submitted value cleaned by ``DbWhitelister``, or ``None`` if absent."""
     parent_widget = super(RichTextArea, self)
     submitted = parent_widget.value_from_datadict(data, files, name)
     return None if submitted is None else DbWhitelister.clean(submitted)
Beispiel #25
0
 def test_clean_tag_node(self):
     """Check that ``clean_tag_node`` strips an unrecognised attribute from ``<a>``."""
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
     tag = soup.a
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(str(tag), '<a>foo</a>')
Beispiel #26
0
 def test_clean_tag_node_div(self):
     """Check that ``clean_tag_node`` renames a ``<div>`` node to ``<p>``."""
     # Name the parser explicitly so the parse tree does not depend on which
     # parsers happen to be installed (and no "no parser specified" warning
     # is emitted).
     soup = BeautifulSoup('<div>foo</div>', 'html5lib')
     tag = soup.div
     # Sanity check: the node really starts out as a div.
     self.assertEqual(tag.name, 'div')
     DbWhitelister.clean_tag_node(soup, tag)
     self.assertEqual(tag.name, 'p')
Beispiel #27
0
 def value_from_datadict(self, data, files, name):
     """Return the submitted value cleaned by ``DbWhitelister``, or ``None`` if absent."""
     submitted = super(HalloRichTextArea, self).value_from_datadict(data, files, name)
     if submitted is not None:
         return DbWhitelister.clean(submitted)
     return None