def __init__(self, attrs=None, **kwargs):
    """Set up the widget, its editor options and its HTML converter.

    Args:
        attrs: Passed straight through to the parent widget constructor.
        **kwargs: Editor options merged over ``self.getDefaultArgs()``. An
            optional ``features`` entry is removed first; it selects the
            feature list handed to ``EditorHTMLConverter``.
    """
    super(TinyMCERichTextArea, self).__init__(attrs)
    self.kwargs = self.getDefaultArgs()
    # 'features' configures the converter rather than the editor options, so
    # it is taken out before the remaining options are merged.
    self.features = kwargs.pop('features', None)
    # **kwargs is always a dict (never None), so no guard is needed here.
    self.kwargs.update(kwargs)
    if self.features is None:
        self.features = features.get_default_features()
        self.converter = EditorHTMLConverter()
    else:
        self.converter = EditorHTMLConverter(self.features)
class TestDbWhitelisterMethods(TestCase):
    """Exercise ``clean_tag_node`` on the whitelister of an ``EditorHTMLConverter``."""

    def setUp(self):
        converter = EditorHTMLConverter()
        self.whitelister = converter.whitelister

    def test_clean_tag_node_div(self):
        """A <div> node is renamed to <p> by clean_tag_node."""
        document = BeautifulSoup('<div>foo</div>', 'html5lib')
        node = document.div
        self.assertEqual(node.name, 'div')
        self.whitelister.clean_tag_node(document, node)
        self.assertEqual(node.name, 'p')

    def test_clean_tag_node_with_data_embedtype(self):
        """An element carrying data-embedtype is rewritten to an <embed> tag."""
        markup = '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>'
        document = BeautifulSoup(markup, 'html5lib')
        node = document.p
        self.whitelister.clean_tag_node(document, node)
        expected = '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>'
        self.assertEqual(str(node), expected)

    def test_clean_tag_node_with_data_linktype(self):
        """A link carrying data-linktype keeps only its id and linktype."""
        markup = '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>'
        document = BeautifulSoup(markup, 'html5lib')
        node = document.a
        self.whitelister.clean_tag_node(document, node)
        expected = '<a id="1" linktype="document">foo</a>'
        self.assertEqual(str(node), expected)

    def test_clean_tag_node(self):
        """Attributes that are not whitelisted are stripped from a plain link."""
        document = BeautifulSoup('<a irrelevant="baz">foo</a>', 'html5lib')
        node = document.a
        self.whitelister.clean_tag_node(document, node)
        self.assertEqual(str(node), '<a>foo</a>')
class TestDbWhitelisterMethods(TestCase):
    """Unit tests for the per-node cleaning step of the editor-HTML whitelister."""

    def setUp(self):
        # Every test works against the whitelister of a converter built
        # without an explicit feature list.
        self.whitelister = EditorHTMLConverter().whitelister

    def _clean(self, markup, tag_name):
        """Parse *markup*, clean the first *tag_name* node in place and return it."""
        parsed = BeautifulSoup(markup, 'html5lib')
        element = getattr(parsed, tag_name)
        self.whitelister.clean_tag_node(parsed, element)
        return element

    def test_clean_tag_node_div(self):
        parsed = BeautifulSoup('<div>foo</div>', 'html5lib')
        element = parsed.div
        # Sanity-check the starting point before the node is cleaned.
        self.assertEqual(element.name, 'div')
        self.whitelister.clean_tag_node(parsed, element)
        self.assertEqual(element.name, 'p')

    def test_clean_tag_node_with_data_embedtype(self):
        element = self._clean(
            '<p><a data-embedtype="image" data-id=1 data-format="left" data-alt="bar" irrelevant="baz">foo</a></p>',
            'p',
        )
        self.assertEqual(
            str(element),
            '<p><embed alt="bar" embedtype="image" format="left" id="1"/></p>',
        )

    def test_clean_tag_node_with_data_linktype(self):
        element = self._clean(
            '<a data-linktype="document" data-id="1" irrelevant="baz">foo</a>',
            'a',
        )
        self.assertEqual(str(element), '<a id="1" linktype="document">foo</a>')

    def test_clean_tag_node(self):
        element = self._clean('<a irrelevant="baz">foo</a>', 'a')
        self.assertEqual(str(element), '<a>foo</a>')
def test_whitelist_with_feature_list(self):
    """A converter built with an explicit feature list drops the h2 and i tags.

    The feature list also contains a made-up name to check that it does not
    break the conversion.
    """
    enabled = ['h1', 'bold', 'link', 'something_i_just_made_up']
    converter = EditorHTMLConverter(features=enabled)

    source_html = ''.join([
        '<h1>this heading is allowed</h1> <h2>but not this one</h2> ',
        '<p><b>bold</b> <i>italic</i></p>',
        '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>',
    ])
    wanted_html = ''.join([
        '<h1>this heading is allowed</h1> but not this one ',
        '<p><b>bold</b> italic</p>',
        '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>',
    ])

    stored_html = converter.to_database_format(source_html)
    self.assertHtmlEqual(wanted_html, stored_html)
def __init__(self, **kwargs):
    """Configure the Froala widget.

    Args:
        **kwargs: May contain ``options`` (the editor configuration) and
            ``features`` (the rich text feature list). Both are consumed
            here; everything else is forwarded to the parent widget.
    """
    try:
        self.options = kwargs.pop("options")
    except KeyError:
        # No explicit options: start from the licence key and layer the
        # optional project-wide FROALA_OPTIONS setting on top.
        self.options = {"key": settings.FROALA_LICENSE_KEY}
        self.options.update(getattr(settings, "FROALA_OPTIONS", {}))

    # 'features' must be removed BEFORE delegating to the parent constructor;
    # otherwise it is forwarded as an unexpected keyword argument and the
    # widget cannot be built with a feature list.
    self.features = kwargs.pop("features", None)
    super(FroalaRichTextArea, self).__init__(**kwargs)

    # NOTE(review): this is a string comparison, so it orders versions
    # lexicographically (e.g. "10.0" < "2.0") - confirm how WAGTAIL_VERSION
    # is defined.
    if WAGTAIL_VERSION >= "2.0":
        if self.features is None:
            self.features = features.get_default_features()
            self.converter = EditorHTMLConverter()
        else:
            self.converter = EditorHTMLConverter(self.features)
def __init__(self, *args, **kwargs):
    """Consume the ``options`` and ``features`` kwargs and build the plugin list.

    Remaining positional and keyword arguments are forwarded to the parent
    constructor once the widget-specific state has been set up.
    """
    self.options = kwargs.pop('options', None)

    self.features = kwargs.pop('features', None)
    if self.features is None:
        self.features = features.get_default_features()
    self.converter = EditorHTMLConverter(self.features)

    # Ask the feature registry for a plugin per feature; features that
    # yield a falsy response contribute nothing.
    feature_plugins = []
    for feature_name in self.features:
        plugin = features.get_editor_plugin('hallo', feature_name)
        if plugin:
            feature_plugins.append(plugin)

    self.plugins = CORE_HALLO_PLUGINS + feature_plugins
    self.plugins.sort(key=lambda plugin: plugin.order)

    super().__init__(*args, **kwargs)
def construct_text_block(text):
    """
    Build the text block value used in a LivePostBlock's content.

    Args:
        text (str): Text to add

    Returns:
        The result of ``RichTextBlock().to_python`` for the (cleaned) text.
    """
    # Text without any "<" cannot contain a tag, and BeautifulSoup prefers
    # markup with at least one tag, so such input is accepted unchanged.
    if "<" not in text:
        return RichTextBlock().to_python(text)

    # Otherwise run the text through the whitelister for the default
    # feature set so that no malicious html is accepted.
    default_features = rich_text.features.get_default_features()
    whitelister = EditorHTMLConverter(features=default_features).whitelister
    return RichTextBlock().to_python(whitelister.clean(text))
class CKEditor(WidgetWithScript, widgets.Textarea):
    """Textarea widget that is turned into a CKEditor instance in the browser.

    Values are translated between the database rich text format and the
    editor's HTML with an ``EditorHTMLConverter``.
    """

    def __init__(self, attrs=None):
        """Initialise the underlying widget and create the converter.

        Args:
            attrs: Optional HTML attributes, forwarded to the parent widget.
        """
        # The parent constructor must run so the widget's own state
        # (e.g. its attrs) is initialised before rendering.
        super().__init__(attrs)
        self.converter = EditorHTMLConverter()

    def get_panel(self):
        """Return the edit panel class to use with this widget."""
        return RichTextFieldPanel

    def render(self, name, value, attrs=None):
        """Render the textarea with *value* converted from the database format."""
        if value is None:
            translated_value = None
        else:
            translated_value = self.converter.from_database_format(value)
        return super().render(name, translated_value, attrs)

    def render_js_init(self, editor_id, name, value):
        """Return the JavaScript that replaces the textarea with CKEditor.

        The editor configuration is read from ``settings.WAGTAIL_CKEDITOR_CONFIG``
        and embedded as JSON.
        """
        return "CKEDITOR.replace( '%s', %s);" % (editor_id, mark_safe(json.dumps(settings.WAGTAIL_CKEDITOR_CONFIG)))

    def value_from_datadict(self, data, files, name):
        """Return the submitted value converted to the database format.

        Returns ``None`` when no value was submitted.
        """
        original_value = super().value_from_datadict(data, files, name)
        if original_value is None:
            return None
        # Use the same converter instance as render(), so both directions of
        # the conversion share one configuration (rather than calling
        # DbWhitelister.clean on the class).
        return self.converter.to_database_format(original_value)
class TestDbWhitelister(TestCase):
    """End-to-end checks of the whitelister's ``clean`` on editor HTML."""

    def setUp(self):
        self.whitelister = EditorHTMLConverter().whitelister

    def assertHtmlEqual(self, str1, str2):
        """
        Compare two HTML strings as parsed documents rather than as text,
        since the order in which attributes are serialised is not guaranteed.
        """
        first_dom = BeautifulSoup(str1, 'html5lib')
        second_dom = BeautifulSoup(str2, 'html5lib')
        self.assertEqual(first_dom, second_dom)

    def _assert_cleans_to(self, input_html, expected):
        """Clean *input_html* with the shared whitelister and compare to *expected*."""
        output_html = self.whitelister.clean(input_html)
        self.assertHtmlEqual(expected, output_html)

    def test_page_link_is_rewritten(self):
        self._assert_cleans_to(
            '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>',
            '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>',
        )

    def test_document_link_is_rewritten(self):
        self._assert_cleans_to(
            '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
            'horribly oversized brochure</a></p>',
            '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>',
        )

    def test_image_embed_is_rewritten(self):
        self._assert_cleans_to(
            '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
            ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
            '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
            '<figcaption>A kitten, yesterday.</figcaption></figure>',
            '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
            ' format="image-with-caption" alt="A cute kitten" />',
        )

    def test_media_embed_is_rewritten(self):
        self._assert_cleans_to(
            '<p>OMG look at this video of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
            ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>',
            '<p>OMG look at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>',
        )

    def test_div_conversion(self):
        # DIVs should be converted to P, and all whitelist / conversion rules still applied
        self._assert_cleans_to(
            '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
            ' width="640" height="480"'
            ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>',
            '<p>before</p><p>OMG <b>look</b> at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>',
        )

    def test_whitelist_hooks(self):
        # wagtail.tests.wagtail_hooks overrides the whitelist to permit <blockquote> and <a target="...">
        self._assert_cleans_to(
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank" tea="darjeeling">'
            'stand in water</a>.</blockquote><p>- <character>Gumby</character></p>',
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>',
        )

        # check that the base Whitelister class is unaffected by these custom whitelist rules
        base_input = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank"'
            ' tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        base_output = Whitelister().clean(base_input)
        base_expected = (
            'I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
            'stand in water</a>.<p>- Gumby</p>'
        )
        self.assertHtmlEqual(base_expected, base_output)

    def test_whitelist_with_feature_list(self):
        converter = EditorHTMLConverter(
            features=['h1', 'bold', 'link', 'something_i_just_made_up'])
        source = (
            '<h1>this heading is allowed</h1> <h2>but not this one</h2> '
            '<p><b>bold</b> <i>italic</i></p>'
            '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>'
        )
        converted = converter.to_database_format(source)
        wanted = (
            '<h1>this heading is allowed</h1> but not this one '
            '<p><b>bold</b> italic</p>'
            '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>'
        )
        self.assertHtmlEqual(wanted, converted)
class TestDbWhitelister(TestCase):
    """Checks that editor HTML is rewritten by the whitelister as expected."""

    def setUp(self):
        html_converter = EditorHTMLConverter()
        self.whitelister = html_converter.whitelister

    def assertHtmlEqual(self, str1, str2):
        """
        Assert DOM-level equality of two HTML strings; a plain string
        comparison would be sensitive to attribute output order.
        """
        self.assertEqual(
            BeautifulSoup(str1, 'html5lib'),
            BeautifulSoup(str2, 'html5lib'),
        )

    def test_page_link_is_rewritten(self):
        """Page links keep linktype/id; plain external links are untouched."""
        editor_html = (
            '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
        )
        wanted = (
            '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
            ' of my <a href="http://wagtail.io/">Wagtail</a> site</p>'
        )
        result = self.whitelister.clean(editor_html)
        self.assertHtmlEqual(wanted, result)

    def test_document_link_is_rewritten(self):
        """Document links are reduced to linktype and id."""
        editor_html = (
            '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
            'horribly oversized brochure</a></p>'
        )
        wanted = '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>'
        result = self.whitelister.clean(editor_html)
        self.assertHtmlEqual(wanted, result)

    def test_image_embed_is_rewritten(self):
        """An image <figure> collapses into a single <embed> element."""
        editor_html = (
            '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
            ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
            '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
            '<figcaption>A kitten, yesterday.</figcaption></figure>'
        )
        wanted = (
            '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
            ' format="image-with-caption" alt="A cute kitten" />'
        )
        result = self.whitelister.clean(editor_html)
        self.assertHtmlEqual(wanted, result)

    def test_media_embed_is_rewritten(self):
        """A media <iframe> collapses into an <embed> carrying only the url."""
        editor_html = (
            '<p>OMG look at this video of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
            ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
        )
        wanted = (
            '<p>OMG look at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>'
        )
        result = self.whitelister.clean(editor_html)
        self.assertHtmlEqual(wanted, result)

    def test_div_conversion(self):
        # DIVs should be converted to P, and all whitelist / conversion rules still applied
        editor_html = (
            '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
            ' width="640" height="480"'
            ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
        )
        wanted = (
            '<p>before</p><p>OMG <b>look</b> at this video of a kitten:'
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>'
        )
        result = self.whitelister.clean(editor_html)
        self.assertHtmlEqual(wanted, result)

    def test_whitelist_hooks(self):
        # wagtail.tests.wagtail_hooks overrides the whitelist to permit <blockquote> and <a target="...">
        editor_html = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank" tea="darjeeling">'
            'stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        wanted = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1"'
            ' target="_blank">stand in water</a>.</blockquote><p>- Gumby</p>'
        )
        result = self.whitelister.clean(editor_html)
        self.assertHtmlEqual(wanted, result)

        # check that the base Whitelister class is unaffected by these custom whitelist rules
        editor_html = (
            '<blockquote>I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1" target="_blank"'
            ' tea="darjeeling">stand in water</a>.</blockquote><p>- <character>Gumby</character></p>'
        )
        wanted = (
            'I would put a tax on all people who'
            ' <a href="https://twitter.com/DMReporter/status/432914941201223680/photo/1">'
            'stand in water</a>.<p>- Gumby</p>'
        )
        result = Whitelister().clean(editor_html)
        self.assertHtmlEqual(wanted, result)

    def test_whitelist_with_feature_list(self):
        """Only the tags enabled by the explicit feature list survive conversion."""
        feature_names = ['h1', 'bold', 'link', 'something_i_just_made_up']
        converter = EditorHTMLConverter(features=feature_names)
        editor_html = (
            '<h1>this heading is allowed</h1> <h2>but not this one</h2> '
            '<p><b>bold</b> <i>italic</i></p>'
            '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>'
        )
        wanted = (
            '<h1>this heading is allowed</h1> but not this one '
            '<p><b>bold</b> italic</p>'
            '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>'
        )
        result = converter.to_database_format(editor_html)
        self.assertHtmlEqual(wanted, result)
class HalloRichTextArea(WidgetWithScript, widgets.Textarea):
    """Textarea widget that is made editable with the Hallo rich text editor."""

    # this class's constructor accepts a 'features' kwarg
    accepts_features = True

    def get_panel(self):
        """Return the edit panel class to use with this widget."""
        return RichTextFieldPanel

    def __init__(self, *args, **kwargs):
        """Consume ``options``/``features`` and collect the editor plugins."""
        self.options = kwargs.pop('options', None)

        self.features = kwargs.pop('features', None)
        if self.features is None:
            self.features = features.get_default_features()
        self.converter = EditorHTMLConverter(self.features)

        # Query the feature registry for each feature and keep only the
        # truthy responses from get_editor_plugin.
        registered = [
            features.get_editor_plugin('hallo', feature_name)
            for feature_name in self.features
        ]
        self.plugins = CORE_HALLO_PLUGINS + [plugin for plugin in registered if plugin]
        self.plugins.sort(key=lambda plugin: plugin.order)

        super().__init__(*args, **kwargs)

    def render(self, name, value, attrs=None):
        """Render the widget with *value* converted from the database format."""
        translated_value = (
            None if value is None else self.converter.from_database_format(value)
        )
        return super().render(name, translated_value, attrs)

    def render_js_init(self, id_, name, value):
        """Return the JavaScript call that activates the editor on this field."""
        has_explicit_plugins = self.options is not None and 'plugins' in self.options
        if has_explicit_plugins:
            # explicit 'plugins' config passed in options, so use that
            plugin_data = self.options['plugins']
        else:
            plugin_data = OrderedDict()
            for plugin in self.plugins:
                plugin.construct_plugins_list(plugin_data)

        id_json = json.dumps(id_)
        plugins_json = json.dumps(plugin_data)
        return "makeHalloRichTextEditable({0}, {1});".format(id_json, plugins_json)

    def value_from_datadict(self, data, files, name):
        """Return the submitted value in database format, or None if absent."""
        original_value = super().value_from_datadict(data, files, name)
        if original_value is not None:
            return self.converter.to_database_format(original_value)
        return None

    @property
    def media(self):
        """Combine the widget's own assets with those of every plugin."""
        combined = Media(
            js=[
                'wagtailadmin/js/vendor/hallo.js',
                'wagtailadmin/js/hallo-bootstrap.js',
            ],
            css={'all': ['wagtailadmin/css/panels/hallo.css']},
        )
        for plugin in self.plugins:
            combined = combined + plugin.media
        return combined
class TestDbWhitelister(TestCase):
    """Tests for cleaning editor HTML into its whitelisted form."""

    def setUp(self):
        self.whitelister = EditorHTMLConverter().whitelister

    def assertHtmlEqual(self, str1, str2):
        """
        Parse both HTML strings and compare the resulting documents, because
        attribute output order cannot be relied on in a text comparison.
        """
        parsed_pair = [BeautifulSoup(html, "html5lib") for html in (str1, str2)]
        self.assertEqual(parsed_pair[0], parsed_pair[1])

    def test_page_link_is_rewritten(self):
        raw = (
            '<p>Look at the <a data-linktype="page" data-id="2" href="/">lovely homepage</a>'
            ' of my <a href="http://wagtail.org/">Wagtail</a> site</p>'
        )
        cleaned = self.whitelister.clean(raw)
        self.assertHtmlEqual(
            '<p>Look at the <a linktype="page" id="2">lovely homepage</a>'
            ' of my <a href="http://wagtail.org/">Wagtail</a> site</p>',
            cleaned,
        )

    def test_document_link_is_rewritten(self):
        raw = (
            '<p>Look at our <a data-linktype="document" data-id="1" href="/documents/1/brochure.pdf">'
            "horribly oversized brochure</a></p>"
        )
        cleaned = self.whitelister.clean(raw)
        self.assertHtmlEqual(
            '<p>Look at our <a linktype="document" id="1">horribly oversized brochure</a></p>',
            cleaned,
        )

    def test_image_embed_is_rewritten(self):
        raw = (
            '<p>OMG look at this picture of a kitten:</p><figure data-embedtype="image" data-id="5"'
            ' data-format="image-with-caption" data-alt="A cute kitten" class="fancy-image">'
            '<img src="/media/images/kitten.jpg" width="320" height="200" alt="A cute kitten" />'
            "<figcaption>A kitten, yesterday.</figcaption></figure>"
        )
        cleaned = self.whitelister.clean(raw)
        self.assertHtmlEqual(
            '<p>OMG look at this picture of a kitten:</p><embed embedtype="image" id="5"'
            ' format="image-with-caption" alt="A cute kitten" />',
            cleaned,
        )

    def test_media_embed_is_rewritten(self):
        raw = (
            "<p>OMG look at this video of a kitten: "
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" width="640"'
            ' height="480" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></p>'
        )
        cleaned = self.whitelister.clean(raw)
        self.assertHtmlEqual(
            "<p>OMG look at this video of a kitten:"
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p>',
            cleaned,
        )

    def test_div_conversion(self):
        # DIVs should be converted to P, and all whitelist / conversion rules still applied
        raw = (
            '<p>before</p><div class="shiny">OMG <b>look</b> at this <blink>video</blink> of a kitten: '
            '<iframe data-embedtype="media" data-url="https://www.youtube.com/watch?v=dQw4w9WgXcQ"'
            ' width="640" height="480"'
            ' src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen></iframe></div><p>after</p>'
        )
        cleaned = self.whitelister.clean(raw)
        self.assertHtmlEqual(
            "<p>before</p><p>OMG <b>look</b> at this video of a kitten:"
            ' <embed embedtype="media" url="https://www.youtube.com/watch?v=dQw4w9WgXcQ" /></p><p>after</p>',
            cleaned,
        )

    def test_whitelist_with_feature_list(self):
        converter = EditorHTMLConverter(
            features=["h1", "bold", "link", "something_i_just_made_up"]
        )
        raw = (
            "<h1>this heading is allowed</h1> <h2>but not this one</h2> "
            "<p><b>bold</b> <i>italic</i></p>"
            '<p><a href="http://torchbox.com">external link</a> <a data-linktype="page" data-id="2" href="/">internal link</a></p>'
        )
        cleaned = converter.to_database_format(raw)
        self.assertHtmlEqual(
            "<h1>this heading is allowed</h1> but not this one "
            "<p><b>bold</b> italic</p>"
            '<p><a href="http://torchbox.com">external link</a> <a linktype="page" id="2">internal link</a></p>',
            cleaned,
        )
def setUp(self):
    """Give each test the whitelister of a converter built without arguments."""
    html_converter = EditorHTMLConverter()
    self.whitelister = html_converter.whitelister
class HalloRichTextArea(WidgetWithScript, widgets.Textarea):
    """Hallo-based rich text widget built on a plain textarea."""

    # this class's constructor accepts a 'features' kwarg
    accepts_features = True

    def get_panel(self):
        """Return the edit panel class to use with this widget."""
        return RichTextFieldPanel

    def __init__(self, *args, **kwargs):
        """Pull ``options`` and ``features`` out of kwargs and set up plugins."""
        self.options = kwargs.pop('options', None)
        self.features = kwargs.pop('features', None)
        if self.features is None:
            self.features = features.get_default_features()

        self.converter = EditorHTMLConverter(self.features)

        # Build the plugin list from the feature registry, skipping features
        # for which get_editor_plugin returns a falsy value.
        extra_plugins = []
        for feature_name in self.features:
            candidate = features.get_editor_plugin('hallo', feature_name)
            if candidate:
                extra_plugins.append(candidate)
        self.plugins = CORE_HALLO_PLUGINS + extra_plugins
        self.plugins.sort(key=lambda plugin: plugin.order)

        super().__init__(*args, **kwargs)

    def translate_value(self, value):
        """Convert a database rich text value to the form the input field needs.

        ``None`` is passed through unchanged.
        """
        if value is not None:
            return self.converter.from_database_format(value)
        return None

    def render(self, name, value, attrs=None):
        """Render the widget using the translated form of *value*."""
        return super().render(name, self.translate_value(value), attrs)

    def render_js_init(self, id_, name, value):
        """Return the JavaScript call that activates the editor on this field."""
        if self.options is None or 'plugins' not in self.options:
            # No explicit plugin config: let every plugin contribute its own.
            plugin_data = OrderedDict()
            for plugin in self.plugins:
                plugin.construct_plugins_list(plugin_data)
        else:
            # explicit 'plugins' config passed in options, so use that
            plugin_data = self.options['plugins']

        return "makeHalloRichTextEditable({0}, {1});".format(
            json.dumps(id_), json.dumps(plugin_data)
        )

    def value_from_datadict(self, data, files, name):
        """Return the submitted value in database format, or None if absent."""
        submitted = super().value_from_datadict(data, files, name)
        if submitted is None:
            return None
        return self.converter.to_database_format(submitted)

    @property
    def media(self):
        """Return the widget's assets plus the assets of each plugin."""
        script_files = [
            'wagtailadmin/js/vendor/hallo.js',
            'wagtailadmin/js/hallo-bootstrap.js',
        ]
        style_files = {'all': ['wagtailadmin/css/panels/hallo.css']}
        result = Media(js=script_files, css=style_files)
        for plugin in self.plugins:
            result = result + plugin.media
        return result
def __init__(self):
    """Create the ``EditorHTMLConverter`` (built without arguments) for this object."""
    # NOTE(review): no parent constructor is called here - confirm that the
    # enclosing class does not need one.
    html_converter = EditorHTMLConverter()
    self.converter = html_converter
class FroalaRichTextArea(WidgetWithScript, widgets.Textarea):
    """Textarea widget that is made editable with the Froala editor."""

    def __init__(self, **kwargs):
        """Configure the widget.

        Args:
            **kwargs: May contain ``options`` (the editor configuration) and
                ``features`` (the rich text feature list). Both are consumed
                here; everything else is forwarded to the parent widget.
        """
        try:
            self.options = kwargs.pop("options")
        except KeyError:
            # No explicit options: start from the licence key and layer the
            # optional project-wide FROALA_OPTIONS setting on top.
            self.options = {"key": settings.FROALA_LICENSE_KEY}
            self.options.update(getattr(settings, "FROALA_OPTIONS", {}))

        # 'features' must be removed BEFORE delegating to the parent
        # constructor; otherwise it is forwarded as an unexpected keyword
        # argument and the widget cannot be built with a feature list.
        self.features = kwargs.pop("features", None)
        super(FroalaRichTextArea, self).__init__(**kwargs)

        # NOTE(review): this is a string comparison, so it orders versions
        # lexicographically (e.g. "10.0" < "2.0") - confirm how
        # WAGTAIL_VERSION is defined.
        if WAGTAIL_VERSION >= "2.0":
            if self.features is None:
                self.features = features.get_default_features()
                self.converter = EditorHTMLConverter()
            else:
                self.converter = EditorHTMLConverter(self.features)

    def get_panel(self):
        """Return the edit panel class to use with this widget."""
        return RichTextFieldPanel

    def render(self, name, value, attrs=None):
        """Render the widget with *value* translated for the editor."""
        if value is None:
            translated_value = None
        elif WAGTAIL_VERSION >= "2.0":
            translated_value = self.converter.from_database_format(value)
        else:
            translated_value = expand_db_html(value, for_editor=True)
        return super(FroalaRichTextArea, self).render(name, translated_value, attrs)

    def render_js_init(self, id_, name, value):
        """Return the JavaScript call that activates Froala on this field."""
        return "makeFroalaRichTextEditable({0}, {1});".format(json.dumps(id_), json.dumps(self.options))

    def value_from_datadict(self, data, files, name):
        """Return the submitted value in database format, or None if absent."""
        original_value = super(FroalaRichTextArea, self).value_from_datadict(data, files, name)
        if original_value is None:
            return None
        if WAGTAIL_VERSION >= "2.0":
            return self.converter.to_database_format(original_value)
        return DbWhitelister.clean(original_value)

    @property
    def media(self):
        """Return the JS/CSS assets required by the editor.

        The CodeMirror and Font Awesome CDN assets are included only when
        the ``FROALA_CODEMIRROR`` / ``FROALA_FONT_AWESOME`` settings are
        truthy (both default to True).
        """
        js = [static("froala/vendor/js/froala_editor.pkgd.min.js")]
        # Only the bundled stylesheets are unconditional. The CDN stylesheets
        # are added below under their settings, so each appears at most once
        # and switching a setting off really removes it.
        css = [
            static("froala/css/wagtailfroala.css"),
            static("froala/vendor/css/froala_editor.pkgd.min.css"),
        ]

        if getattr(settings, "FROALA_CODEMIRROR", True):
            js.append("//cdnjs.cloudflare.com/ajax/libs/codemirror/5.3.0/codemirror.min.js")
            js.append("//cdnjs.cloudflare.com/ajax/libs/codemirror/5.3.0/mode/xml/xml.min.js")
            css.append("//cdnjs.cloudflare.com/ajax/libs/codemirror/5.3.0/codemirror.min.css")

        # Maintain the order of JavaScript files.
        js.append(static("froala/js/froala.js"))

        if getattr(settings, "FROALA_FONT_AWESOME", True):
            css.append("//cdnjs.cloudflare.com/ajax/libs/font-awesome/4.4.0/css/font-awesome.min.css")

        return Media(js=js, css={"all": css})
class HalloRichTextArea(widgets.Textarea):
    """Template-rendered textarea widget for the Hallo rich text editor."""

    template_name = 'wagtailadmin/widgets/hallo_rich_text_area.html'

    # this class's constructor accepts a 'features' kwarg
    accepts_features = True

    def get_panel(self):
        """Return the edit panel class to use with this widget."""
        return RichTextFieldPanel

    def __init__(self, *args, **kwargs):
        """Pull ``options`` and ``features`` out of kwargs and set up plugins."""
        self.options = kwargs.pop('options', None)

        self.features = kwargs.pop('features', None)
        if self.features is None:
            self.features = features.get_default_features()
        self.converter = EditorHTMLConverter(self.features)

        # Collect one plugin per feature from the registry, ignoring the
        # features for which get_editor_plugin gives a falsy answer.
        feature_plugins = [
            plugin
            for plugin in (
                features.get_editor_plugin('hallo', feature_name)
                for feature_name in self.features
            )
            if plugin
        ]
        self.plugins = CORE_HALLO_PLUGINS + feature_plugins
        self.plugins.sort(key=lambda plugin: plugin.order)

        super().__init__(*args, **kwargs)

    def format_value(self, value):
        """Convert the database rich text value to what the input field needs.

        The parent's formatting runs first; a ``None`` result is returned
        unchanged.
        """
        formatted = super().format_value(value)
        if formatted is not None:
            return self.converter.from_database_format(formatted)
        return None

    def get_context(self, name, value, attrs):
        """Extend the widget context with the plugin configuration as JSON."""
        context = super().get_context(name, value, attrs)

        if self.options is None or 'plugins' not in self.options:
            # No explicit plugin config: let every plugin contribute its own.
            plugin_data = OrderedDict()
            for plugin in self.plugins:
                plugin.construct_plugins_list(plugin_data)
        else:
            # explicit 'plugins' config passed in options, so use that
            plugin_data = self.options['plugins']

        context['widget']['plugins_json'] = json.dumps(plugin_data)
        return context

    def value_from_datadict(self, data, files, name):
        """Return the submitted value in database format, or None if absent."""
        submitted = super().value_from_datadict(data, files, name)
        if submitted is None:
            return None
        return self.converter.to_database_format(submitted)

    @property
    def media(self):
        """Return the widget's versioned assets plus those of each plugin."""
        scripts = [
            versioned_static('wagtailadmin/js/vendor/hallo.js'),
            versioned_static('wagtailadmin/js/hallo-bootstrap.js'),
        ]
        styles = {'all': [versioned_static('wagtailadmin/css/panels/hallo.css')]}
        combined = Media(js=scripts, css=styles)
        for plugin in self.plugins:
            combined = combined + plugin.media
        return combined
class Command(BaseCommand):
    """Management command that imports project pages from a JSON export.

    Each entry of the source JSON is turned into (or merged into) a
    ``ProjectPage`` identified by its ``drupal_id``; the whole import runs
    inside a single database transaction.
    """

    help = "Project migration script. Requires a source JSON file."
    # Parsed source JSON, replaced by handle().
    data = []
    # Maps a source term id ('tid') to its Option; filled in by handle().
    terms = {}
    # Whitelister used to clean imported body HTML.
    whitelister = EditorHTMLConverter().whitelister

    def add_arguments(self, parser):
        """Register the positional ``source`` argument (a readable file)."""
        parser.add_argument('source', type=argparse.FileType('r'), help='Migration source JSON file')

    @transaction.atomic
    def handle(self, *args, **options):
        """Load the source file and import every entry it contains.

        Raises:
            ProjectIndexPage.DoesNotExist: if there is no project index page
                to attach new projects to.
        """
        # Prepare the list of categories.
        for item in CATEGORIES:
            category, _ = Category.objects.get_or_create(name=item['category'])
            option, _ = Option.objects.get_or_create(value=item['name'], category=category)
            self.terms[item['tid']] = option

        self.parent_page = ProjectIndexPage.objects.first()
        if not self.parent_page:
            raise ProjectIndexPage.DoesNotExist('Project Index Page must exist to import projects')

        # Source fund ids mapped to the pages that represent them.
        self.funds = {
            '3625': Page.objects.get(title='Internet Freedom Fund'),
            '3654': Page.objects.get(title='Rapid Response Fund'),
            '3905': Page.objects.get(title='Core Infrastructure Fund'),
            '7791': Page.objects.get(title='Community Lab'),
        }

        with options['source'] as json_data:
            self.data = json.load(json_data)

        counter = 0
        for node_id in self.data:
            self.process(node_id)
            counter += 1

        self.stdout.write(f"Imported {counter} submissions.")

    def process(self, id):
        """Create or update the ``ProjectPage`` for the entry keyed by *id*."""
        node = self.data[id]

        try:
            project = ProjectPage.objects.get(drupal_id=node['nid'])
        except ProjectPage.DoesNotExist:
            project = ProjectPage(drupal_id=node['nid'])

        # TODO timezone?
        project.submit_time = datetime.fromtimestamp(int(node['created']), timezone.utc)

        project.title = node['title']

        image_url_base = 'https://www.opentech.fund/sites/default/files/'
        try:
            uri = node['field_project_image']['uri']
        except TypeError:
            # There was no image
            pass
        else:
            parts = urlsplit(uri)
            image_url = image_url_base + parts.netloc + parts.path
            project.icon = self.wagtail_image_obj_from_url(
                image_url, node['field_project_image']['fid'])

        project.introduction = self.get_field(node, 'field_preamble')

        cleaned_body = self.whitelister.clean(self.get_field(node, 'body'))
        if project.introduction:
            project.body = [('paragraph', RichText(cleaned_body))]
        else:
            # Use the first sentence of the body as an intro
            very_clean_body = bleach.clean(cleaned_body, strip=True)
            introduction = very_clean_body.split('.')[0] + '.'
            project.introduction = introduction
            body_without_intro = cleaned_body.replace(introduction, '').strip()
            project.body = [('paragraph', RichText(body_without_intro))]

        status = {
            '329': 'idea',
            '328': 'exists',
            '366': 'release',
            '367': 'production',
        }
        project.status = status[node['field_proposal_status']['tid']]

        project.contact_details.clear()

        for site in self.ensure_iterable(node['field_project_url']):
            url = site['url']
            if 'github' in url:
                page_type = 'github'
                # For GitHub links only the path part of the URL is stored.
                url = urlsplit(url).path
            else:
                page_type = 'website'

            project.contact_details.add(
                ProjectContactDetails(
                    service=page_type,
                    value=url,
                ))

        project.contact_details.add(
            ProjectContactDetails(
                service='twitter',
                value=self.get_field(node, 'field_project_twitter'),
            ))

        # Funding
        project.funding.clear()

        years = self.ensure_iterable(node['field_project_funding_year'])
        amounts = self.ensure_iterable(node['field_project_funding_amount'])
        durations = self.ensure_iterable(node['field_project_term_time'])
        funds = self.ensure_iterable(node['field_project_funding_request'])
        # zip_longest pads the shorter lists with None; subscripting None
        # raises TypeError, which is how missing entries are detected below.
        for year, amount, duration, fund in itertools.zip_longest(
                years, amounts, durations, funds):
            try:
                fund = self.funds[fund['target_id']]
            except TypeError:
                fund = None

            try:
                duration = duration['value']
            except TypeError:
                duration = 0

            try:
                amount = amount['value']
            except TypeError:
                # This is an error, don't give funding
                continue

            # NOTE(review): a missing year is not handled like the other
            # fields and would raise TypeError here - confirm the source
            # data always provides one year per amount.
            project.funding.add(
                ProjectFunding(
                    value=amount,
                    year=year['value'],
                    duration=duration,
                    source=fund,
                ))

        category_fields = [
            'field_term_region',
            'field_term_country',
            'field_technology_attribute',
            'field_proposal_theme',
            'field_proposal_focus',
            'field_proposal_beneficiaries',
        ]
        categories = {}
        for category in category_fields:
            terms = self.ensure_iterable(node[category])
            for term in terms:
                option = self.get_referenced_term(term['tid'])
                if option:
                    categories.setdefault(option.category.id, []).append(option.id)

        project.categories = json.dumps(categories)

        try:
            if not project.get_parent():
                self.parent_page.add_child(instance=project)
            project.save_revision().publish()
            # The title is written as text; encoding it first would print
            # the bytes repr (b'...') instead of the title itself.
            self.stdout.write(
                f"Processed \"{node['title']}\" ({node['nid']})"
            )
        except IntegrityError:
            self.stdout.write(
                f"*** Skipped \"{node['title']}\" ({node['nid']}) due to IntegrityError"
            )

    def ensure_iterable(self, value):
        """Wrap a single dict in a list; return any other value unchanged."""
        if isinstance(value, dict):
            value = [value]
        return value

    def get_field(self, node, field):
        """Return the text of ``node[field]``, preferring its sanitised form.

        ``'safe_value'`` is tried first, then ``'value'``. An empty string is
        returned when neither can be read, e.g. because the field is not a
        mapping or has neither key.

        Raises:
            KeyError: if *field* itself is missing from *node*.
        """
        data = node[field]
        for key in ('safe_value', 'value'):
            try:
                return data[key]
            except (TypeError, KeyError):
                # TypeError: data is not a mapping; KeyError: the key is
                # absent. Either way fall through to the next candidate.
                continue
        return ''

    def get_referenced_term(self, tid):
        """Return the Option registered for term id *tid*, or None if unknown."""
        return self.terms.get(tid)

    def nl2br(self, value):
        """Insert a ``<br>`` before every CRLF line break in *value*."""
        return value.replace('\r\n', '<br>\n')

    @staticmethod
    def wagtail_image_obj_from_url(url, drupal_id=None):
        """
        Return the image for *drupal_id*, downloading it from *url* if it
        doesn't already exist.

        Returns None when the image is not already stored and cannot be
        downloaded or is not a valid image file.
        """
        if drupal_id:
            try:
                return WagtailImage.objects.get(drupal_id=drupal_id)
            except WagtailImage.DoesNotExist:
                pass

        if url and valid_url_extension(url) and valid_url_mimetype(url):
            r = requests.get(url, stream=True)

            if r.status_code == requests.codes.ok:
                img_buffer = BytesIO(r.content)
                img_filename = url.rsplit('/', 1)[1]

                # Test downloaded file is valid image file
                try:
                    pil_image = Image.open(img_buffer)
                    pil_image.verify()
                except Exception as e:
                    # Best effort: an unreadable image is reported and skipped.
                    print(f"Invalid image {url}: {e}")
                else:
                    return WagtailImage.objects.create(
                        title=img_filename,
                        file=ImageFile(img_buffer, name=img_filename),
                        drupal_id=drupal_id,
                    )
        return None
class Command(BaseCommand):
    """Management command that imports news nodes from a JSON export.

    Every entry of the source file becomes (or updates) a ``NewsPage``
    identified by its ``drupal_id``; new pages are added below the first
    ``NewsIndex`` page.
    """

    help = "News migration script. Requires a source JSON file."

    # NOTE: these are class-level containers; ``handle`` rebinds ``data`` on
    # the instance but fills ``terms`` in place.
    data = []
    terms = {}
    whitelister = EditorHTMLConverter().whitelister

    def add_arguments(self, parser):
        """Register the positional ``source`` argument (a readable file)."""
        parser.add_argument(
            'source',
            type=argparse.FileType('r'),
            help='Migration source JSON file',
        )

    @transaction.atomic
    def handle(self, *args, **options):
        """Load the source JSON and import every node in one transaction.

        Raises:
            NewsIndex.DoesNotExist: if there is no ``NewsIndex`` page to
                attach imported pages to.
        """
        # Prepare the list of categories.
        for item in CATEGORIES:
            category, _ = Category.objects.get_or_create(name=item['category'])
            option, _ = Option.objects.get_or_create(
                value=item['name'], category=category)
            self.terms[item['tid']] = option

        self.parent_page = NewsIndex.objects.first()
        if not self.parent_page:
            raise NewsIndex.DoesNotExist(
                'News Index Page must exist to import News')

        # Maps the source ``tid`` of an article type to its NewsType.
        self.types = {
            '4': NewsType.objects.get_or_create(title='Press Clip')[0],
            '5': NewsType.objects.get_or_create(title='Program Update')[0],
            '388': NewsType.objects.get_or_create(title='Research')[0],
        }

        with options['source'] as json_data:
            self.data = json.load(json_data)

        counter = 0
        for node_id in self.data:
            self.process(node_id)
            counter += 1

        self.stdout.write(f"Imported {counter} submissions.")

    def process(self, id):
        """Create or update the ``NewsPage`` for the node stored under ``id``.

        An ``IntegrityError`` while saving is reported on stdout and the node
        is skipped; other lookup failures propagate to the caller.
        """
        node = self.data[id]

        try:
            news = NewsPage.objects.get(drupal_id=node['nid'])
        except NewsPage.DoesNotExist:
            news = NewsPage(drupal_id=node['nid'])

        # TODO timezone?
        created = datetime.fromtimestamp(int(node['created']), timezone.utc)
        news.submit_time = created
        news.publication_date = created

        news.title = node['title']
        news.introduction = self.get_field(node, 'field_preamble')

        cleaned_body = self.whitelister.clean(self.get_field(node, 'body'))
        news.body = [('paragraph', RichText(cleaned_body))]

        news.news_types.clear()
        for news_type in self.ensure_iterable(node['field_article_type']):
            news.news_types.add(
                NewsPageNewsType(news_type=self.types[news_type['tid']]))

        news.related_projects.clear()
        for project in self.ensure_iterable(node['field_article_project']):
            try:
                project_page = ProjectPage.objects.get(
                    drupal_id=project['target_id'])
            except ProjectPage.DoesNotExist:
                self.stdout.write(f"Missing project ID {project['target_id']}")
            else:
                news.related_projects.add(
                    NewsProjectRelatedPage(page=project_page))

        news.authors.clear()
        # NOTE(review): unlike the ``uid`` lookup below, a missing User or
        # PersonPage here is not caught and aborts the import - confirm that
        # this is intended.
        for author in self.ensure_iterable(node['field_article_authors']):
            user = User.objects.get(drupal_id=author['target_id'])
            news.authors.add(
                NewsPageAuthor(
                    author=PersonPage.objects.get(title=user.full_name)))

        # The node's own ``uid`` is added as an author as well.
        try:
            user = User.objects.get(drupal_id=node['uid'])
        except User.DoesNotExist:
            pass
        else:
            # Users whose person page title differs from their full name.
            user_map = {'Dan Blah': 'Dan "Blah" Meredith'}
            name = user_map.get(user.full_name, user.full_name)
            # missing amin jobran
            try:
                news.authors.add(
                    NewsPageAuthor(author=PersonPage.objects.get(title=name)))
            except PersonPage.DoesNotExist:
                self.stdout.write(f'Missing person page: {name}')

        try:
            if not news.get_parent():
                self.parent_page.add_child(instance=news)
            news.save_revision().publish()
            # The title is written as text; encoding it first would print a
            # bytes repr (b'...') instead of the title.
            self.stdout.write(
                f"Processed \"{node['title']}\" ({node['nid']})"
            )
        except IntegrityError:
            self.stdout.write(
                f"*** Skipped \"{node['title']}\" ({node['nid']}) due to IntegrityError"
            )

    def ensure_iterable(self, value):
        """Wrap a single dict in a list; return any other value unchanged."""
        if isinstance(value, dict):
            value = [value]
        return value

    def get_field(self, node, field):
        """Return the text of ``node[field]``.

        ``'safe_value'`` is preferred, ``'value'`` is the fallback, and an
        empty string is returned when the field holds neither (for instance
        when it is not a mapping at all).

        Raises:
            KeyError: if ``field`` is missing from ``node``.
        """
        try:
            entry = node[field]
        except TypeError:
            return ''

        for key in ('safe_value', 'value'):
            try:
                return entry[key]
            except (TypeError, KeyError):
                # Not a mapping, or this key is absent: try the next one.
                continue
        return ''
class Command(BaseCommand):
    """Management command that imports person nodes from a JSON export.

    Every entry of the source file becomes (or updates) a ``PersonPage``
    identified by its ``drupal_id``; new pages are added below the first
    ``PersonIndexPage``.
    """

    help = "Person migration script. Requires a source JSON file."

    # NOTE: these are class-level containers; ``handle`` rebinds ``data`` on
    # the instance but fills ``terms`` in place.
    data = []
    terms = {}
    whitelister = EditorHTMLConverter().whitelister

    def add_arguments(self, parser):
        """Register the positional ``source`` argument (a readable file)."""
        parser.add_argument(
            'source',
            type=argparse.FileType('r'),
            help='Migration source JSON file',
        )

    @transaction.atomic
    def handle(self, *args, **options):
        """Load the source JSON and import every node in one transaction.

        Raises:
            PersonIndexPage.DoesNotExist: if there is no ``PersonIndexPage``
                to attach imported pages to.
        """
        # Prepare the list of categories.
        for item in CATEGORIES:
            category, _ = Category.objects.get_or_create(name=item['category'])
            option, _ = Option.objects.get_or_create(
                value=item['name'], category=category)
            self.terms[item['tid']] = option

        self.parent_page = PersonIndexPage.objects.first()
        if not self.parent_page:
            raise PersonIndexPage.DoesNotExist(
                'Person Index Page must exist to import people')

        # Maps the source value of ``field_team_type`` to a PersonType.
        self.types = {
            'team': PersonType.objects.get_or_create(title='Team')[0],
            'council': PersonType.objects.get_or_create(title='Advisory Council')[0],
            'fellow': PersonType.objects.get_or_create(title='Fellow')[0],
        }

        # Maps the ``target_id`` of a funding request to its fund page.
        self.funds = {
            '3625': Page.objects.get(title='Internet Freedom Fund'),
            '3654': Page.objects.get(title='Rapid Response Fund'),
            '3905': Page.objects.get(title='Core Infrastructure Fund'),
            '7791': Page.objects.get(title='Community Lab'),
            '3618': Page.objects.get(title='Information Controls Fellowship'),
            '3613': None,
            '3681': Page.objects.get(title='Digital Integrity Fellowship'),
        }

        # Maps the ``tid`` of a review panel to the fund page it reviews.
        self.review_funds = {
            '393': Page.objects.get(title='Internet Freedom Fund'),
            '389': Page.objects.get(title='Rapid Response Fund'),
            '391': Page.objects.get(title='Core Infrastructure Fund'),
            'NOT_USED': Page.objects.get(title='Community Lab'),
            '394': Page.objects.get(title='Information Controls Fellowship'),
            '390': Page.objects.get(title='Digital Integrity Fellowship'),
        }

        with options['source'] as json_data:
            self.data = json.load(json_data)

        counter = 0
        for node_id in self.data:
            self.process(node_id)
            counter += 1

        self.stdout.write(f"Imported {counter} submissions.")

    def process(self, id):
        """Create or update the ``PersonPage`` for the node stored under ``id``.

        An ``IntegrityError`` while saving is reported on stdout and the node
        is skipped; other lookup failures propagate to the caller.
        """
        node = self.data[id]
        # Progress output; written as text rather than an encoded bytes repr.
        self.stdout.write(node['title'])

        try:
            person = PersonPage.objects.get(drupal_id=node['nid'])
        except PersonPage.DoesNotExist:
            person = PersonPage(drupal_id=node['nid'])

        # TODO timezone?
        person.submit_time = datetime.fromtimestamp(
            int(node['created']), timezone.utc)

        # The last whitespace-separated word of the title is the last name,
        # everything before it the first name.
        *first_name, last_name = node['title'].split()
        person.first_name = ' '.join(first_name)
        person.last_name = last_name
        person.title = node['title']

        person.job_title = self.get_field(node, 'field_team_title')
        person.active = bool(int(node['field_team_status']['value']))

        person.person_types.clear()
        for person_type in self.ensure_iterable(node['field_team_type']):
            person.person_types.add(
                PersonPagePersonType(
                    person_type=self.types[person_type['value']]))

        image_url_base = 'https://www.opentech.fund/sites/default/files/'
        try:
            uri = node['field_team_photo']['uri']
        except TypeError:
            # There was no image
            pass
        else:
            parts = urlsplit(uri)
            image_url = image_url_base + parts.netloc + parts.path
            person.photo = self.wagtail_image_obj_from_url(
                image_url, node['field_team_photo']['fid'])

        cleaned_body = self.whitelister.clean(self.get_field(node, 'body'))

        # Use the start of the body as an intro: sentences are taken from the
        # tag-stripped body until the intro is at least 20 characters long.
        very_clean_body = bleach.clean(cleaned_body, strip=True)
        very_clean_body = very_clean_body.replace('.\n', '. ')
        parts = very_clean_body.split('. ')
        introduction = ''
        while len(introduction) < 20:
            try:
                introduction += parts.pop(0)
                introduction += '. '
            except IndexError:
                break
        introduction = introduction.strip()
        person.introduction = introduction

        body_without_intro = cleaned_body.replace(introduction, '').strip()
        person.biography = [('paragraph', RichText(body_without_intro))]

        person.social_media_profile.clear()
        twitter = self.get_field(node, 'field_team_twitter')
        if twitter:
            person.social_media_profile.add(
                SocialMediaProfile(service='twitter', username=twitter))

        person.contact_details.clear()
        for contact in ['im', 'otr', 'irc', 'pgp', 'phone']:
            detail = self.get_field(node, f'field_team_{contact}')
            if detail:
                person.contact_details.add(
                    PersonContactInfomation(
                        contact_method=contact, contact_detail=detail))

        person.funds_reviewed.clear()
        for reviewer in self.ensure_iterable(node['field_team_review_panel']):
            person.funds_reviewed.add(
                FundReviewers(page=self.review_funds[reviewer['tid']]))

        # Funding
        person.funding.clear()
        years = self.ensure_iterable(node['field_project_funding_year'])
        amounts = self.ensure_iterable(node['field_project_funding_amount'])
        durations = self.ensure_iterable(node['field_project_term_time'])
        funds = self.ensure_iterable(node['field_project_funding_request'])
        # The four fields are read in parallel; zip_longest pads the shorter
        # ones with None, which the TypeError handlers below deal with.
        for year, amount, duration, fund in itertools.zip_longest(
                years, amounts, durations, funds):
            try:
                fund = self.funds[fund['target_id']]
            except TypeError:
                fund = None

            try:
                duration = duration['value']
            except TypeError:
                duration = 0

            try:
                amount = amount['value']
            except TypeError:
                # This is an error, don't give funding
                continue

            person.funding.add(
                Funding(
                    value=amount,
                    year=year['value'],
                    duration=duration,
                    source=fund,
                ))

        try:
            if not person.get_parent():
                self.parent_page.add_child(instance=person)
            person.save_revision().publish()
            # The title is written as text; encoding it first would print a
            # bytes repr (b'...') instead of the title.
            self.stdout.write(
                f"Processed \"{node['title']}\" ({node['nid']})"
            )
        except IntegrityError:
            self.stdout.write(
                f"*** Skipped \"{node['title']}\" ({node['nid']}) due to IntegrityError"
            )

    def ensure_iterable(self, value):
        """Wrap a single dict in a list; return any other value unchanged."""
        if isinstance(value, dict):
            value = [value]
        return value

    def get_field(self, node, field):
        """Return the text of ``node[field]``.

        ``'safe_value'`` is preferred, ``'value'`` is the fallback, and an
        empty string is returned when the field holds neither (for instance
        when it is not a mapping at all).

        Raises:
            KeyError: if ``field`` is missing from ``node``.
        """
        try:
            entry = node[field]
        except TypeError:
            return ''

        for key in ('safe_value', 'value'):
            try:
                return entry[key]
            except (TypeError, KeyError):
                # Not a mapping, or this key is absent: try the next one.
                continue
        return ''

    def get_referenced_term(self, tid):
        """Return the option registered for ``tid``, or ``None`` if unknown."""
        try:
            return self.terms[tid]
        except KeyError:
            return None

    def nl2br(self, value):
        """Replace every CRLF in ``value`` with ``<br>`` plus a newline."""
        return value.replace('\r\n', '<br>\n')

    @staticmethod
    def wagtail_image_obj_from_url(url, drupal_id=None):
        """Return the image for ``drupal_id``, downloading it if necessary.

        An existing ``WagtailImage`` with the given ``drupal_id`` is reused.
        Otherwise ``url`` is fetched, checked to be a valid image and stored
        as a new ``WagtailImage``.

        Returns:
            The image object, or ``None`` when the URL is rejected, the
            download does not succeed or the payload is not a valid image.
        """
        if drupal_id:
            try:
                return WagtailImage.objects.get(drupal_id=drupal_id)
            except WagtailImage.DoesNotExist:
                pass

        if url and valid_url_extension(url) and valid_url_mimetype(url):
            r = requests.get(url, stream=True)

            if r.status_code == requests.codes.ok:
                img_buffer = BytesIO(r.content)
                img_filename = url.rsplit('/', 1)[1]

                # Test downloaded file is valid image file
                try:
                    pil_image = Image.open(img_buffer)
                    pil_image.verify()
                except Exception as e:
                    print(f"Invalid image {url}: {e}")
                else:
                    # verify() has read from the buffer; rewind it so the
                    # file is stored from its first byte.
                    img_buffer.seek(0)
                    img = WagtailImage.objects.create(
                        title=img_filename,
                        file=ImageFile(img_buffer, name=img_filename),
                        drupal_id=drupal_id,
                    )
                    return img
        return None
class TinyMCERichTextArea(WidgetWithScript, widgets.Textarea):
    """Textarea widget that is turned into a TinyMCE rich text editor.

    Keyword arguments given to the constructor override the entries returned
    by ``getDefaultArgs`` (``buttons``, ``menus``, ``options``); ``features``
    is taken out separately and handed to the HTML converter.
    """

    # NOTE(review): the checks against WAGTAIL_VERSION below compare with the
    # string '2.0'. If WAGTAIL_VERSION is a string this is a lexicographic
    # comparison ('10.0' sorts before '2.0') - confirm where it is defined.

    @classmethod
    def getDefaultArgs(cls):
        """Return the default editor configuration as a fresh dict."""
        return {
            # One toolbar row, made of groups of button names.
            'buttons': [[
                ['undo', 'redo'],
                ['styleselect'],
                ['bold', 'italic'],
                ['bullist', 'numlist', 'outdent', 'indent'],
                ['table'],
                ['link', 'unlink'],
                ['wagtaildoclink', 'wagtailimage', 'wagtailembed'],
                ['pastetext', 'fullscreen'],
            ]],
            'menus': False,
            'options': {
                'browser_spellcheck': True,
                'noneditable_leave_contenteditable': True,
                'language': translation.to_locale(translation.get_language()),
                'language_load': True,
            },
        }

    def __init__(self, attrs=None, **kwargs):
        """Set up the editor configuration and, on Wagtail 2+, the converter.

        Args:
            attrs: HTML attributes passed on to the parent widget.
            **kwargs: overrides for the default configuration; the optional
                ``features`` entry is removed and stored on ``self.features``.
        """
        super(TinyMCERichTextArea, self).__init__(attrs)
        self.kwargs = self.getDefaultArgs()
        self.features = kwargs.pop('features', None)
        # ``kwargs`` is always a dict here, so it can be merged directly.
        # The merge is shallow: a given key replaces the whole default entry.
        self.kwargs.update(kwargs)

        if WAGTAIL_VERSION >= '2.0':
            if self.features is None:
                self.features = features.get_default_features()
                self.converter = EditorHTMLConverter()
            else:
                self.converter = EditorHTMLConverter(self.features)

    def get_panel(self):
        """Return the panel class used to edit fields with this widget."""
        return RichTextFieldPanel

    def render(self, name, value, attrs=None, renderer=None):
        """Render the widget with ``value`` expanded from its database form.

        ``renderer`` is accepted because Django 2.1+ always passes it to
        ``Widget.render``. It is not forwarded: the parent is called with the
        same arguments as before this parameter existed.
        """
        if value is None:
            translated_value = None
        else:
            # NOTE(review): an earlier revision considered using
            # ``self.converter.from_database_format(value, for_editor=True)``
            # on Wagtail 2+ instead - confirm which conversion is wanted.
            translated_value = expand_db_html(value)
        return super(TinyMCERichTextArea, self).render(
            name, translated_value, attrs)

    def render_js_init(self, id_, name, value):
        """Return the JavaScript call that initialises the editor.

        ``buttons`` becomes the ``toolbar`` setting (one string per row,
        groups separated by ``' | '``) and ``menus`` the ``menubar`` setting;
        ``False`` disables the respective element.
        """
        kwargs = {
            'options': self.kwargs.get('options', {}),
        }

        if 'buttons' in self.kwargs:
            if self.kwargs['buttons'] is False:
                kwargs['toolbar'] = False
            else:
                kwargs['toolbar'] = [
                    ' | '.join([' '.join(groups) for groups in rows])
                    for rows in self.kwargs['buttons']
                ]

        if 'menus' in self.kwargs:
            if self.kwargs['menus'] is False:
                kwargs['menubar'] = False
            else:
                kwargs['menubar'] = ' '.join(self.kwargs['menus'])

        return "makeTinyMCEEditable({0}, {1});".format(
            json.dumps(id_), json.dumps(kwargs))

    def value_from_datadict(self, data, files, name):
        """Return the submitted value converted for storage.

        ``None`` is passed through. On Wagtail 2+ the instance's converter is
        used, otherwise ``DbWhitelister.clean``.
        """
        original_value = super(TinyMCERichTextArea, self).value_from_datadict(
            data, files, name)
        if original_value is None:
            return None
        if WAGTAIL_VERSION >= '2.0':
            return self.converter.to_database_format(original_value)
        else:
            return DbWhitelister.clean(original_value)
def editor_html_converter(self):
    """Build an ``EditorHTMLConverter`` for this object's ``features``."""
    enabled_features = self.features
    return EditorHTMLConverter(enabled_features)