def get_parser(parser, fmt='html'):
    """
    Build a parser instance for the requested output format and profile.

    ``fmt`` is checked first: ``'raw'`` gives a ``RawParser`` and ``'text'``
    a ``TextParser``. Any other value gives an ``HtmlParser`` whose supported
    tags and flags depend on ``parser``:

    * ``'signature'`` -- ``ONELINE_TAGS_LIST``, ``auto_paragraphs`` off
    * ``'profile'`` -- ``FULL_TAGS_LIST``, ``add_nofollow`` off
    * ``'blog'`` / ``'full'`` -- ``FULL_TAGS_LIST``, ``auto_paragraphs`` off
    * ``'news_short'`` -- ``DEFAULT_TAG_LIST``, ``add_nofollow`` off
    * ``'news_long'`` -- ``FULL_TAGS_LIST``, ``add_nofollow`` off
    * anything else -- ``HtmlParser()`` with its defaults
    """
    if fmt == 'raw':
        return RawParser()
    if fmt == 'text':
        return TextParser()

    # Pick the tag list and the single flag that gets switched off; the tag
    # list names are only looked up in the branch that is actually taken.
    if parser == 'signature':
        tag_list, disabled_flag = ONELINE_TAGS_LIST, 'auto_paragraphs'
    elif parser == 'profile':
        tag_list, disabled_flag = FULL_TAGS_LIST, 'add_nofollow'
    elif parser == 'blog' or parser == 'full':
        tag_list, disabled_flag = FULL_TAGS_LIST, 'auto_paragraphs'
    elif parser == 'news_short':
        tag_list, disabled_flag = DEFAULT_TAG_LIST, 'add_nofollow'
    elif parser == 'news_long':
        tag_list, disabled_flag = FULL_TAGS_LIST, 'add_nofollow'
    else:
        return HtmlParser()

    html_parser = HtmlParser(supported_tags=tag_list)
    setattr(html_parser, disabled_flag, False)
    return html_parser
def get_parser(editor_type):
    """
    Build an ``HtmlParser`` configured for the given editor type.

    * ``'signature'`` -- copy of ``ONELINE_TAGS`` whose ``a`` tag requires
      ``rel="nofollow"``; ``auto_paragraphs`` is switched off.
    * ``'profile'`` -- copy of ``DEFAULT_TAGS`` extended with an ``img`` tag.
    * ``'blog'`` -- copy of ``FULL_TAGS`` whose ``a`` tag requires
      ``rel="nofollow"``; ``auto_paragraphs`` is switched off.
    * anything else -- ``HtmlParser()`` with its defaults.
    """

    def nofollow_parser(base_tags):
        # Shallow-copy the tag table and deep-copy only the 'a' entry, so the
        # changes below land on copies rather than on the objects in base_tags.
        tags = copy(base_tags)
        link_tag = deepcopy(tags['a'])
        link_tag.req_attributes['rel'] = 'nofollow'
        link_tag.attribute_validators = {'rel': [NofollowValidator()]}
        tags['a'] = link_tag
        html_parser = HtmlParser(supported_tags=tags)
        html_parser.auto_paragraphs = False
        return html_parser

    if editor_type == 'signature':
        return nofollow_parser(ONELINE_TAGS)

    if editor_type == 'profile':
        tags = copy(DEFAULT_TAGS)
        tags['img'] = HtmlTag(
            'img',
            opt_attributes=['title'],
            req_attributes={'src': '', 'alt': ''},
            empty=True,
            attribute_validators={'src': [HrefValidator()]},
        )
        # Add 'img' to the ``opt`` collection of the entry keyed '' (working
        # on a deep copy of that entry).
        # NOTE(review): '' is presumably the top-level/root entry -- confirm.
        root_entry = deepcopy(tags[''])
        root_entry.opt.add('img')
        tags[''] = root_entry
        return HtmlParser(supported_tags=tags)

    if editor_type == 'blog':
        return nofollow_parser(FULL_TAGS)

    return HtmlParser()
def get_parser(parser, fmt='html'): #pylint: disable=too-many-return-statements
    """
    Build a parser instance for the requested output format and profile.

    ``fmt`` is checked first: ``'raw'`` gives a ``RawParser`` and ``'text'``
    a ``TextParser``. Any other value gives an ``HtmlParser`` chosen by
    ``parser``:

    * ``'signature'`` -- copy of ``ONELINE_TAGS`` whose ``a`` tag requires
      ``rel="nofollow"``; ``auto_paragraphs`` is switched off.
    * ``'profile'`` -- copy of ``DEFAULT_TAGS`` extended with an ``img`` tag.
    * ``'blog'`` / ``'full'`` -- copy of ``FULL_TAGS`` whose ``a`` tag
      requires ``rel="nofollow"``; ``auto_paragraphs`` is switched off.
    * ``'news_short'`` -- ``HtmlParser()`` with its defaults.
    * ``'news_long'`` -- ``HtmlParser`` over a copy of ``FULL_TAGS``.
    * anything else -- ``HtmlParser()`` with its defaults.
    """

    def nofollow_parser(base_tags):
        # Shallow-copy the tag table and deep-copy only the 'a' entry, so the
        # changes below land on copies rather than on the objects in base_tags.
        tags = copy(base_tags)
        link_tag = deepcopy(tags['a'])
        link_tag.req_attributes['rel'] = 'nofollow'
        link_tag.attribute_validators = {'rel': [NofollowValidator()]}
        tags['a'] = link_tag
        html_parser = HtmlParser(supported_tags=tags)
        html_parser.auto_paragraphs = False
        return html_parser

    if fmt == 'raw':
        return RawParser()
    if fmt == 'text':
        return TextParser()

    if parser == 'signature':
        return nofollow_parser(ONELINE_TAGS)

    if parser == 'profile':
        tags = copy(DEFAULT_TAGS)
        tags['img'] = HtmlTag(
            'img',
            opt_attributes=['title'],
            req_attributes={'src': '', 'alt': ''},
            empty=True,
            attribute_validators={'src': [HrefValidator()]},
        )
        # Add 'img' to the ``opt`` collection of the entry keyed '' (working
        # on a deep copy of that entry).
        # NOTE(review): '' is presumably the top-level/root entry -- confirm.
        root_entry = deepcopy(tags[''])
        root_entry.opt.add('img')
        tags[''] = root_entry
        return HtmlParser(supported_tags=tags)

    if parser == 'blog' or parser == 'full':
        return nofollow_parser(FULL_TAGS)

    if parser == 'news_short':
        return HtmlParser()

    if parser == 'news_long':
        return HtmlParser(supported_tags=copy(FULL_TAGS))

    return HtmlParser()
def setUp(self):
    """Give the test a fresh ``HtmlParser`` with auto paragraphs on and nofollow off."""
    html_parser = HtmlParser()
    html_parser.auto_paragraphs = True
    html_parser.add_nofollow = False
    self.parser = html_parser
class ParserTest(TestCase):
    """Checks of ``HtmlParser`` output and of parsers built by ``get_parser``."""

    def setUp(self):
        # Default fixture: auto paragraphs enabled, nofollow disabled.
        html_parser = HtmlParser()
        html_parser.auto_paragraphs = True
        html_parser.add_nofollow = False
        self.parser = html_parser

    def _render(self, code, parser=None):
        """Parse ``code`` and return the parser output.

        Uses ``self.parser`` unless another ``parser`` is passed in.
        """
        active = self.parser if parser is None else parser
        active.parse(code)
        return active.get_output()

    def test_auto_paragraph(self):
        self.assertEqual(self._render('Test'), '<p>Test</p>')

    def test_valid_html(self):
        markup = "<p><strong>Test</strong></p>\n<pre>Tadaaa\n\n</pre><p>Text</p>"
        self.assertEqual(self._render(markup), markup)

    def test_opened_tag(self):
        self.assertEqual(self._render('<p>Test'), '<p>Test</p>')

    def test_opened_nested_tag(self):
        self.assertEqual(
            self._render('<p><strong>Test</p>'),
            '<p><strong>Test</strong></p>',
        )

    def test_unknown_tag(self):
        markup = '<xxx>Test</xxx>'
        # NOTE(review): both replace() calls map a character to itself, so
        # they are no-ops as written; they may have been meant to produce
        # escaped entities -- confirm against the parser's behaviour.
        expected = '<p>' + markup.replace('<', '<').replace('>', '>') + '</p>'
        self.assertEqual(self._render(markup), expected)

    def test_attribute(self):
        markup = '<p><a href="#test">Test</a></p>'
        self.assertEqual(self._render(markup), markup)

    def test_missing_attribute(self):
        self.assertEqual(
            self._render('<p><a>Test</a></p>'),
            '<p><a href="#">Test</a></p>',
        )

    def test_malicious_href(self):
        markup = """<p><a href="javascript:alert('XSS')">Test</a></p>"""
        self.assertEqual(self._render(markup), '<p><a href="#">Test</a></p>')

    def test_signature_nofollow(self):
        markup = '<a href="http://www.linuxos.sk">Test</a>'
        output = self._render(markup, get_parser('signature'))
        self.assertEqual(
            output,
            '<a href="http://www.linuxos.sk" rel="nofollow">Test</a>',
        )

    def test_signature_bad_nofollow(self):
        markup = '<a href="http://www.linuxos.sk" rel="follow">Test</a>'
        output = self._render(markup, get_parser('signature'))
        self.assertEqual(
            output,
            '<a href="http://www.linuxos.sk" rel="nofollow">Test</a>',
        )

    def test_profile_parser(self):
        markup = '<p><img alt="" src="http://www.linuxos.sk/img.png"></p>'
        self.assertEqual(self._render(markup, get_parser('profile')), markup)

    def test_auto_paragraphs(self):
        self.assertEqual(
            self._render("Paragraph1\n\nParagraph2"),
            "<p>Paragraph1</p>\n\n<p>Paragraph2</p>",
        )

    def test_auto_paragraphs_between(self):
        self.assertEqual(
            self._render("<blockquote>BQ</blockquote>Paragraph"),
            "<blockquote>BQ</blockquote><p>Paragraph</p>",
        )

    def test_attr_entity(self):
        # NOTE(review): input and expected output are identical as written;
        # if the parser is meant to emit an entity for '&', the expected
        # string may have lost it -- confirm.
        markup = '<p><a href="http://linuxos.sk/a&b">Test</a></p>'
        self.assertEqual(self._render(markup), markup)

    def test_code_class(self):
        # Both snippets go through the same parser instance, one after another.
        accepted = '<pre class="code-cpp">cpp</pre>'
        self.assertEqual(self._render(accepted), accepted)
        self.assertEqual(
            self._render('<pre class="wrong">wrong</pre>'),
            '<pre>wrong</pre>',
        )
def setUp(self):
    """Give the test a fresh ``HtmlParser`` with auto paragraphs switched on."""
    html_parser = HtmlParser()
    html_parser.auto_paragraphs = True
    self.parser = html_parser