Beispiel #1
0
def get_parser(parser, fmt='html'):
	"""Return a parser object for the requested output format and content kind.

	``fmt`` is checked first: 'raw' yields a ``RawParser`` and 'text' a
	``TextParser``. Any other value yields an ``HtmlParser`` whose setup is
	chosen by ``parser``:

	* 'signature' -- ``ONELINE_TAGS_LIST``, ``auto_paragraphs`` set to False
	* 'profile', 'news_long' -- ``FULL_TAGS_LIST``, ``add_nofollow`` set to False
	* 'blog', 'full' -- ``FULL_TAGS_LIST``, ``auto_paragraphs`` set to False
	* 'news_short' -- ``DEFAULT_TAG_LIST``, ``add_nofollow`` set to False
	* anything else -- ``HtmlParser()`` constructed without arguments
	"""
	if fmt == 'raw':
		return RawParser()
	if fmt == 'text':
		return TextParser()

	# Each known parser name needs a tag list plus exactly one attribute
	# that is switched off on the freshly created HtmlParser.
	if parser == 'signature':
		tag_list, disabled_option = ONELINE_TAGS_LIST, 'auto_paragraphs'
	elif parser in ('profile', 'news_long'):
		tag_list, disabled_option = FULL_TAGS_LIST, 'add_nofollow'
	elif parser in ('blog', 'full'):
		tag_list, disabled_option = FULL_TAGS_LIST, 'auto_paragraphs'
	elif parser == 'news_short':
		tag_list, disabled_option = DEFAULT_TAG_LIST, 'add_nofollow'
	else:
		# Unknown parser names get an HtmlParser built without arguments.
		return HtmlParser()

	html_parser = HtmlParser(supported_tags=tag_list)
	setattr(html_parser, disabled_option, False)
	return html_parser
Beispiel #2
0
def get_parser(editor_type):
	"""Return an ``HtmlParser`` configured for the given editor type.

	* 'signature' -- copy of ``ONELINE_TAGS`` whose 'a' tag requires
	  ``rel="nofollow"``; ``auto_paragraphs`` is set to False
	* 'blog' -- the same treatment applied to a copy of ``FULL_TAGS``
	* 'profile' -- copy of ``DEFAULT_TAGS`` extended with an 'img' tag, which
	  is also added to the ``opt`` set of the '' entry
	* anything else -- ``HtmlParser()`` constructed without arguments
	"""
	def build_nofollow_parser(base_tags):
		# Shallow-copy the mapping and deep-copy only the 'a' entry, so the
		# changes below are made on copies rather than on the shared objects.
		tags = copy(base_tags)
		link_tag = deepcopy(tags['a'])
		tags['a'] = link_tag
		link_tag.req_attributes['rel'] = 'nofollow'
		# NOTE(review): this replaces the whole validator mapping of the copied
		# tag -- confirm no other validators on 'a' are expected to survive.
		link_tag.attribute_validators = {'rel': [NofollowValidator()]}
		nofollow_parser = HtmlParser(supported_tags=tags)
		nofollow_parser.auto_paragraphs = False
		return nofollow_parser

	if editor_type == 'signature':
		return build_nofollow_parser(ONELINE_TAGS)
	if editor_type == 'blog':
		return build_nofollow_parser(FULL_TAGS)
	if editor_type == 'profile':
		tags = copy(DEFAULT_TAGS)
		tags['img'] = HtmlTag(
			'img',
			opt_attributes=['title'],
			req_attributes={'src': '', 'alt': ''},
			empty=True,
			attribute_validators={'src': [HrefValidator()]},
		)
		# Deep-copy the '' entry before extending its ``opt`` set with 'img'.
		root_tag = deepcopy(tags[''])
		tags[''] = root_tag
		root_tag.opt.add('img')
		return HtmlParser(supported_tags=tags)
	return HtmlParser()
Beispiel #3
0
def get_parser(parser, fmt='html'): #pylint: disable=too-many-return-statements
	"""Return a parser object for the requested output format and content kind.

	``fmt`` is checked first: 'raw' yields a ``RawParser`` and 'text' a
	``TextParser``. Any other value yields an ``HtmlParser`` chosen by
	``parser``:

	* 'signature' -- copy of ``ONELINE_TAGS`` whose 'a' tag requires
	  ``rel="nofollow"``; ``auto_paragraphs`` is set to False
	* 'blog', 'full' -- the same treatment applied to a copy of ``FULL_TAGS``
	* 'profile' -- copy of ``DEFAULT_TAGS`` extended with an 'img' tag, which
	  is also added to the ``opt`` set of the '' entry
	* 'news_long' -- unmodified copy of ``FULL_TAGS``
	* 'news_short' and anything else -- ``HtmlParser()`` without arguments
	"""
	if fmt == 'raw':
		return RawParser()
	if fmt == 'text':
		return TextParser()

	if parser in ('signature', 'blog', 'full'):
		tags = copy(ONELINE_TAGS if parser == 'signature' else FULL_TAGS)
		# Deep-copy the 'a' entry so the changes below are made on a copy.
		link_tag = deepcopy(tags['a'])
		tags['a'] = link_tag
		link_tag.req_attributes['rel'] = 'nofollow'
		# NOTE(review): this replaces the whole validator mapping of the copied
		# tag -- confirm no other validators on 'a' are expected to survive.
		link_tag.attribute_validators = {'rel': [NofollowValidator()]}
		html_parser = HtmlParser(supported_tags=tags)
		html_parser.auto_paragraphs = False
		return html_parser

	if parser == 'profile':
		tags = copy(DEFAULT_TAGS)
		tags['img'] = HtmlTag(
			'img',
			opt_attributes=['title'],
			req_attributes={'src': '', 'alt': ''},
			empty=True,
			attribute_validators={'src': [HrefValidator()]},
		)
		# Deep-copy the '' entry before extending its ``opt`` set with 'img'.
		root_tag = deepcopy(tags[''])
		tags[''] = root_tag
		root_tag.opt.add('img')
		return HtmlParser(supported_tags=tags)

	if parser == 'news_long':
		return HtmlParser(supported_tags=copy(FULL_TAGS))

	# 'news_short' and every unrecognised name share the argument-less parser.
	return HtmlParser()
Beispiel #4
0
	def setUp(self):
		"""Store a new HtmlParser on ``self.parser`` with auto_paragraphs True and add_nofollow False."""
		fresh_parser = HtmlParser()
		fresh_parser.auto_paragraphs = True
		fresh_parser.add_nofollow = False
		self.parser = fresh_parser
Beispiel #5
0
class ParserTest(TestCase):
	"""Checks the output of HtmlParser and of the parsers returned by get_parser."""

	def setUp(self):
		"""Store a new HtmlParser on ``self.parser`` with auto_paragraphs True and add_nofollow False."""
		default_parser = HtmlParser()
		default_parser.auto_paragraphs = True
		default_parser.add_nofollow = False
		self.parser = default_parser

	def _assert_output(self, markup, expected, parser=None):
		"""Parse ``markup`` and assert that the parser output equals ``expected``.

		``self.parser`` is used unless another parser is passed in.
		"""
		if parser is None:
			parser = self.parser
		parser.parse(markup)
		self.assertEqual(parser.get_output(), expected)

	def test_auto_paragraph(self):
		"""Bare text is expected to come back wrapped in a paragraph."""
		self._assert_output("Test", "<p>Test</p>")

	def test_valid_html(self):
		"""Already valid markup, including blank lines inside ``pre``, is expected unchanged."""
		markup = "<p><strong>Test</strong></p>\n<pre>Tadaaa\n\n</pre><p>Text</p>"
		self._assert_output(markup, markup)

	def test_opened_tag(self):
		"""An unclosed paragraph is expected to be closed in the output."""
		self._assert_output("<p>Test", "<p>Test</p>")

	def test_opened_nested_tag(self):
		"""An unclosed nested tag is expected to be closed before its parent."""
		self._assert_output("<p><strong>Test</p>", "<p><strong>Test</strong></p>")

	def test_unknown_tag(self):
		"""An unknown tag is expected to be escaped and wrapped in a paragraph."""
		self._assert_output(
			"<xxx>Test</xxx>",
			'<p>&lt;xxx&gt;Test&lt;/xxx&gt;</p>',
		)

	def test_attribute(self):
		"""A link with an in-page ``href`` is expected unchanged."""
		markup = '<p><a href="#test">Test</a></p>'
		self._assert_output(markup, markup)

	def test_missing_attribute(self):
		"""A link without ``href`` is expected to receive ``href="#"``."""
		self._assert_output('<p><a>Test</a></p>', '<p><a href="#">Test</a></p>')

	def test_malicious_href(self):
		"""A ``javascript:`` href is expected to be replaced by ``#``."""
		self._assert_output(
			"""<p><a href="javascript:alert('XSS')">Test</a></p>""",
			'<p><a href="#">Test</a></p>',
		)

	def test_signature_nofollow(self):
		"""The signature parser is expected to add ``rel="nofollow"`` to links."""
		self._assert_output(
			'<a href="http://www.linuxos.sk">Test</a>',
			'<a href="http://www.linuxos.sk" rel="nofollow">Test</a>',
			parser=get_parser('signature'),
		)

	def test_signature_bad_nofollow(self):
		"""The signature parser is expected to turn ``rel="follow"`` into ``rel="nofollow"``."""
		self._assert_output(
			'<a href="http://www.linuxos.sk" rel="follow">Test</a>',
			'<a href="http://www.linuxos.sk" rel="nofollow">Test</a>',
			parser=get_parser('signature'),
		)

	def test_profile_parser(self):
		"""The profile parser is expected to keep an ``img`` tag unchanged."""
		markup = '<p><img alt="" src="http://www.linuxos.sk/img.png"></p>'
		self._assert_output(markup, markup, parser=get_parser('profile'))

	def test_auto_paragraphs(self):
		"""Text blocks separated by a blank line are expected to become separate paragraphs."""
		self._assert_output(
			"Paragraph1\n\nParagraph2",
			"<p>Paragraph1</p>\n\n<p>Paragraph2</p>",
		)

	def test_auto_paragraphs_between(self):
		"""Text following a blockquote is expected to get its own paragraph."""
		self._assert_output(
			"<blockquote>BQ</blockquote>Paragraph",
			"<blockquote>BQ</blockquote><p>Paragraph</p>",
		)

	def test_attr_entity(self):
		"""An ``&amp;`` entity inside an attribute value is expected unchanged."""
		markup = '<p><a href="http://linuxos.sk/a&amp;b">Test</a></p>'
		self._assert_output(markup, markup)

	def test_code_class(self):
		"""``class="code-cpp"`` on ``pre`` is expected to be kept, ``class="wrong"`` to be dropped."""
		cases = (
			('<pre class="code-cpp">cpp</pre>', '<pre class="code-cpp">cpp</pre>'),
			('<pre class="wrong">wrong</pre>', '<pre>wrong</pre>'),
		)
		# Both cases run on the same parser instance, one after the other.
		for markup, expected in cases:
			self._assert_output(markup, expected)
Beispiel #6
0
	def setUp(self):
		"""Create a new HtmlParser on ``self.parser`` and set its auto_paragraphs attribute to True."""
		self.parser = HtmlParser()
		self.parser.auto_paragraphs = True