def test_parse_html_entities(self):
    """Compare ``test_pr.parse_html`` output against fixed HTML snippets.

    Each case pairs an input document with the exact string the parser is
    expected to return: bare ``http://`` URLs wrapped in ``<a>`` elements,
    everything else (existing anchors, ``<script>``/``<foo>`` markup)
    reproduced unchanged.
    """
    # NOTE(review): the test name refers to entities, yet these literals
    # contain raw tags rather than escaped ones -- confirm the fixtures
    # were not entity-decoded somewhere along the way.
    markup = '<script></script>'
    untouched = '<p>Test %s</p>' % markup

    cases = [
        # No URL present: output must equal the input.
        (untouched, untouched),
        # A single bare URL followed by the markup.
        (
            '<p>http://google.com %s</p>' % markup,
            ('<p><a href="http://google.com">http://google.com</a>'
             ' %s</p>') % markup,
        ),
        # Bare URLs at several nesting depths next to an existing anchor.
        (
            ('<p><a href="http://foo.com">http://foo.com</a> http://bar.com '
             '<span>http://baz.com <script> '
             '<b>http://nug.com <i>X <foo></i></b></span></p>'),
            ('<p><a href="http://foo.com">http://foo.com</a> '
             '<a href="http://bar.com">http://bar.com</a> '
             '<span><a href="http://baz.com">http://baz.com</a> <script> '
             '<b><a href="http://nug.com">http://nug.com</a> '
             '<i>X <foo></i></b></span></p>'),
        ),
        # Same idea with newlines separating the pieces.
        (
            ('<p><a href="http://foo.com">http://foo.com</a> http://bar.com '
             '<script> http://baz.com </script>\n'
             'http://baze.com\n<foo></p>'),
            ('<p><a href="http://foo.com">http://foo.com</a> '
             '<a href="http://bar.com">http://bar.com</a> <script> '
             '<a href="http://baz.com">http://baz.com</a> </script>\n'
             '<a href="http://baze.com">http://baze.com</a>\n'
             '<foo></p>'),
        ),
    ]

    for document, wanted in cases:
        self.assertEqual(test_pr.parse_html(document), wanted)
def test_parse_text(self):
    """Check ``parse_text``/``parse_html`` against the inline and full fixtures.

    Covers a URL inside a sentence, a URL that is the whole input, a URL
    inside a ``<p>`` element, and URLs that already sit inside an ``<a>``
    element (expected to come back unchanged).
    """
    sentence = 'this is inline: %s'
    for link, rendered in self.inline_pairs.items():
        output = test_pr.parse_text(sentence % link)
        self.assertHTMLEqual(output, sentence % rendered)

    # A link that makes up the whole input is expected in its full form.
    for link, rendered in self.full_pairs.items():
        self.assertHTMLEqual(test_pr.parse_text(link), rendered)

    # Inside a block-level tag the inline form is expected.
    paragraph = '<p>Testing %s</p>'
    for link, rendered in self.inline_pairs.items():
        output = test_pr.parse_html(paragraph % link)
        self.assertHTMLEqual(output, paragraph % rendered)

    # Links already wrapped in <a> -- directly, or through a nested
    # <span> -- must come back exactly as they went in.
    anchor_templates = (
        '<p><a href="%s">%s</a></p>',
        '<p><a href="%s"><span>%s</span></a></p>',
    )
    for template in anchor_templates:
        for link, _rendered in self.inline_pairs.items():
            already_linked = template % (link, link)
            output = test_pr.parse_html(already_linked)
            self.assertHTMLEqual(output, already_linked)
def test_multiline(self):
    """Check multi-line input where URLs appear inline and on their own line.

    For plain text the standalone line is expected in full form and the
    others in inline form; with ``block_handler=None`` only the standalone
    line is expected to change.  The HTML cases mix ``<p>`` blocks, bare
    lines and a pre-existing anchor.
    """
    sandwiched = 'this is inline: %s\n%s\nand yet another %s'
    bookended = '%s\nthis is inline: %s\n%s'
    paragraphs = ('<p>%s</p>\n<p>this is inline: %s</p>\n'
                  '<p>\n%s\n</p><p>last test\n%s\n</p>')
    with_anchor = ('<p><a href="#foo">%s</a></p>\n'
                   '<p>this is inline: %s</p>\n<p>last test\n%s\n</p>')

    for link, full in self.full_pairs.items():
        inline = self.inline_pairs[link]
        output = test_pr.parse_text(sandwiched % (link, link, link))
        self.assertHTMLEqual(output, sandwiched % (inline, full, inline))

    # On multi-line text, block_handler=None means only the standalone
    # link is handled; the inline ones stay as plain URLs.
    for link, full in self.full_pairs.items():
        output = test_pr.parse_text(
            sandwiched % (link, link, link), block_handler=None)
        self.assertHTMLEqual(output, sandwiched % (link, full, link))

    for link, full in self.full_pairs.items():
        inline = self.inline_pairs[link]
        output = test_pr.parse_text(bookended % (link, link, link))
        self.assertHTMLEqual(output, bookended % (full, inline, full))

    # Multi-line content mixed with <p> tags.
    for link, full in self.full_pairs.items():
        inline = self.inline_pairs[link]
        output = test_pr.parse_html(paragraphs % (link, link, link, link))
        self.assertHTMLEqual(
            output, paragraphs % (full, inline, full, inline))

    # The first URL is already inside an <a>, so it is expected unchanged.
    for link, full in self.full_pairs.items():
        inline = self.inline_pairs[link]
        output = test_pr.parse_html(with_anchor % (link, link, link))
        self.assertHTMLEqual(output, with_anchor % (link, inline, inline))
def test_urlize(self):
    """Check the ``urlize_all`` switch on all three parse entry points.

    ``plain`` is a URL that has no entry in the fixtures; with the default
    settings it is expected as a simple ``<a>`` link (``plain_link``), and
    with ``urlize_all=False`` it is expected to stay untouched.
    """
    plain = 'http://fapp.io/foo/'
    plain_link = '<a href="http://fapp.io/foo/">http://fapp.io/foo/</a>'
    text_frame = 'test %s\n%s\n%s\nand finally %s'
    html_frame = ('<p>test %s</p>\n<a href="foo">%s</a>\n'
                  '<a href="foo2">%s</a>\n<p>and finally %s</p>')

    for link, full in self.full_pairs.items():
        inline = self.inline_pairs[link]
        text = text_frame % (link, plain, link, plain)

        # parse_text: inline link first, standalone link on the third line.
        output = test_pr.parse_text(text)
        self.assertHTMLEqual(
            output, text_frame % (inline, plain_link, full, plain_link))

        output = test_pr.parse_text(text, urlize_all=False)
        self.assertHTMLEqual(
            output, text_frame % (inline, plain, full, plain))

        # parse_text_full: every known link is expected in full form.
        output = test_pr.parse_text_full(text)
        self.assertHTMLEqual(
            output, text_frame % (full, plain_link, full, plain_link))

        output = test_pr.parse_text_full(text, urlize_all=False)
        self.assertHTMLEqual(
            output, text_frame % (full, plain, full, plain))

        # parse_html on the same text: every known link expected inline.
        output = test_pr.parse_html(text)
        self.assertHTMLEqual(
            output, text_frame % (inline, plain_link, inline, plain_link))

        output = test_pr.parse_html(text, urlize_all=False)
        self.assertHTMLEqual(
            output, text_frame % (inline, plain, inline, plain))

        # Real markup: URLs that are already anchor text must not change.
        html = html_frame % (link, plain, link, plain)

        output = test_pr.parse_html(html)
        self.assertHTMLEqual(
            output, html_frame % (inline, plain, link, plain_link))

        output = test_pr.parse_html(html, urlize_all=False)
        self.assertHTMLEqual(
            output, html_frame % (inline, plain, link, plain))
def test_multiline(self):
    """Check multi-line input where URLs appear inline and on their own line.

    In plain text a URL alone on its line is expected in full form and a
    URL inside a sentence in inline form.  The HTML cases mix ``<p>``
    blocks, bare lines and a pre-existing anchor.
    """
    middle_standalone = 'this is inline: %s\n%s\nand yet another %s'
    outer_standalone = '%s\nthis is inline: %s\n%s'
    p_blocks = ('<p>%s</p>\n<p>this is inline: %s</p>\n'
                '<p>\n%s\n</p><p>last test\n%s\n</p>')
    anchored_first = ('<p><a href="#foo">%s</a></p>\n'
                      '<p>this is inline: %s</p>\n<p>last test\n%s\n</p>')

    for address, block in self.full_pairs.items():
        brief = self.inline_pairs[address]
        source = middle_standalone % (address, address, address)
        result = test_pr.parse_text(source)
        self.assertHTMLEqual(
            result, middle_standalone % (brief, block, brief))

    for address, block in self.full_pairs.items():
        brief = self.inline_pairs[address]
        source = outer_standalone % (address, address, address)
        result = test_pr.parse_text(source)
        self.assertHTMLEqual(
            result, outer_standalone % (block, brief, block))

    # Multi-line content combined with <p> tags.
    for address, block in self.full_pairs.items():
        brief = self.inline_pairs[address]
        source = p_blocks % (address, address, address, address)
        result = test_pr.parse_html(source)
        self.assertHTMLEqual(
            result, p_blocks % (block, brief, block, brief))

    # The URL used as anchor text is expected to be left alone.
    for address, block in self.full_pairs.items():
        brief = self.inline_pairs[address]
        source = anchored_first % (address, address, address)
        result = test_pr.parse_html(source)
        self.assertHTMLEqual(
            result, anchored_first % (address, brief, brief))
def test_parse_text_full(self):
    """Check that full-form output is produced in three situations.

    ``parse_text_full`` on a lone URL, ``parse_text_full`` on a URL inside
    a sentence, and ``parse_html`` on a URL that is the sole content of a
    ``<p>`` element are each compared with the ``full_pairs`` fixture.
    """
    for link, full in self.full_pairs.items():
        self.assertHTMLEqual(test_pr.parse_text_full(link), full)

    # parse_text_full replaces inline content as well.
    sentence = 'this is inline: %s'
    for link, full in self.full_pairs.items():
        output = test_pr.parse_text_full(sentence % link)
        self.assertHTMLEqual(output, sentence % full)

    paragraph = '<p>%s</p>'
    for link, full in self.full_pairs.items():
        output = test_pr.parse_html(paragraph % link)
        self.assertHTMLEqual(output, paragraph % full)
def test_parse_text(self):
    """Check ``parse_text``/``parse_html`` including ``block_handler=None``.

    Covers a URL inside a sentence (with and without the block handler),
    a URL that is the whole input, a URL inside a ``<p>`` element, and
    URLs that already sit inside an ``<a>`` element.
    """
    sentence = 'this is inline: %s'
    for link, rendered in self.inline_pairs.items():
        output = test_pr.parse_text(sentence % link)
        self.assertHTMLEqual(output, sentence % rendered)

    # Passing block_handler=None switches inline link parsing off, so the
    # text is expected back verbatim (plain equality, not HTML equality).
    for link, _rendered in self.inline_pairs.items():
        output = test_pr.parse_text(sentence % link, block_handler=None)
        self.assertEqual(output, sentence % link)

    # A link that makes up the whole input is expected in its full form.
    for link, rendered in self.full_pairs.items():
        self.assertHTMLEqual(test_pr.parse_text(link), rendered)

        # block_handler=None only concerns inline links, so a standalone
        # link is expected to render the same way.
        output = test_pr.parse_text(link, block_handler=None)
        self.assertHTMLEqual(output, rendered)

    # Inside a block-level tag the inline form is expected.
    paragraph = '<p>Testing %s</p>'
    for link, rendered in self.inline_pairs.items():
        output = test_pr.parse_html(paragraph % link)
        self.assertHTMLEqual(output, paragraph % rendered)

    # Links already wrapped in <a> -- directly, or through a nested
    # <span> -- must come back exactly as they went in.
    anchor_templates = (
        '<p><a href="%s">%s</a></p>',
        '<p><a href="%s"><span>%s</span></a></p>',
    )
    for template in anchor_templates:
        for link, _rendered in self.inline_pairs.items():
            already_linked = template % (link, link)
            output = test_pr.parse_html(already_linked)
            self.assertHTMLEqual(output, already_linked)
def test_urlize_params(self):
    """Check that ``urlize_params`` end up as attributes on generated links.

    The same text and the same parameter dict are passed to ``parse_text``,
    ``parse_text_full`` and ``parse_html``; each must return the identical
    anchor carrying ``rel`` and ``target``.
    """
    source = 'test http://foo.com/'
    link_attrs = {'target': '_blank', 'rel': 'nofollow'}
    wanted = (
        'test <a href="http://foo.com/" rel="nofollow" target="_blank">'
        'http://foo.com/</a>'
    )

    # Look each entry point up right before calling it, in the original
    # order.
    for entry_point in ('parse_text', 'parse_text_full', 'parse_html'):
        parse = getattr(test_pr, entry_point)
        self.assertEqual(parse(source, urlize_params=link_attrs), wanted)
def test_html_entities(self):
    """Check that entity-escaped URLs in HTML are extracted and rendered.

    Every fixture URL is embedded with ``&`` written as ``&amp;``, once as
    paragraph text and once as the text of an existing ``<a>`` element.
    ``extract_html`` must report the unescaped URL exactly once together
    with its metadata, and ``parse_html`` must render the escaped URL to
    the same markup as the ``full_pairs`` fixture.
    """
    frame_html = '<p>test %s</p><p><a href="foo">%s</a></p>'

    for url, expected in self.data_pairs.items():
        # Escape ampersands the way they appear in real HTML.  The previous
        # replace('&', '&') was a no-op, so no entity was ever exercised.
        esc_url = url.replace('&', '&amp;')
        html = frame_html % (esc_url, esc_url)

        all_urls, extracted = test_pr.extract_html(html)
        self.assertEqual(all_urls, [url])

        # Fill in the defaults on a copy so the shared fixture dicts in
        # self.data_pairs are not modified by this test.
        expected = dict(expected)
        if 'url' not in expected:
            expected['url'] = url
        if 'title' not in expected:
            expected['title'] = expected['url']
        self.assertEqual(extracted, {url: expected})

        rendered = test_pr.parse_html('<p>%s</p>' % esc_url)
        self.assertHTMLEqual(rendered, '<p>%s</p>' % self.full_pairs[url])
def test_outside_of_markup(self):
    """Check a URL placed directly before a ``<p>`` element.

    ``parse_html`` output for such a document is compared with the same
    document carrying the ``full_pairs`` rendering in place of the URL.
    """
    template = '%s<p>testing</p>'
    for link, full in self.full_pairs.items():
        output = test_pr.parse_html(template % link)
        self.assertHTMLEqual(output, template % full)