def test_string_markdown_link(self):
    """An angle-bracketed URL is rendered by markdown() as an anchor."""
    prop = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'A link <http://localhost>'))
    rendered = prop.markdown().strip()
    expected = u2s(
        u'<p>A link <a href="http://localhost">http://localhost</a></p>')
    self.assertEqual(rendered, expected)
def test_string_stext(self):
    """stext() links the address and leaves ``*embedded*`` untouched."""
    source = u2s(u'A string with [email protected] *embedded* \u00df')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)
    rendered = prop.stext()
    expected = u2s(u'A string with <a href="mailto:[email protected]">[email protected]</a> *embedded* \u00df')
    self.assertEqual(rendered, expected)
def test_string_markdown_javascript_link(self):
    """markdown() must not produce an href that uses a "javascript:" URL.

    Both the autolink form (``<...>``) and the inline link form
    (``[text](...)``) are checked.
    """
    sources = (
        u'<javascript:alert(1)>',
        u'[link](javascript:alert(1))',
    )
    for source in sources:
        prop = StringHTMLProperty(
            self.client, 'test', '1', None, 'test', u2s(source))
        # assertNotIn shows the rendered markup when it fails, whereas
        # assertTrue(find(...) == -1) only reports "False is not true".
        self.assertNotIn('href="javascript:', prop.markdown())
def test_string_markdown(self):
    """Expect the URL and the address to be hyperlinked while ``<br>`` and
    ``*embedded*`` pass through unchanged."""
    source = u2s(u'A string http://localhost with [email protected] <br> *embedded* \u00df')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)
    rendered = prop.markdown()
    expected = u2s(u'A string <a href="http://localhost" rel="nofollow noopener">http://localhost</a> with <a href="mailto:[email protected]">[email protected]</a> <br> *embedded* \u00df')
    self.assertEqual(rendered, expected)
def test_string_markdown_code_block(self):
    """A fenced code block is rendered as <pre><code> between paragraphs."""
    # NOTE(review): the intent is also to verify that embedded html is
    # escaped, yet the expected value below holds a literal <pre> --
    # confirm this literal against real renderer output.
    source = u2s(u'embedded code block <pre>\n\n```\nline 1\nline 2\n```\n\nnew </pre> paragraph')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)
    # Collapse blank lines so engines that separate blocks differently
    # compare equal.
    rendered = prop.markdown().strip().replace('\n\n', '\n')
    expected = u2s(u'<p>embedded code block <pre></p>\n<pre><code>line 1\nline 2\n</code></pre>\n<p>new </pre> paragraph</p>')
    self.assertEqual(rendered, expected)
def test_string_markdown_data_link(self):
    """markdown() must not produce an href that uses a "data:" URL.

    Both the autolink form (``<...>``) and the inline link form
    (``[text](...)``) are checked.
    """
    sources = (
        u'<data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==>',
        u'[data link](data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==)',
    )
    for source in sources:
        prop = StringHTMLProperty(
            self.client, 'test', '1', None, 'test', u2s(source))
        # Render once so the printed text is exactly what gets asserted on.
        rendered = prop.markdown()
        print(rendered)
        # assertNotIn shows the rendered markup when it fails, whereas
        # assertTrue(find(...) == -1) only reports "False is not true".
        self.assertNotIn('href="data:', rendered)
def test_string_markdown_link(self):
    """An angle-bracketed address is rendered as a mailto: anchor."""
    # markdown2 and markdown escape the email address, so the output is
    # unescaped before comparing.
    try:
        from html import unescape as html_unescape
    except ImportError:
        # Fallback for interpreters without html.unescape.
        from HTMLParser import HTMLParser
        html_unescape = HTMLParser().unescape

    # NOTE(review): the input address and the expected address below do
    # not match each other -- confirm both literals against the upstream
    # test before relying on this case.
    prop = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'A link <*****@*****.**>'))
    rendered = self.mangleMarkdown2(html_unescape(prop.markdown().strip()))
    expected = u2s(u'<p>A link <a href="mailto:[email protected]">[email protected]</a></p>')
    self.assertEqual(rendered, expected)
def html2text(html):
    """Return the text content of *html* with script/style content removed.

    The text is extracted with '\\n' as separator and surrounding
    whitespace stripped, then passed through u2s().
    """
    document = BeautifulSoup(html)
    # Script and style elements hold code, not readable text; remove them
    # from the tree before extracting text.
    for element in document(["script", "style"]):
        element.extract()
    text = document.get_text('\n', strip=True)
    return u2s(text)
def test_markdown_break_on_newline(self):
    """MARKDOWN_BREAK_ON_NEWLINE decides whether newlines become <br>.

    With the option on, the two newlines in the text yield two breaks;
    with it off, none.
    """
    self.client.db.config['MARKDOWN_BREAK_ON_NEWLINE'] = True
    try:
        p = StringHTMLProperty(
            self.client, 'test', '1', None, 'test',
            u2s(u'A string with\nline break\ntwice.'))
        m = p.markdown()
        self.assertEqual(2, m.count('<br'))
    finally:
        # Switch the option back off even when the check above fails, so
        # the enabled setting is never left behind on the config object.
        self.client.db.config['MARKDOWN_BREAK_ON_NEWLINE'] = False

    m = p.markdown()
    self.assertEqual(0, m.count('<br'))
def test_markdown_return_text_on_exception(self):
    """Invalid markdown (a fenced code block that is never closed) still
    produces paragraph output."""
    source = u2s(u'embedded code block <pre>\n\n``` python\nline 1\nline 2\n\n\nnew </pre> paragraph')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)
    rendered = prop.markdown().strip()
    print(rendered)
    expected = '<p>embedded code block <pre></p>\n<p>``` python\nline 1\nline 2</p>\n<p>new </pre> paragraph</p>'
    # Blank lines are collapsed before comparing.
    self.assertEqual(rendered.replace('\n\n', '\n'), expected)
def test_string_markdown_forced_line_break(self):
    """Lines ending in two spaces produce one html line break each."""
    source = u2s(u'This is a set of text  \n:that should have a break  \n:at newlines. Each  \n:colon should be the start of an html line')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)

    # The backends disagree on the exact markup ("of text <br />",
    # "of text<br>", ...). Instead of keeping one expected string per
    # renderer, count occurrences of '<br' and require three.
    rendered = prop.markdown()
    print(rendered)
    break_count = rendered.count('<br')
    self.assertEqual(3, break_count)
def add_text(self, identifier, text, mime_type='text/plain'):
    """Index the words of *text* under *identifier*.

    "identifier" is (classname, itemid, property). Only 'text/plain'
    content is indexed; for any other mime_type the call returns without
    touching the database. Words already stored for the identifier are
    deleted and replaced by the words found in *text*.
    """
    if mime_type != 'text/plain':
        return

    # Ensure all elements of the identifier are strings because the itemid
    # column is varchar even if item ids may be numbers elsewhere in the
    # code.
    identifier = tuple(map(str, identifier))

    # Only the parameter marker text from self.db.arg is interpolated into
    # the statements below; the values are handed to execute() separately.
    a = self.db.arg

    # first, find the id of the (classname, itemid, property)
    sql = 'select _textid from __textids where _class=%s and '\
          '_itemid=%s and _prop=%s'%(a, a, a)
    self.db.cursor.execute(sql, identifier)
    row = self.db.cursor.fetchone()
    if not row:
        # not previously indexed: allocate a new text id and register it
        textid = self.db.newid('__textids')
        sql = 'insert into __textids (_textid, _class, _itemid, _prop)'\
              ' values (%s, %s, %s, %s)'%(a, a, a, a)
        self.db.cursor.execute(sql, (textid, ) + identifier)
    else:
        textid = int(row[0])
        # clear out any existing indexed values
        sql = 'delete from __words where _textid=%s'%a
        self.db.cursor.execute(sql, (textid, ))

    # ok, find all the unique words in the text; matching is done on the
    # upper-cased text and limited to words of minlength..maxlength chars
    text = us2u(text, "replace").upper()
    pattern = r'(?u)\b\w{%d,%d}\b' % (self.minlength, self.maxlength)
    words = {
        word for word in (u2s(w) for w in re.findall(pattern, text))
        if not self.is_stopword(word)
    }

    # for each word, add an entry in the db
    sql = 'insert into __words (_word, _textid) values (%s, %s)'%(a, a)
    self.db.cursor.executemany(sql, [(word, textid) for word in words])
def test_string_markdown_code_block_attribute(self):
    """A fenced code block with a language tag renders per engine."""
    # NOTE(review): the intent is also to verify that embedded html is
    # escaped, yet the expected values hold a literal <pre> -- confirm
    # these literals against real renderer output.
    source = u2s(u'embedded code block <pre>\n\n``` python\nline 1\nline 2\n```\n\nnew </pre> paragraph')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)
    rendered = prop.markdown().strip()
    print(rendered)

    # Each engine marks up the language differently, so pick the expected
    # text by concrete test case class.
    if type(self) == MistuneTestCase:
        expected = '<p>embedded code block <pre></p>\n<pre><code class="lang-python">line 1\nline 2\n</code></pre>\n<p>new </pre> paragraph</p>'
    elif type(self) == MarkdownTestCase:
        expected = '<p>embedded code block <pre></p>\n<pre><code class="language-python">line 1\nline 2\n</code></pre>\n<p>new </pre> paragraph</p>'
    else:
        expected = '<p>embedded code block <pre></p>\n<div class="codehilite"><pre><span></span><code><span class="n">line</span> <span class="mi">1</span>\n<span class="n">line</span> <span class="mi">2</span>\n</code></pre></div>\n<p>new </pre> paragraph</p>'

    self.assertEqual(rendered.replace('\n\n', '\n'), expected)
def test_string_rst(self):
    """Check rst() output for a set of inputs.

    Covered: inline markup with an address, the "include" and "raw"
    directives (expected to be reported as disabled), javascript:/data:
    text (expected not to become links), URL recognition, and plain text.
    """
    # NOTE(review): several expected values below contain raw '<string>' /
    # '<badtag>' and single spaces where rendered output would normally
    # have entities and line breaks -- confirm these literals against
    # actual rst() output.

    # The address must be the one the expected markup at the end of this
    # test links to.
    p = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'A string with cmeerw@example.com *embedded* \u00df'))

    # test case to make sure include directive is disabled
    q = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'\n\n.. include:: XyZrMt.html\n\n<badtag>\n\n'))
    q_result = u'''<div class="document"> <div class="system-message"> <p class="system-message-title">System Message: WARNING/2 (<tt class="docutils"><string></tt>, line 3)</p> <p>"include" directive disabled.</p> <pre class="literal-block"> .. include:: XyZrMt.html </pre> </div> <p><badtag></p> </div> '''

    # test case to make sure raw directive is disabled
    r = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'\n\n.. raw:: html\n\n <badtag>\n\n'))
    r_result = '''<div class="document"> <div class="system-message"> <p class="system-message-title">System Message: WARNING/2 (<tt class="docutils"><string></tt>, line 3)</p> <p>"raw" directive disabled.</p> <pre class="literal-block"> .. raw:: html <badtag> </pre> </div> </div> '''

    # test case to make sure javascript and data url's aren't turned
    # into links
    s = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'<badtag>\njavascript:badcode data:text/plain;base64,SGVsbG8sIFdvcmxkIQ=='))
    s_result = '<div class="document">\n<p><badtag>\njavascript:badcode data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==</p>\n</div>\n'

    # test url recognition
    t = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'link is https://example.com/link for testing.'))
    t_result = '<div class="document">\n<p>link is <a class="reference external" href="https://example.com/link">https://example.com/link</a> for testing.</p>\n</div>\n'

    # test text that doesn't need to be processed
    u = StringHTMLProperty(
        self.client, 'test', '1', None, 'test',
        u2s(u'Just a plain old string here. Nothig to process.'))
    u_result = '<div class="document">\n<p>Just a plain old string here. Nothig to process.</p>\n</div>\n'

    self.assertEqual(
        p.rst(),
        u2s(u'<div class="document">\n<p>A string with <a class="reference external" href="mailto:cmeerw@example.com">cmeerw@example.com</a> <em>embedded</em> \u00df</p>\n</div>\n'))
    self.assertEqual(q.rst(), u2s(q_result))
    self.assertEqual(r.rst(), u2s(r_result))
    self.assertEqual(s.rst(), u2s(s_result))
    self.assertEqual(t.rst(), u2s(t_result))
    self.assertEqual(u.rst(), u2s(u_result))
def test_string_markdown_link_item(self):
    """Item designators such as ``issue1`` are linked by markdown().

    The markdown engines differ in link format: attribute order and the
    rel value (noopener, nofollow etc) vary. The checks therefore look
    for a substring that signals success instead of comparing the whole
    returned string.
    """
    def make(source):
        return StringHTMLProperty(
            self.client, 'test', '1', None, 'test', u2s(source))

    # Bare designator and designator used as an explicit link target.
    for source in (u'An issue1 link', u'An [issue1](issue1) link'):
        prop = make(source)
        self.assertIn(u2s(u'href="issue1"'), prop.markdown().strip())
        # just verify that plain linking is working
        self.assertIn(u2s(u'href="issue1"'), prop.plain(hyperlink=1))

    # Designator as the label of a link to an external URL.
    prop = make(u'An [issue1](https://example.com/issue1) link')
    self.assertIn(u2s(u'href="https://example.com/issue1"'),
                  prop.markdown().strip())

    # A space between label and parenthesized URL: the label is linked
    # as an item.
    prop = make(u'An [issue1] (https://example.com/issue1) link')
    self.assertIn(u2s(u'href="issue1"'), prop.markdown().strip())
    if type(self) == MistuneTestCase:
        # mistune makes the https url into a real link
        fragment = u2s(u'href="https://example.com/issue1"')
    else:
        # the other two engines leave the parenthesized url as is.
        fragment = u2s(u' (https://example.com/issue1) link')
    self.assertIn(fragment, prop.markdown().strip())
def ngettext(self, singular, plural, number):
    """Return the ungettext() result for the us2u()-converted messages,
    passed back through u2s()."""
    translated = self.ungettext(us2u(singular), us2u(plural), number)
    return u2s(translated)
def gettext(self, msgid):
    """Return the ugettext() result for the us2u()-converted *msgid*,
    passed back through u2s()."""
    return u2s(self.ugettext(us2u(msgid)))
def test_string_markdown(self):
    """Emphasis markup is rendered as <em> inside a paragraph."""
    source = u2s(u'A string with <br> *embedded* \u00df')
    prop = StringHTMLProperty(self.client, 'test', '1', None, 'test', source)
    rendered = prop.markdown().strip()
    expected = u2s(u'<p>A string with <br> <em>embedded</em> \u00df</p>')
    self.assertEqual(rendered, expected)
def test_markdown_hyperlinked_url(self):
    """markdown(hyperlink=1) links bare, bracketed and labelled URLs."""
    def render(source):
        # Build the property, render with hyperlinking on, then pass the
        # result through mangleMarkdown2.
        prop = StringHTMLProperty(
            self.client, 'test', '1', None, 'test', u2s(source))
        return self.mangleMarkdown2(prop.markdown(hyperlink=1))

    # classic markdown does not emit a \n at end of rendered string
    # so rstrip \n before comparing.
    html = render(u'http://example.com/')
    print(html)
    self.assertEqual(
        html.rstrip('\n'),
        '<p><a href="http://example.com/" rel="nofollow noopener">http://example.com/</a></p>')

    html = render(u'<http://example.com/>')
    self.assertEqual(
        html.rstrip('\n'),
        '<p><a href="http://example.com/" rel="nofollow noopener">http://example.com/</a></p>')

    html = render(u'[label](http://example.com/ "a title")')
    self.assertEqual(
        html.rstrip('\n'),
        '<p><a href="http://example.com/" rel="nofollow noopener" title="a title">label</a></p>')

    html = render(u'[label](http://example.com/).')
    self.assertEqual(
        html.rstrip('\n'),
        '<p><a href="http://example.com/" rel="nofollow noopener">label</a>.</p>')

    # Image markup differs per engine; accept any of the known spellings.
    accepted_images = [
        '<p><img src="http://example.com/" alt=""/></p>\n',
        '<p><img src="http://example.com/" alt="" /></p>\n',
        '<p><img src="http://example.com/" alt=""></p>\n',
        '<p><img alt="" src="http://example.com/" /></p>',  # markdown
    ]
    html = render(u'![](http://example.com/)')
    self.assertIn(html, accepted_images)

    html = render(u'An URL http://example.com/ with text')
    self.assertEqual(
        html.rstrip('\n'),
        '<p>An URL <a href="http://example.com/" rel="nofollow noopener">http://example.com/</a> with text</p>')

    html = render(u'An URL https://example.com/path with text')
    self.assertEqual(
        html.rstrip('\n'),
        '<p>An URL <a href="https://example.com/path" rel="nofollow noopener">https://example.com/path</a> with text</p>')