def test_code_spans_347(): """ Test case 347: Interior spaces are not collapsed: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """`foo bar\a baz`""".replace("\a", " ") expected_tokens = [ "[para(1,1):\n]", "[icode-span:foo bar \a\n\a \abaz:`::]", "[end-para]", ] expected_gfm = """<p><code>foo bar baz</code></p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_121():
    """
    Test case 121:  A block can also start with a closing tag.
    """

    # Arrange
    source_markdown = """</div>
*foo*"""
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:</div>\n*foo*:]",
        "[end-html-block]",
    ]
    expected_gfm = """</div>
*foo*"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_cov1(): """ Test case cov1: Based on coverage analysis. """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """<hr/> </x-table> </x-table>""" expected_tokens = [ "[html-block(1,1)]", "[text:<hr/>\n</x-table>:]", "[end-html-block]", "[BLANK(3,1):]", "[html-block(4,1)]", "[text:</x-table>:]", "[end-html-block]", ] expected_gfm = """<hr/> </x-table> </x-table>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_153(): """ Test case 153: (part 2) The opening tag can be indented 1-3 spaces, but not 4: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """ <div> <div>""" expected_tokens = [ "[html-block(1,1)]", "[text:<div>: ]", "[end-html-block]", "[BLANK(2,1):]", "[icode-block(3,5): :]", "[text:\a<\a<\adiv\a>\a>\a:]", "[end-icode-block]", ] expected_gfm = """ <div> <pre><code><div> </code></pre>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_155():
    """
    Test case 155:  However, a following blank line is needed, except at the end of
    a document, and except for blocks of types 1-5, above.  Without one, the
    whole document stays inside the single HTML block.
    """

    # Arrange
    source_markdown = """<div>
bar
</div>
*foo*"""
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<div>\nbar\n</div>\n*foo*:]",
        "[end-html-block]",
    ]
    expected_gfm = """<div>
bar
</div>
*foo*"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_145():
    """
    Test case 145:  (part 1) The end tag can occur on the same line as the start
    tag, so the following line is parsed as a normal paragraph.
    """

    # Arrange
    source_markdown = """<style>p{color:red;}</style>
*foo*"""
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<style>p{color:red;}</style>:]",
        "[end-html-block]",
        "[para(2,1):]",
        "[emphasis:1:*]",
        "[text:foo:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_gfm = """<style>p{color:red;}</style>
<p><em>foo</em></p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_147():
    """
    Test case 147:  Note that anything on the last line after the end tag will be
    included in the HTML block — the trailing list-like text stays raw.
    """

    # Arrange
    source_markdown = """<script>
foo
</script>1. *bar*"""
    expected_tokens = [
        "[html-block(1,1)]",
        "[text:<script>\nfoo\n</script>1. *bar*:]",
        "[end-html-block]",
    ]
    expected_gfm = """<script>
foo
</script>1. *bar*"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_355(): """ Test case 355: And this is code: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """`<http://foo.bar.`baz>`""" expected_tokens = [ "[para(1,1):]", "[icode-span:\a<\a<\ahttp://foo.bar.:`::]", "[text:baz\a>\a>\a`:]", "[end-para]", ] expected_gfm = """<p><code><http://foo.bar.</code>baz>`</p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_356():
    """
    Test case 356:  But this is an autolink — the angle brackets close before the
    backtick can open a code span, leaving a literal trailing backtick.
    """

    # Arrange
    source_markdown = """<http://foo.bar.`baz>`"""
    expected_tokens = [
        "[para(1,1):]",
        "[uri-autolink:http://foo.bar.`baz]",
        "[text:`:]",
        "[end-para]",
    ]
    expected_gfm = """<p><a href="http://foo.bar.%60baz">http://foo.bar.`baz</a>`</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_353(): """ Test case 353: Code spans, HTML tags, and autolinks have the same precedence. Thus, this is code: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """`<a href="`">`""" expected_tokens = [ "[para(1,1):]", '[icode-span:\a<\a<\aa href=\a"\a"\a:`::]', '[text:\a"\a"\a\a>\a>\a`:]', "[end-para]", ] expected_gfm = """<p><code><a href="</code>">`</p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_354():
    """
    Test case 354:  But this is an HTML tag — the raw-html element swallows the
    first backtick, so only a literal backtick remains afterwards.
    """

    # Arrange
    source_markdown = """<a href="`">`"""
    expected_tokens = [
        "[para(1,1):]",
        '[raw-html:a href="`"]',
        "[text:`:]",
        "[end-para]",
    ]
    expected_gfm = """<p><a href="`">`</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_352():
    """
    Test case 352:  And this is not parsed as a link — the code span cuts across
    the link syntax, leaving the bracket and parenthesis as plain text.
    """

    # Arrange
    source_markdown = """[not a `link](/foo`)"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:[:]",
        "[text:not a :]",
        "[icode-span:link](/foo:`::]",
        "[text:):]",
        "[end-para]",
    ]
    expected_gfm = """<p>[not a <code>link](/foo</code>)</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_351():
    """
    Test case 351:  Code span backticks have higher precedence than any other
    inline constructs except HTML tags and autolinks, so the asterisk stays
    literal and the second asterisk ends up inside the code span.
    """

    # Arrange
    source_markdown = """*foo`*`"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:*:]",
        "[text:foo:]",
        "[icode-span:*:`::]",
        "[end-para]",
    ]
    expected_gfm = """<p>*foo<code>*</code></p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_348():
    """
    Test case 348:  Note that backslash escapes do not work in code spans.
    All backslashes are treated literally, so the escaped backtick still
    closes the span and the trailing backtick is plain text.
    """

    # Arrange
    source_markdown = """`foo\\`bar`"""
    expected_tokens = [
        "[para(1,1):]",
        "[icode-span:foo\\:`::]",
        "[text:bar`:]",
        "[end-para]",
    ]
    expected_gfm = """<p><code>foo\\</code>bar`</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_extra_005(): """ When encoding link characters, special attention is used for the % characters as the CommonMark parser treats "%<hex-char><hex-char>" as non-encodable. Make sure this is tested at the end of the link. """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = "[link](http://google.com/search%)" expected_tokens = [ "[para(1,1):]", "[link:inline:http://google.com/search%25::http://google.com/search%:::link]", "[text:link:]", "[end-link::]", "[end-para]", ] expected_gfm = '<p><a href="http://google.com/search%25">link</a></p>' # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm)
def test_code_spans_359():
    """
    Test case 359:  The following case also illustrates the need for opening and
    closing backtick strings to be equal in length — the single opening backtick
    stays literal while the double-backtick pair forms the span.
    """

    # Arrange
    source_markdown = """`foo``bar``"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:`foo:]",
        "[icode-span:bar:``::]",
        "[end-para]",
    ]
    expected_gfm = """<p>`foo<code>bar</code></p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_extra_004(): """ Test to make sure the wide range of characters meets the GRM/CommonMark encodings. Note that since % is followed by a 2 digit hex value, it is encoded per the common mark libraries except for the % and the 2 digit hex value following it. Another example of this is example 511: https://github.github.com/gfm/#example-511 """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = ( "[link](!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~)" ) expected_tokens = [ "[para(1,1):]", "[link:inline:!%22#$%12&'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link]", "[text:link:]", "[end-link::]", "[end-para]", ] expected_gfm = '<p><a href="!%22#$%12&\'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~">link</a></p>' # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm)
def test_emphasis_363():
    """
    Test case 363:  Unicode nonbreaking spaces count as whitespace, too, so the
    asterisks do not form an emphasis run and stay literal.
    """

    # Arrange
    source_markdown = """*\u00A0a\u00A0*"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:*:]",
        "[text:\u00A0a\u00A0:]",
        "[text:*:]",
        "[end-para]",
    ]
    expected_gfm = """<p>*\u00A0a\u00A0*</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_146(): """ Test case 146: (part 2) The end tag can occur on the same line as the start tag: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """<!-- foo -->*bar* *baz*""" expected_tokens = [ "[html-block(1,1)]", "[text:<!-- foo -->*bar*:]", "[end-html-block]", "[para(2,1):]", "[emphasis:1:*]", "[text:baz:]", "[end-emphasis::1:*]", "[end-para]", ] expected_gfm = """<!-- foo -->*bar* <p><em>baz</em></p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_365():
    """
    Test case 365:  (part 2) Intraword emphasis with * is permitted, even
    between digits.
    """

    # Arrange
    source_markdown = """5*6*78"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:5:]",
        "[emphasis:1:*]",
        "[text:6:]",
        "[end-emphasis::1:*]",
        "[text:78:]",
        "[end-para]",
    ]
    expected_gfm = """<p>5<em>6</em>78</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_120(): """ Test case 120: (part 2) Some simple examples follow. Here are some basic HTML blocks of type 6: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """ <div> *hello* <foo><a>""" expected_tokens = [ "[html-block(1,1)]", "[text:<div>\n *hello*\n <foo><a>: ]", "[end-html-block]", ] expected_gfm = """ <div> *hello* <foo><a>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_361():
    """
    Test case 361:  This is not emphasis, because the opening * is followed by
    whitespace, and hence not part of a left-flanking delimiter run.
    """

    # Arrange
    source_markdown = """a * foo bar*"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:a :]",
        "[text:*:]",
        "[text: foo bar:]",
        "[text:*:]",
        "[end-para]",
    ]
    expected_gfm = """<p>a * foo bar*</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_154(): """ Test case 154: An HTML block of types 1–6 can interrupt a paragraph, and need not be preceded by a blank line. """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """Foo <div> bar </div>""" expected_tokens = [ "[para(1,1):]", "[text:Foo:]", "[end-para]", "[html-block(2,1)]", "[text:<div>\nbar\n</div>:]", "[end-html-block]", ] expected_gfm = """<p>Foo</p> <div> bar </div>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_362(): """ Test case 362: This is not emphasis, because the opening * is preceded by an alphanumeric and followed by punctuation, and hence not part of a left-flanking delimiter run: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """a*"foo"*""" expected_tokens = [ "[para(1,1):]", "[text:a:]", "[text:*:]", '[text:\a"\a"\afoo\a"\a"\a:]', "[text:*:]", "[end-para]", ] expected_gfm = """<p>a*"foo"*</p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_156(): """ Test case 156: HTML blocks of type 7 cannot interrupt a paragraph: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """Foo <a href="bar"> baz""" expected_tokens = [ "[para(1,1):\n\n]", "[text:Foo\n::\n]", '[raw-html:a href="bar"]', "[text:\nbaz::\n]", "[end-para]", ] expected_gfm = """<p>Foo <a href="bar"> baz</p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_456():
    """
    Test case 456:  (part 6) Note that when delimiters do not match evenly,
    Rule 11 determines that the excess literal * characters will appear outside
    of the emphasis, rather than inside it.
    """

    # Arrange
    source_markdown = """*foo****"""
    expected_tokens = [
        "[para(1,1):]",
        "[emphasis:1:*]",
        "[text:foo:]",
        "[end-emphasis::1:*]",
        "[text:***:]",
        "[end-para]",
    ]
    expected_gfm = """<p><em>foo</em>***</p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_158(): """ Test case 158: (part 2) This rule differs from John Gruber’s original Markdown syntax specification """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """<div> *Emphasized* text. </div>""" expected_tokens = [ "[html-block(1,1)]", "[text:<div>\n*Emphasized* text.\n</div>:]", "[end-html-block]", ] expected_gfm = """<div> *Emphasized* text. </div>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_emphasis_447():
    """
    Test case 447:  (part 3) Rule 11 — a lone underscore inside an asterisk
    emphasis is literal text.
    """

    # Arrange
    source_markdown = """foo *_*"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:foo :]",
        "[emphasis:1:*]",
        "[text:_:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_gfm = """<p>foo <em>_</em></p>"""

    # Act
    actual_tokens = TokenizedMarkdown().transform(source_markdown)
    actual_gfm = TransformToGfm().transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_html_blocks_cov2(): """ Test case cov2: Based on coverage analysis. """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """</hrx > </x-table>""" expected_tokens = [ "[para(1,1):]", "[text:\a<\a<\a/hrx:]", "[end-para]", "[block-quote(2,1):]", "[BLANK(2,2):]", "[html-block(3,1)]", "[text:</x-table>:]", "[end-html-block]", "[end-block-quote]", ] expected_gfm = """<p></hrx</p> <blockquote> </x-table> </blockquote>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)
def test_code_spans_346(): """ Test case 346: (part 2) Line endings are treated like spaces: """ # Arrange tokenizer = TokenizedMarkdown() transformer = TransformToGfm() source_markdown = """`` foo ``""" expected_tokens = [ "[para(1,1):\n\n]", "[icode-span:foo :``:\a\n\a \a:\a\n\a \a]", "[end-para]", ] expected_gfm = """<p><code>foo </code></p>""" # Act actual_tokens = tokenizer.transform(source_markdown) actual_gfm = transformer.transform(actual_tokens) # Assert assert_if_lists_different(expected_tokens, actual_tokens) assert_if_strings_different(expected_gfm, actual_gfm) assert_token_consistency(source_markdown, actual_tokens)