def test_html_blocks_145():
    """
    Test case 145: (part 1) A type-1 block may open and close on a single
    line; the following line is parsed as a normal paragraph.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<style>p{color:red;}</style>
*foo*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<style>p{color:red;}</style>:]",
        "[end-html-block]",
        "[para(2,1):]",
        "[emphasis:1:*]",
        "[text:foo:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_html = """<style>p{color:red;}</style>
<p><em>foo</em></p>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_tabs_006():
    """
    Test case 006: a `>` followed by a tab is handled as though the tab
    were expanded to three spaces, yielding an indented code block.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """>\t\tfoo"""
    expected_token_list = [
        "[block-quote(1,1):]",
        "[icode-block(1,3):\t\t:]",
        "[text:foo:  ]",
        "[end-icode-block]",
        "[end-block-quote]",
    ]
    expected_html = """<blockquote>
<pre><code>  foo
</code></pre>
</blockquote>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_tabs_007():
    """
    Test case 007: a list marker followed by tabs produces an indented
    code block inside the list item.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """-\t\tfoo"""
    expected_token_list = [
        "[ulist(1,1):-::2:]",
        "[icode-block(1,3):\t\t:]",
        "[text:foo:  ]",
        "[end-icode-block]",
        "[end-ulist]",
    ]
    expected_html = """<ul>
<li>
<pre><code>  foo
</code></pre>
</li>
</ul>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_136():
    """
    Test case 136: the `<del>` tag can act as either a block- or an
    inline-level tag.  Here it sits on a line by itself, so the whole
    span becomes a raw HTML block and the `*foo*` is not emphasized.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<del>
*foo*
</del>"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<del>\n*foo*\n</del>:]",
        "[end-html-block]",
    ]
    expected_html = """<del>
*foo*
</del>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_142():
    """
    Test case 142: (part 1) with no matching end tag, the HTML block runs
    to the end of the document (or enclosing block quote / list item).
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): multi-line literals reconstructed from the BLANK(3,1)
    # token and CommonMark's example — confirm against the spec text.
    markdown_input = """<style
  type="text/css">

foo"""
    expected_token_list = [
        "[html-block(1,1)]",
        '[text:<style\n  type="text/css">:]',
        "[BLANK(3,1):]",
        "[text:foo:]",
        "[end-html-block]",
    ]
    expected_html = """<style
  type="text/css">

foo"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_130():
    """
    Test case 130: (part 2) a type-6 block's opening tag need not sit on
    a line by itself.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<table><tr><td>
foo
</td></tr></table>"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<table><tr><td>\nfoo\n</td></tr></table>:]",
        "[end-html-block]",
    ]
    expected_html = """<table><tr><td>
foo
</td></tr></table>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_132():
    """
    Test case 132: a tag outside the type-6 block-level list starts an
    HTML block (type 7) only when complete and alone on the first line.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<a href="foo">
*bar*
</a>"""
    expected_token_list = [
        "[html-block(1,1)]",
        '[text:<a href="foo">\n*bar*\n</a>:]',
        "[end-html-block]",
    ]
    expected_html = """<a href="foo">
*bar*
</a>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_155():
    """
    Test case 155: without a following blank line the block keeps
    absorbing lines, so `*foo*` stays inside the HTML block.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div>
bar
</div>
*foo*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div>\nbar\n</div>\n*foo*:]",
        "[end-html-block]",
    ]
    expected_html = """<div>
bar
</div>
*foo*"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_156():
    """
    Test case 156: a type-7 HTML block cannot interrupt a paragraph — the
    `<a>` tag is parsed as inline raw HTML instead.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """Foo
<a href="bar">
baz"""
    expected_token_list = [
        "[para(1,1):\n\n]",
        "[text:Foo\n::\n]",
        '[raw-html:a href="bar"]',
        "[text:\nbaz::\n]",
        "[end-para]",
    ]
    expected_html = """<p>Foo
<a href="bar">
baz</p>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_153():
    """
    Test case 153: (part 2) an opening tag indented 1-3 spaces starts an
    HTML block; at 4 spaces it becomes an indented code block instead.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): entity escapes (&lt;/&gt;) restored after extraction
    # mangling — confirm against the CommonMark spec output.
    markdown_input = """  <div>

    <div>"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div>:  ]",
        "[end-html-block]",
        "[BLANK(2,1):]",
        "[icode-block(3,5):    :]",
        "[text:\a<\a&lt;\adiv\a>\a&gt;\a:]",
        "[end-icode-block]",
    ]
    expected_html = """  <div>
<pre><code>&lt;div&gt;
</code></pre>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_154():
    """
    Test case 154: an HTML block of types 1-6 may interrupt a paragraph
    without a preceding blank line.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """Foo
<div>
bar
</div>"""
    expected_token_list = [
        "[para(1,1):]",
        "[text:Foo:]",
        "[end-para]",
        "[html-block(2,1)]",
        "[text:<div>\nbar\n</div>:]",
        "[end-html-block]",
    ]
    expected_html = """<p>Foo</p>
<div>
bar
</div>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_120():
    """
    Test case 120: (part 2) a basic type-6 HTML block; every line is
    passed through verbatim, leading whitespace included.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): 1/2/9-space indents reconstructed from CommonMark's
    # example 120 — confirm against the spec text.
    markdown_input = """ <div>
  *hello*
         <foo><a>"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div>\n  *hello*\n         <foo><a>: ]",
        "[end-html-block]",
    ]
    expected_html = """ <div>
  *hello*
         <foo><a>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_147():
    """
    Test case 147: text on the closing-tag line after the end tag is
    still part of the HTML block (the `1. *bar*` is not a list).
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<script>
foo
</script>1. *bar*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<script>\nfoo\n</script>1. *bar*:]",
        "[end-html-block]",
    ]
    expected_html = """<script>
foo
</script>1. *bar*"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_146():
    """
    Test case 146: (part 2) a comment block (type 2) may open and close
    on one line; the next line is an ordinary paragraph.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<!-- foo -->*bar*
*baz*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<!-- foo -->*bar*:]",
        "[end-html-block]",
        "[para(2,1):]",
        "[emphasis:1:*]",
        "[text:baz:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_html = """<!-- foo -->*bar*
<p><em>baz</em></p>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_127():
    """
    Test case 127: (part 2) a partial opening tag need not be completed
    — garbage in, garbage out.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div class
foo"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div class\nfoo:]",
        "[end-html-block]",
    ]
    expected_html = """<div class
foo"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_121():
    """
    Test case 121: an HTML block may begin with a closing tag.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """</div>
*foo*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:</div>\n*foo*:]",
        "[end-html-block]",
    ]
    expected_html = """</div>
*foo*"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_128():
    """
    Test case 128: the opening tag need not be valid HTML as long as it
    starts like a tag.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div *???-&&&-<---
*foo*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div *???-&&&-<---\n*foo*:]",
        "[end-html-block]",
    ]
    expected_html = """<div *???-&&&-<---
*foo*"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_158():
    """
    Test case 158: (part 2) unlike Gruber's original Markdown, content
    inside the HTML block gets no Markdown processing at all.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div>
*Emphasized* text.
</div>"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div>\n*Emphasized* text.\n</div>:]",
        "[end-html-block]",
    ]
    expected_html = """<div>
*Emphasized* text.
</div>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_131():
    """
    Test case 131: everything up to the next blank line (or EOF) lands
    in the HTML block — even what looks like a fenced code block.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div></div>
``` c
int x = 33;
```"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div></div>\n``` c\nint x = 33;\n```:]",
        "[end-html-block]",
    ]
    expected_html = """<div></div>
``` c
int x = 33;
```"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_cov1():
    """
    Test case cov1 (coverage): a blank line ends the first HTML block,
    and the lone closing tag after it opens a second one.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<hr/>
</x-table>

</x-table>"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<hr/>\n</x-table>:]",
        "[end-html-block]",
        "[BLANK(3,1):]",
        "[html-block(4,1)]",
        "[text:</x-table>:]",
        "[end-html-block]",
    ]
    expected_html = """<hr/>
</x-table>
</x-table>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_135():
    """
    Test case 135: (part 3) in type-7 blocks the tag name can be
    anything, including a closing tag.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """</ins>
*bar*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:</ins>\n*bar*:]",
        "[end-html-block]",
    ]
    expected_html = """</ins>
*bar*"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_cov2():
    """
    Test case cov2 (coverage): a `</hrx` that never closes stays plain
    text, while the block quote that follows hosts an HTML block.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): entity escape (&lt;) restored after extraction
    # mangling — confirm against the project's original test file.
    markdown_input = """</hrx
>
</x-table>"""
    expected_token_list = [
        "[para(1,1):]",
        "[text:\a<\a&lt;\a/hrx:]",
        "[end-para]",
        "[block-quote(2,1):]",
        "[BLANK(2,2):]",
        "[html-block(3,1)]",
        "[text:</x-table>:]",
        "[end-html-block]",
        "[end-block-quote]",
    ]
    expected_html = """<p>&lt;/hrx</p>
<blockquote>
</x-table>
</blockquote>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_138():
    """
    Test case 138: `<del>` tags not alone on a line are inline raw HTML,
    so the enclosed `*foo*` is emphasized inside a paragraph.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<del>*foo*</del>"""
    expected_token_list = [
        "[para(1,1):]",
        "[raw-html:del]",
        "[emphasis:1:*]",
        "[text:foo:]",
        "[end-emphasis::1:*]",
        "[raw-html:/del]",
        "[end-para]",
    ]
    expected_html = """<p><del><em>foo</em></del></p>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_cov4():
    """
    Test case cov4 (coverage): a bare `<` and a stray `>` are escaped as
    text, while the closing tag parses as inline raw HTML.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): entity escapes (&lt;/&gt;) restored after extraction
    # mangling — confirm against the project's original test file.
    markdown_input = """<
bad>
</x-table>"""
    expected_token_list = [
        "[para(1,1):\n\n]",
        "[text:\a<\a&lt;\a\nbad\a>\a&gt;\a\n::\n\n]",
        "[raw-html:/x-table]",
        "[end-para]",
    ]
    expected_html = """<p>&lt;
bad&gt;
</x-table></p>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_tabs_003():
    """
    Test case 003: (part c) a tab may replace four spaces inside an
    indented code block's content.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """    a\ta
    ὐ\ta"""
    expected_token_list = [
        "[icode-block(1,5):    :\n    ]",
        "[text:a\ta\nὐ\ta:]",
        "[end-icode-block]",
    ]
    expected_html = """<pre><code>a\ta
ὐ\ta
</code></pre>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_124():
    """
    Test case 124: (part 2) the first-line tag may be partial, provided
    the split falls where whitespace would be.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div id="foo" class="bar
  baz">
</div>"""
    expected_token_list = [
        "[html-block(1,1)]",
        '[text:<div id="foo" class="bar\n  baz">\n</div>:]',
        "[end-html-block]",
    ]
    expected_html = """<div id="foo" class="bar
  baz">
</div>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_tabs_006c():
    """
    Test case 006c: variant of 006 with the leading tabs replaced by the
    equivalent run of spaces; the rendered output must match 006.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): space runs reconstructed from the icode-block(1,7)
    # column — `>` + 7 spaces mirrors `>\t\t` after tab expansion; verify
    # against the project's original test file.
    markdown_input = """>       foo"""
    expected_token_list = [
        "[block-quote(1,1):]",
        "[icode-block(1,7):    :]",
        "[text:foo:  ]",
        "[end-icode-block]",
        "[end-block-quote]",
    ]
    expected_html = """<blockquote>
<pre><code>  foo
</code></pre>
</blockquote>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_html_blocks_125():
    """
    Test case 125: an open tag need not be closed — the blank line ends
    the block and the rest is an ordinary paragraph.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """<div>
*foo*

*bar*"""
    expected_token_list = [
        "[html-block(1,1)]",
        "[text:<div>\n*foo*:]",
        "[end-html-block]",
        "[BLANK(3,1):]",
        "[para(4,1):]",
        "[emphasis:1:*]",
        "[text:bar:]",
        "[end-emphasis::1:*]",
        "[end-para]",
    ]
    expected_html = """<div>
*foo*
<p><em>bar</em></p>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_tabs_002():
    """
    Test case 002: (part b) a tab can stand in for four spaces when
    opening an indented code block; interior tabs are preserved.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    markdown_input = """  \tfoo\tbaz\t\tbim"""
    expected_token_list = [
        "[icode-block(1,4):  \t:]",
        "[text:foo\tbaz\t\tbim:]",
        "[end-icode-block]",
    ]
    expected_html = """<pre><code>foo\tbaz\t\tbim
</code></pre>"""

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    assert_token_consistency(markdown_input, parsed_tokens)
def test_extra_004():
    """
    Verify that a wide range of link-destination characters matches the
    GFM/CommonMark encodings.  Since % must be followed by two hex
    digits, `%12` is passed through unencoded while other characters are
    percent-encoded per the CommonMark reference implementations.
    Compare https://github.github.com/gfm/#example-511 for a similar
    case.
    """
    # Arrange
    parser = TokenizedMarkdown()
    renderer = TransformToGfm()
    # NOTE(review): `&amp;` escapes restored after extraction mangling —
    # confirm against the project's original test file.
    markdown_input = (
        "[link](!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~)"
    )
    expected_token_list = [
        "[para(1,1):]",
        "[link:inline:!%22#$%12&amp;'()*+,-./0123456789:;%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~::!\"#$%12&'\\(\\)*+,-./0123456789:;<=>?@A-Z[\\\\]^_`a-z{|}~:::link]",
        "[text:link:]",
        "[end-link::]",
        "[end-para]",
    ]
    expected_html = (
        '<p><a href="!%22#$%12&amp;\'()*+,-./0123456789:;'
        '%3C=%3E?@A-Z%5B%5C%5D%5E_%60a-z%7B%7C%7D~">link</a></p>'
    )

    # Act
    parsed_tokens = parser.transform(markdown_input)
    rendered_html = renderer.transform(parsed_tokens)

    # Assert
    assert_if_lists_different(expected_token_list, parsed_tokens)
    assert_if_strings_different(expected_html, rendered_html)
    # NOTE(review): unlike every sibling test, this one does not call
    # assert_token_consistency — possibly intentional (the consistency
    # checks may not yet support this case); confirm before adding it.