# Tests for character references, backslash escapes, list items, and block
# quotes, following the corresponding examples in the GFM/CommonMark
# specification.
# NOTE: the import paths below are assumed from the pymarkdown project's
# usual test layout; adjust them if the local layout differs.
from pymarkdown.tokenized_markdown import TokenizedMarkdown
from pymarkdown.transform_to_gfm import TransformToGfm

from .utils import (
    assert_if_lists_different,
    assert_if_strings_different,
    assert_token_consistency,
)

def test_character_references_323():
    """
    Test case 323:  Hexadecimal numeric character references consist of &# + either X or x + a string of 1-6 hexadecimal digits + ;. They too are parsed as the corresponding Unicode character (this time specified with a hexadecimal numeral instead of decimal).
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """" ആ ಫ"""
    expected_tokens = [
        "[para(1,1):]",
        '[text:\a&#X22;\a\a"\a&quot;\a\a \a&#XD06;\aആ\a \a&#xcab;\aಫ\a:]',
        "[end-para]",
    ]
    expected_gfm = """<p>&quot; ആ ಫ</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
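# Illustration (not part of the original suite): Python's standard html
# module follows the same HTML5 rules for hexadecimal numeric references,
# so it can cross-check the replacements recorded in the tokens above.
# The test name below is hypothetical.
def test_character_references_323_stdlib_cross_check():
    import html

    assert html.unescape("&#X22;") == '"'  # uppercase X is accepted
    assert html.unescape("&#XD06;") == "\u0d06"  # ആ
    assert html.unescape("&#xcab;") == "\u0cab"  # ಫ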
def test_character_references_322():
    """
    Test case 322:  Decimal numeric character references consist of &# + a string of 1–7 arabic digits + ;. A numeric character reference is parsed as the corresponding Unicode character. Invalid Unicode code points will be replaced by the REPLACEMENT CHARACTER (U+FFFD). For security reasons, the code point U+0000 will also be replaced by U+FFFD.
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """&#35; &#1234; &#992; &#0;"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\a&#35;\a#\a \a&#1234;\aӒ\a \a&#992;\aϠ\a \a&#0;\a�\a:]",
        "[end-para]",
    ]
    expected_gfm = """<p># Ӓ Ϡ �</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
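# Illustration (not part of the original suite): html.unescape applies the
# same decimal-reference rules, including the U+FFFD substitution for the
# invalid code point U+0000 noted in the docstring above. The test name
# below is hypothetical.
def test_character_references_322_stdlib_cross_check():
    import html

    assert html.unescape("&#35;") == "#"
    assert html.unescape("&#1234;") == "\u04d2"  # Ӓ
    assert html.unescape("&#992;") == "\u03e0"  # Ϡ
    assert html.unescape("&#0;") == "\ufffd"  # U+0000 is replaced for security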
def test_character_references_336():
    """
    Test case 336:  (part 4) Entity and numeric character references cannot be used in place of symbols indicating structure in CommonMark documents.
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """&#9;foo"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\a&#9;\a\t\afoo:]",
        "[end-para]",
    ]
    expected_gfm = """<p>\tfoo</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
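# Illustration (not part of the original suite): &#9; does resolve to a real
# tab character, but because that resolution happens during inline parsing,
# the tab arrives too late to indent the line into a code block. The test
# name below is hypothetical.
def test_character_references_336_stdlib_cross_check():
    import html

    assert html.unescape("&#9;foo") == "\tfoo"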
def test_backslash_escapes_308():
    """
    Test case 308:  Any ASCII punctuation character may be backslash-escaped:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\\\b!\\\b\a\"\a&quot;\a\\\b#\\\b$\\\b%\\\b\a&\a&amp;\a\\\b'\\\b(\\\b)\\\b*\\\b+\\\b,\\\b-\\\b.\\\b/\\\b:\\\b;\\\b\a<\a&lt;\a\\\b=\\\b\a>\a&gt;\a\\\b?\\\b@\\\b[\\\b\\\\\b]\\\b^\\\b_\\\b`\\\b{\\\b|\\\b}\\\b~:]",
        "[end-para]",
    ]
    expected_gfm = """<p>!&quot;#$%&amp;'()*+,-./:;&lt;=&gt;?@[\\]^_`{|}~</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
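# Illustration (not part of the original suite): the escapable characters in
# the test above are exactly Python's string.punctuation, matching the
# spec's "any ASCII punctuation character". The test name below is
# hypothetical.
def test_backslash_escapes_308_punctuation_cross_check():
    import string

    assert string.punctuation == "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"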
def test_character_references_331():
    """
    Test case 331:  (part 1) Entity and numeric character references are treated as literal text in code spans and code blocks:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """`f&ouml;&ouml;`"""
    expected_tokens = [
        "[para(1,1):]",
        "[icode-span:f\a&\a&amp;\aouml;\a&\a&amp;\aouml;:`::]",
        "[end-para]",
    ]
    expected_gfm = """<p><code>f&amp;ouml;&amp;ouml;</code></p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
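# Illustration (not part of the original suite): inside a code span the
# reference text is not resolved; only the ampersand itself is HTML-escaped
# on output, which html.escape reproduces. The test name below is
# hypothetical.
def test_character_references_331_stdlib_cross_check():
    import html

    assert html.escape("f&ouml;&ouml;", quote=False) == "f&amp;ouml;&amp;ouml;"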
def test_character_references_325():
    """
    Test case 325:  Although HTML5 does accept some entity references without a trailing semicolon (such as &copy), these are not recognized here, because it makes the grammar too ambiguous:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """&copy"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\a&\a&amp;\acopy:]",
        "[end-para]",
    ]
    expected_gfm = """<p>&amp;copy</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
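# Illustration (not part of the original suite): HTML5 itself does resolve
# "&copy" without the trailing semicolon (html.unescape follows HTML5),
# which is exactly the ambiguity the docstring says CommonMark avoids. The
# test name below is hypothetical.
def test_character_references_325_stdlib_contrast():
    import html

    assert html.unescape("&copy") == "\u00a9"  # HTML5: resolved anyway
    assert html.unescape("&copy;") == "\u00a9"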
def test_character_references_326():
    """
    Test case 326:  Strings that are not on the list of HTML5 named entities are not recognized as entity references either:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """&MadeUpEntity;"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\a&\a&amp;\aMadeUpEntity;:]",
        "[end-para]",
    ]
    expected_gfm = """<p>&amp;MadeUpEntity;</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
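# Illustration (not part of the original suite): names outside the HTML5
# entity list are left untouched by html.unescape as well. The test name
# below is hypothetical.
def test_character_references_326_stdlib_cross_check():
    import html

    assert html.unescape("&MadeUpEntity;") == "&MadeUpEntity;"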
def test_backslash_escapes_309():
    """
    Test case 309:  Backslashes before other characters are treated as literal backslashes:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """\\→\\A\\a\\ \\3\\φ\\«"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\\→\\A\\a\\ \\3\\φ\\«:]",
        "[end-para]",
    ]
    expected_gfm = """<p>\\→\\A\\a\\ \\3\\φ\\«</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_character_references_324a():
    """
    Test case 324a:  Extension of 324
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """&"""
    expected_tokens = [
        "[para(1,1):]",
        "[text:\a&\a&amp;\a:]",
        "[end-para]",
    ]
    expected_gfm = """<p>&amp;</p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
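# Illustration (not part of the original suite): a bare ampersand is not a
# reference, so nothing is resolved and it is escaped to &amp; on output.
# The test name below is hypothetical.
def test_character_references_324a_stdlib_cross_check():
    import html

    assert html.unescape("&") == "&"  # nothing to resolve
    assert html.escape("&") == "&amp;"  # escaped on the way out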
def test_backslash_escapes_317():
    """
    Test case 317:  (part 5) Backslash escapes do not work in code blocks, code spans, autolinks, or raw HTML:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """<a href="/bar\\/)">"""
    expected_tokens = [
        "[html-block(1,1)]",
        '[text:<a href="/bar\\/)">:]',
        "[end-html-block]",
    ]
    expected_gfm = """<a href="/bar\\/)">"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_backslash_escapes_316():
    """
    Test case 316:  (part 4) Backslash escapes do not work in code blocks, code spans, autolinks, or raw HTML:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """<http://example.com?find=\\*>"""
    expected_tokens = [
        "[para(1,1):]",
        "[uri-autolink:http://example.com?find=\\*]",
        "[end-para]",
    ]
    expected_gfm = """<p><a href="http://example.com?find=%5C*">http://example.com?find=\\*</a></p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
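# Illustration (not part of the original suite): the literal backslash kept
# in the autolink target is percent-encoded as %5C in the href, matching
# urllib's encoding of that byte. The test name below is hypothetical.
def test_backslash_escapes_316_percent_encoding_cross_check():
    from urllib.parse import quote

    assert quote("\\", safe="") == "%5C"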
def test_backslash_escapes_313():
    """
    Test case 313:  (part 1) Backslash escapes do not work in code blocks, code spans, autolinks, or raw HTML:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """`` \\[\\` ``"""
    expected_tokens = [
        "[para(1,1):]",
        "[icode-span:\\[\\`:``: : ]",
        "[end-para]",
    ]
    expected_gfm = """<p><code>\\[\\`</code></p>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
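# Minimal sketch (not part of the original suite) of the code-span stripping
# rule recorded in the token above: one leading and one trailing space are
# removed when both are present and the content is not all spaces. The
# helper and test names are hypothetical.
def _strip_code_span_padding(content: str) -> str:
    if content.startswith(" ") and content.endswith(" ") and content.strip():
        return content[1:-1]
    return content


def test_backslash_escapes_313_stripping_sketch():
    assert _strip_code_span_padding(" \\[\\` ") == "\\[\\`"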
def test_list_items_293a():
    """
    Test case 293a:  variation on 293
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """1. a
  1. b
    1. c"""
    expected_tokens = [
        "[olist(1,1):.:1:3::   ]",
        "[para(1,4):]",
        "[text:a:]",
        "[end-para]",
        "[li(2,3):5:  ]",
        "[para(2,6):\n ]",
        "[text:b\n1. c::\n]",
        "[end-para]",
        "[end-olist]",
    ]
    expected_gfm = """<ol>
<li>a</li>
<li>b
1. c</li>
</ol>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_character_references_327():
    """
    Test case 327:  (part 1) Entity and numeric character references are recognized in any context besides code spans or code blocks, including URLs, link titles, and fenced code block info strings:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = '<a href="&ouml;&ouml;.html">'
    expected_tokens = [
        "[html-block(1,1)]",
        '[text:<a href="&ouml;&ouml;.html">:]',
        "[end-html-block]",
    ]
    expected_gfm = """<a href="&ouml;&ouml;.html">"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_block_quotes_214():
    """
    Test case 214:  (part 1) For the same reason, we can’t omit the > in front of subsequent lines of an indented or fenced code block:
    """
    # TODO add case with >

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """>     foo
    bar"""
    expected_tokens = [
        "[block-quote(1,1):]",
        "[icode-block(1,7):    :]",
        "[text:foo:]",
        "[end-icode-block]",
        "[end-block-quote]",
        "[icode-block(2,5):    :]",
        "[text:bar:]",
        "[end-icode-block]",
    ]
    expected_gfm = """<blockquote>
<pre><code>foo
</code></pre>
</blockquote>
<pre><code>bar
</code></pre>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_list_items_289():
    """
    Test case 289:  (part 2) To separate consecutive lists of the same type, or to separate a list from an indented code block that would otherwise be parsed as a subparagraph of the final list item, you can insert a blank HTML comment:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """-   foo

    notcode

-   foo

<!-- -->

    code"""
    expected_tokens = [
        "[ulist(1,1):-::4::    ]",
        "[para(1,5):]",
        "[text:foo:]",
        "[end-para]",
        "[BLANK(2,1):]",
        "[para(3,5):]",
        "[text:notcode:]",
        "[end-para]",
        "[BLANK(4,1):]",
        "[li(5,1):4:]",
        "[para(5,5):]",
        "[text:foo:]",
        "[end-para]",
        "[BLANK(6,1):]",
        "[end-ulist]",
        "[html-block(7,1)]",
        "[text:<!-- -->:]",
        "[end-html-block]",
        "[BLANK(8,1):]",
        "[icode-block(9,5):    :]",
        "[text:code:]",
        "[end-icode-block]",
    ]
    expected_gfm = """<ul>
<li>
<p>foo</p>
<p>notcode</p>
</li>
<li>
<p>foo</p>
</li>
</ul>
<!-- -->
<pre><code>code
</code></pre>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_list_items_290():
    """
    Test case 290:  (part 1) List items need not be indented to the same level. The following list items will be treated as items at the same list level, since none is indented enough to belong to the previous list item:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """- a
 - b
  - c
   - d
  - e
 - f
- g"""
    expected_tokens = [
        "[ulist(1,1):-::2:]",
        "[para(1,3):]",
        "[text:a:]",
        "[end-para]",
        "[li(2,2):3: ]",
        "[para(2,4):]",
        "[text:b:]",
        "[end-para]",
        "[li(3,3):4:  ]",
        "[para(3,5):]",
        "[text:c:]",
        "[end-para]",
        "[li(4,4):5:   ]",
        "[para(4,6):]",
        "[text:d:]",
        "[end-para]",
        "[li(5,3):4:  ]",
        "[para(5,5):]",
        "[text:e:]",
        "[end-para]",
        "[li(6,2):3: ]",
        "[para(6,4):]",
        "[text:f:]",
        "[end-para]",
        "[li(7,1):2:]",
        "[para(7,3):]",
        "[text:g:]",
        "[end-para]",
        "[end-ulist]",
    ]
    expected_gfm = """<ul>
<li>a</li>
<li>b</li>
<li>c</li>
<li>d</li>
<li>e</li>
<li>f</li>
<li>g</li>
</ul>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)
def test_list_items_306():
    """
    Test case 306:  (part 2) Here the outer list is loose, the inner list tight:
    """

    # Arrange
    tokenizer = TokenizedMarkdown()
    transformer = TransformToGfm()
    source_markdown = """- a
  - b
  - c

- d
  - e
  - f"""
    expected_tokens = [
        "[ulist(1,1):-::2:]",
        "[para(1,3):]",
        "[text:a:]",
        "[end-para]",
        "[ulist(2,3):-::4:  ]",
        "[para(2,5):]",
        "[text:b:]",
        "[end-para]",
        "[li(3,3):4:  ]",
        "[para(3,5):]",
        "[text:c:]",
        "[end-para]",
        "[BLANK(4,1):]",
        "[end-ulist]",
        "[li(5,1):2:]",
        "[para(5,3):]",
        "[text:d:]",
        "[end-para]",
        "[ulist(6,3):-::4:  ]",
        "[para(6,5):]",
        "[text:e:]",
        "[end-para]",
        "[li(7,3):4:  ]",
        "[para(7,5):]",
        "[text:f:]",
        "[end-para]",
        "[end-ulist]",
        "[end-ulist]",
    ]
    expected_gfm = """<ul>
<li>
<p>a</p>
<ul>
<li>b</li>
<li>c</li>
</ul>
</li>
<li>
<p>d</p>
<ul>
<li>e</li>
<li>f</li>
</ul>
</li>
</ul>"""

    # Act
    actual_tokens = tokenizer.transform(source_markdown)
    actual_gfm = transformer.transform(actual_tokens)

    # Assert
    assert_if_lists_different(expected_tokens, actual_tokens)
    assert_if_strings_different(expected_gfm, actual_gfm)
    assert_token_consistency(source_markdown, actual_tokens)