Example #1
0
def test_fix_tables():
    """Yield one test callable per ``fix_tables`` case.

    Each case is a ``(name, changes, fixed_changes)`` tuple.  ``changes`` is
    parsed with ``parse_minidom(..., strict_xml=True)``, run through
    ``fix_tables`` in place, serialized with ``minidom_tostring`` and compared
    to ``fixed_changes`` via ``assert_html_equal``.  Both markup strings are
    passed through ``collapse`` before use.

    Every yielded callable carries a ``description`` attribute of the form
    ``'test_fix_tables - <case name>'``.
    """
    cases = [
        (
            'add a table row',
            '''
            <table>
              <tr><td>A</td></tr>
              <ins><tr><td>B</td></tr></ins>
            </table>
            ''',
            '''
            <table>
              <tr><td>A</td></tr>
              <tr><td><ins>B</ins></td></tr>
            </table>
            '''
        ),
        (
            'remove ins and del tags at the wrong level of the table',
            '''
            <table>
                <ins> </ins><del> </del>
                <thead>
                    <ins> </ins><del> </del>
                </thead>
                <tfoot>
                    <ins> </ins><del> </del>
                </tfoot>
                <tbody>
                    <ins> </ins><del> </del>
                    <tr>
                        <ins> </ins><del> </del>
                        <td><ins>A</ins></td>
                    </tr>
                </tbody>
            </table>
            ''',
            '''
            <table>
                <thead></thead>
                <tfoot></tfoot>
                <tbody>
                    <tr>
                        <td><ins>A</ins></td>
                    </tr>
                </tbody>
            </table>
            ''',
        ),
    ]
    for test_name, changes, fixed_changes in cases:
        changes = collapse(changes)
        fixed_changes = collapse(fixed_changes)

        # Bind this iteration's markup as default arguments.  A plain closure
        # looks the names up when the test is *called*, so if the runner
        # exhausts the generator before running anything, every test would
        # silently check only the last case.
        def test(changes=changes, fixed_changes=fixed_changes):
            changes_dom = parse_minidom(changes, strict_xml=True)
            fix_tables(changes_dom)
            assert_html_equal(minidom_tostring(changes_dom), fixed_changes)
        test.description = 'test_fix_tables - %s' % test_name
        yield test
Example #2
0
def test_fix_tables():
    """Yield one test callable per ``fix_tables`` case.

    Each case is a ``(name, changes, fixed_changes)`` tuple.  ``changes`` is
    parsed with ``parse_minidom(..., strict_xml=True)``, run through
    ``fix_tables`` in place, serialized with ``minidom_tostring`` and compared
    to ``fixed_changes`` via ``assert_html_equal``.  Both markup strings are
    passed through ``collapse`` before use.

    Every yielded callable carries a ``description`` attribute of the form
    ``'test_fix_tables - <case name>'``.
    """
    cases = [
        (
            'add a table row',
            '''
            <table>
              <tr><td>A</td></tr>
              <ins><tr><td>B</td></tr></ins>
            </table>
            ''',
            '''
            <table>
              <tr><td>A</td></tr>
              <tr><td><ins>B</ins></td></tr>
            </table>
            '''
        ),
        (
            'remove ins and del tags at the wrong level of the table',
            '''
            <table>
                <ins> </ins><del> </del>
                <thead>
                    <ins> </ins><del> </del>
                </thead>
                <tfoot>
                    <ins> </ins><del> </del>
                </tfoot>
                <tbody>
                    <ins> </ins><del> </del>
                    <tr>
                        <ins> </ins><del> </del>
                        <td><ins>A</ins></td>
                    </tr>
                </tbody>
            </table>
            ''',
            '''
            <table>
                <thead></thead>
                <tfoot></tfoot>
                <tbody>
                    <tr>
                        <td><ins>A</ins></td>
                    </tr>
                </tbody>
            </table>
            ''',
        ),
    ]
    for test_name, changes, fixed_changes in cases:
        changes = collapse(changes)
        fixed_changes = collapse(fixed_changes)

        # Bind this iteration's markup as default arguments.  A plain closure
        # looks the names up when the test is *called*, so if the runner
        # exhausts the generator before running anything, every test would
        # silently check only the last case.
        def test(changes=changes, fixed_changes=fixed_changes):
            changes_dom = parse_minidom(changes, strict_xml=True)
            fix_tables(changes_dom)
            assert_html_equal(minidom_tostring(changes_dom), fixed_changes)
        test.description = 'test_fix_tables - %s' % test_name
        yield test
Example #3
0
test_cases = [(
    'nested list items',
    collapse('''
        <ul>
          <li>Monday
            <ul>
              <li>2pm - 3pm</li>
            </ul>
          </li>
          <li>Wednesday
            <ul>
              <li>11am - Noon</li>
              <li>3pm - 5pm</li>
            </ul>
          </li>
          <li>Thursday
            <ul>
              <li>11am - Noon</li>
            </ul>
          </li>
          <li>Friday
            <ul>
              <li>Noon - 1pm</li>
            </ul>
          </li>
        </ul>
        '''),
    collapse('''
        <ul>
          <li>Tuesday
Example #4
0
def test_fix_lists():
    """Yield one test callable per ``fix_lists`` case.

    Each case is a ``(name, changes, fixed_changes)`` tuple.  ``changes`` is
    parsed with ``parse_minidom``, run through ``fix_lists`` in place,
    serialized with ``minidom_tostring`` and compared to ``fixed_changes``
    via ``assert_html_equal``.  Both markup strings are passed through
    ``collapse`` before use.

    Every yielded callable carries a ``description`` attribute of the form
    ``"test_fix_lists - <case name>"``.
    """
    cases = [
        (
            "simple list item insert",
            """
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
            </ol>
            """,
            """
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
            </ol>
            """,
        ),
        (
            "multiple list item insert",
            """
            <ol>
              <li>one</li>
              <ins>
                <li>two</li>
                <li>three</li>
              </ins>
            </ol>
            """,
            """
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li><ins>three</ins></li>
            </ol>
            """,
        ),
        (
            "simple list item delete afterward",
            """
            <ol>
              <li>one</li>
              <del><li>one and a half</li></del>
            </ol>
            """,
            """
            <ol>
              <li>one</li>
              <li class="del-li"><del>one and a half</del></li>
            </ol>
            """,
        ),
        (
            "simple list item delete first",
            """
            <ol>
              <del><li>one half</li></del>
              <li>one</li>
            </ol>
            """,
            """
            <ol>
              <li class="del-li"><del>one half</del></li>
              <li>one</li>
            </ol>
            """,
        ),
        (
            "multiple list item delete first",
            """
            <ol>
              <del>
                <li>one third</li>
                <li>two thirds</li>
              </del>
              <li>one</li>
            </ol>
            """,
            """
            <ol>
              <li class="del-li"><del>one third</del></li>
              <li class="del-li"><del>two thirds</del></li>
              <li>one</li>
            </ol>
            """,
        ),
        (
            "insert and delete separately",
            """
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
              <li>three</li>
              <del><li>three point five</li></del>
              <li>four</li>
            </ol>
            """,
            # The unchanged "three" item is closed with </li>; the expected
            # markup must itself be well formed.
            """
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li>three</li>
              <li class="del-li"><del>three point five</del></li>
              <li>four</li>
            </ol>
            """,
        ),
        (
            "multiple list item delete",
            """
            <ol>
              <li>one</li>
              <del>
                <li>two</li>
                <li>three</li>
              </del>
            </ol>
            """,
            """
            <ol>
              <li>one</li>
              <li class="del-li"><del>two</del></li>
              <li class="del-li"><del>three</del></li>
            </ol>
            """,
        ),
        (
            "delete only list item",
            """
            <ol>
              <del>
                <li>one</li>
              </del>
            </ol>
            """,
            """
            <ol>
              <li class="del-li"><del>one</del></li>
            </ol>
            """,
        ),
    ]
    for test_name, changes, fixed_changes in cases:
        changes = collapse(changes)
        fixed_changes = collapse(fixed_changes)

        # Bind this iteration's markup as default arguments.  A plain closure
        # looks the names up when the test is *called*, so if the runner
        # exhausts the generator before running anything, every test would
        # silently check only the last case.
        def test(changes=changes, fixed_changes=fixed_changes):
            changes_dom = parse_minidom(changes)
            fix_lists(changes_dom)
            assert_html_equal(minidom_tostring(changes_dom), fixed_changes)

        test.description = "test_fix_lists - %s" % test_name
        yield test
Example #5
0
def test_fix_lists():
    """Yield one test callable per ``fix_lists`` case.

    Each case is a ``(name, changes, fixed_changes)`` tuple.  ``changes`` is
    parsed with ``parse_minidom``, run through ``fix_lists`` in place,
    serialized with ``minidom_tostring`` and compared to ``fixed_changes``
    via ``assert_html_equal``.  Both markup strings are passed through
    ``collapse`` before use.

    Every yielded callable carries a ``description`` attribute of the form
    ``'test_fix_lists - <case name>'``.
    """
    cases = [
        ('simple list item insert', '''
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
            </ol>
            '''),
        ('multiple list item insert', '''
            <ol>
              <li>one</li>
              <ins>
                <li>two</li>
                <li>three</li>
              </ins>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li><ins>three</ins></li>
            </ol>
            '''),
        ('simple list item delete afterward', '''
            <ol>
              <li>one</li>
              <del><li>one and a half</li></del>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li class="del-li"><del>one and a half</del></li>
            </ol>
            '''),
        ('simple list item delete first', '''
            <ol>
              <del><li>one half</li></del>
              <li>one</li>
            </ol>
            ''', '''
            <ol>
              <li class="del-li"><del>one half</del></li>
              <li>one</li>
            </ol>
            '''),
        ('multiple list item delete first', '''
            <ol>
              <del>
                <li>one third</li>
                <li>two thirds</li>
              </del>
              <li>one</li>
            </ol>
            ''', '''
            <ol>
              <li class="del-li"><del>one third</del></li>
              <li class="del-li"><del>two thirds</del></li>
              <li>one</li>
            </ol>
            '''),
        # The unchanged "three" item is closed with </li>; the expected
        # markup must itself be well formed.
        ('insert and delete separately', '''
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
              <li>three</li>
              <del><li>three point five</li></del>
              <li>four</li>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li>three</li>
              <li class="del-li"><del>three point five</del></li>
              <li>four</li>
            </ol>
            '''),
        ('multiple list item delete', '''
            <ol>
              <li>one</li>
              <del>
                <li>two</li>
                <li>three</li>
              </del>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li class="del-li"><del>two</del></li>
              <li class="del-li"><del>three</del></li>
            </ol>
            '''),
        ('delete only list item', '''
            <ol>
              <del>
                <li>one</li>
              </del>
            </ol>
            ''', '''
            <ol>
              <li class="del-li"><del>one</del></li>
            </ol>
            '''),
        ('LI full content change does not add another LI', '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <ins>
                <li>BBB</li>
              </ins>
            </ol>
            ''', '''
            <ol>
              <li><del>AAA</del><ins>BBB</ins></li>
            </ol>
            '''),
        ('LI full content change keeps attrs', '''
            <ol>
              <del>
                <li class="old" id="foo">AAA</li>
              </del>
              <ins>
                <li class="new">BBB</li>
              </ins>
            </ol>
            ''', '''
            <ol>
              <li class="new"><del>AAA</del><ins>BBB</ins></li>
            </ol>
            '''),
        (
            'LI changes markup internalization fix not done if next tag is not an insert',  # noqa
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
                <li><strong>BBB</strong></li>
              <ins>
                <li>CCC</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <li><strong>BBB</strong></li>
                <li><ins>CCC</ins></li>
            </ol>
            ''',
        ),
        (
            'LI after del must be ins',
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <del>
                <li>BBB</li>
              </del>
              <ins>
                <li>CCC</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <li><del>BBB</del><ins>CCC</ins></li>
            </ol>
            ''',
        ),
        (
            'LI changes markup internalization fix not performed if next tags child is not li',  # noqa
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <ins>
                <foo>BBB</foo>
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <ins>
                    <foo>BBB</foo>
                </ins>
            </ol>
            ''',
        ),
        (
            'LI changes markup internalization fix not performed if next tags is text',  # noqa
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <ins>
                BBB
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <ins>
                    BBB
                </ins>
            </ol>
            ''',
        ),
    ]
    for test_name, changes, fixed_changes in cases:
        changes = collapse(changes)
        fixed_changes = collapse(fixed_changes)

        # Bind this iteration's markup as default arguments.  A plain closure
        # looks the names up when the test is *called*, so if the runner
        # exhausts the generator before running anything, every test would
        # silently check only the last case.
        def test(changes=changes, fixed_changes=fixed_changes):
            changes_dom = parse_minidom(changes)
            fix_lists(changes_dom)
            assert_html_equal(minidom_tostring(changes_dom), fixed_changes)

        test.description = 'test_fix_lists - %s' % test_name
        yield test
Example #6
0
 'TD content change does not show TD removal',
 '''
 <table>
     <tbody>
         <tr>
             <td>AAA</td>
             <td>BBB</td>
         </tr>
     </tbody>
 </table>
 ''',
 collapse('''
 <table>
     <tbody>
         <tr>
             <td>ZZZ</td>
             <td>BBB</td>
         </tr>
     </tbody>
 </table>
 '''),
 collapse('''
 <table>
     <tbody>
         <tr>
             <td><del>AAA</del><ins>ZZZ</ins></td>
             <td>BBB</td>
         </tr>
     </tbody>
 </table>
 '''),
 [  # The result of this will be fixed by fix_tables
Example #7
0
def test_fix_lists():
    """Yield one test callable per ``fix_lists`` case.

    Each case is a ``(name, changes, fixed_changes)`` tuple.  ``changes`` is
    parsed with ``parse_minidom``, run through ``fix_lists`` in place,
    serialized with ``minidom_tostring`` and compared to ``fixed_changes``
    via ``assert_html_equal``.  Both markup strings are passed through
    ``collapse`` before use.

    Every yielded callable carries a ``description`` attribute of the form
    ``'test_fix_lists - <case name>'``.
    """
    cases = [
        ('simple list item insert', '''
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
            </ol>
            '''),
        ('multiple list item insert', '''
            <ol>
              <li>one</li>
              <ins>
                <li>two</li>
                <li>three</li>
              </ins>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li><ins>three</ins></li>
            </ol>
            '''),
        ('simple list item delete afterward', '''
            <ol>
              <li>one</li>
              <del><li>one and a half</li></del>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li class="del-li"><del>one and a half</del></li>
            </ol>
            '''),
        ('simple list item delete first', '''
            <ol>
              <del><li>one half</li></del>
              <li>one</li>
            </ol>
            ''', '''
            <ol>
              <li class="del-li"><del>one half</del></li>
              <li>one</li>
            </ol>
            '''),
        ('multiple list item delete first', '''
            <ol>
              <del>
                <li>one third</li>
                <li>two thirds</li>
              </del>
              <li>one</li>
            </ol>
            ''', '''
            <ol>
              <li class="del-li"><del>one third</del></li>
              <li class="del-li"><del>two thirds</del></li>
              <li>one</li>
            </ol>
            '''),
        # The unchanged "three" item is closed with </li>; the expected
        # markup must itself be well formed.
        ('insert and delete separately', '''
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
              <li>three</li>
              <del><li>three point five</li></del>
              <li>four</li>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li>three</li>
              <li class="del-li"><del>three point five</del></li>
              <li>four</li>
            </ol>
            '''),
        ('multiple list item delete', '''
            <ol>
              <li>one</li>
              <del>
                <li>two</li>
                <li>three</li>
              </del>
            </ol>
            ''', '''
            <ol>
              <li>one</li>
              <li class="del-li"><del>two</del></li>
              <li class="del-li"><del>three</del></li>
            </ol>
            '''),
        ('delete only list item', '''
            <ol>
              <del>
                <li>one</li>
              </del>
            </ol>
            ''', '''
            <ol>
              <li class="del-li"><del>one</del></li>
            </ol>
            '''),
    ]
    for test_name, changes, fixed_changes in cases:
        changes = collapse(changes)
        fixed_changes = collapse(fixed_changes)

        # Bind this iteration's markup as default arguments.  A plain closure
        # looks the names up when the test is *called*, so if the runner
        # exhausts the generator before running anything, every test would
        # silently check only the last case.
        def test(changes=changes, fixed_changes=fixed_changes):
            changes_dom = parse_minidom(changes)
            fix_lists(changes_dom)
            assert_html_equal(minidom_tostring(changes_dom), fixed_changes)

        test.description = 'test_fix_lists - %s' % test_name
        yield test
test_cases = [
    (
        'nested list items',
        collapse('''
        <ul>
          <li>Monday
            <ul>
              <li>2pm - 3pm</li>
            </ul>
          </li>
          <li>Wednesday
            <ul>
              <li>11am - Noon</li>
              <li>3pm - 5pm</li>
            </ul>
          </li>
          <li>Thursday
            <ul>
              <li>11am - Noon</li>
            </ul>
          </li>
          <li>Friday
            <ul>
              <li>Noon - 1pm</li>
            </ul>
          </li>
        </ul>
        '''),
        collapse('''
        <ul>
          <li>Tuesday
Example #9
0
     <table class="table_class"><tbody>
     <tr>
     <td colspan="2">top across</td>
     </tr>
     <tr>
     <td>bottom left</td>
     <td>bottom right</td>
     </tr>
     </tbody></table>
     ''',
     collapse('''
     <table class="table_class"><tbody>
     <tr>
     <td colspan="2">top across</td>
     </tr>
     <tr>
     <td>bottom left</td>
     <td>bottom right</td>
     </tr>
     </tbody></table>
     '''),
 ),
 (
     'whitespace changes in a table with nbsp entity',
     '''
     <table>
     <tbody>
     <tr>
         <td> </td>
         <td>&#160;</td>
         <td>&nbsp;</td>
Example #10
0
def test_fix_lists():
    """Yield one test callable per ``fix_lists`` case.

    Each case is a ``(name, changes, fixed_changes)`` tuple.  ``changes`` is
    parsed with ``parse_minidom``, run through ``fix_lists`` in place,
    serialized with ``minidom_tostring`` and compared to ``fixed_changes``
    via ``assert_html_equal``.  Both markup strings are passed through
    ``collapse`` before use.

    Every yielded callable carries a ``description`` attribute of the form
    ``'test_fix_lists - <case name>'``.
    """
    cases = [
        (
            'simple list item insert',
            '''
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
            </ol>
            ''',
            '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
            </ol>
            '''
        ),
        (
            'multiple list item insert',
            '''
            <ol>
              <li>one</li>
              <ins>
                <li>two</li>
                <li>three</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li><ins>three</ins></li>
            </ol>
            '''
        ),
        (
            'simple list item delete afterward',
            '''
            <ol>
              <li>one</li>
              <del><li>one and a half</li></del>
            </ol>
            ''',
            '''
            <ol>
              <li>one</li>
              <li class="del-li"><del>one and a half</del></li>
            </ol>
            '''
        ),
        (
            'simple list item delete first',
            '''
            <ol>
              <del><li>one half</li></del>
              <li>one</li>
            </ol>
            ''',
            '''
            <ol>
              <li class="del-li"><del>one half</del></li>
              <li>one</li>
            </ol>
            '''
        ),
        (
            'multiple list item delete first',
            '''
            <ol>
              <del>
                <li>one third</li>
                <li>two thirds</li>
              </del>
              <li>one</li>
            </ol>
            ''',
            '''
            <ol>
              <li class="del-li"><del>one third</del></li>
              <li class="del-li"><del>two thirds</del></li>
              <li>one</li>
            </ol>
            '''
        ),
        (
            'insert and delete separately',
            '''
            <ol>
              <li>one</li>
              <ins><li>two</li></ins>
              <li>three</li>
              <del><li>three point five</li></del>
              <li>four</li>
            </ol>
            ''',
            # The unchanged "three" item is closed with </li>; the expected
            # markup must itself be well formed.
            '''
            <ol>
              <li>one</li>
              <li><ins>two</ins></li>
              <li>three</li>
              <li class="del-li"><del>three point five</del></li>
              <li>four</li>
            </ol>
            '''
        ),
        (
            'multiple list item delete',
            '''
            <ol>
              <li>one</li>
              <del>
                <li>two</li>
                <li>three</li>
              </del>
            </ol>
            ''',
            '''
            <ol>
              <li>one</li>
              <li class="del-li"><del>two</del></li>
              <li class="del-li"><del>three</del></li>
            </ol>
            '''
        ),
        (
            'delete only list item',
            '''
            <ol>
              <del>
                <li>one</li>
              </del>
            </ol>
            ''',
            '''
            <ol>
              <li class="del-li"><del>one</del></li>
            </ol>
            '''
        ),
        (
            'LI full content change does not add another LI',
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <ins>
                <li>BBB</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
              <li><del>AAA</del><ins>BBB</ins></li>
            </ol>
            '''
        ),
        (
            'LI full content change keeps attrs',
            '''
            <ol>
              <del>
                <li class="old" id="foo">AAA</li>
              </del>
              <ins>
                <li class="new">BBB</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
              <li class="new"><del>AAA</del><ins>BBB</ins></li>
            </ol>
            '''
        ),
        (
            'LI changes markup internalization fix not done if next tag is not an insert',  # noqa
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
                <li><strong>BBB</strong></li>
              <ins>
                <li>CCC</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <li><strong>BBB</strong></li>
                <li><ins>CCC</ins></li>
            </ol>
            ''',
        ),
        (
            'LI after del must be ins',
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <del>
                <li>BBB</li>
              </del>
              <ins>
                <li>CCC</li>
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <li><del>BBB</del><ins>CCC</ins></li>
            </ol>
            ''',
        ),
        (
            'LI changes markup internalization fix not performed if next tags child is not li',  # noqa
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <ins>
                <foo>BBB</foo>
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <ins>
                    <foo>BBB</foo>
                </ins>
            </ol>
            ''',
        ),
        (
            'LI changes markup internalization fix not performed if next tags is text',  # noqa
            '''
            <ol>
              <del>
                <li>AAA</li>
              </del>
              <ins>
                BBB
              </ins>
            </ol>
            ''',
            '''
            <ol>
                <li class="del-li">
                    <del>AAA</del>
                </li>
                <ins>
                    BBB
                </ins>
            </ol>
            ''',
        ),
    ]
    for test_name, changes, fixed_changes in cases:
        changes = collapse(changes)
        fixed_changes = collapse(fixed_changes)

        # Bind this iteration's markup as default arguments.  A plain closure
        # looks the names up when the test is *called*, so if the runner
        # exhausts the generator before running anything, every test would
        # silently check only the last case.
        def test(changes=changes, fixed_changes=fixed_changes):
            changes_dom = parse_minidom(changes)
            fix_lists(changes_dom)
            assert_html_equal(minidom_tostring(changes_dom), fixed_changes)
        test.description = 'test_fix_lists - %s' % test_name
        yield test