예제 #1
0
def test_htmlinputter_no_bs4():
    """
    Check that HTMLInputter.process_lines raises an
    OptionalTableImportError when BeautifulSoup is not installed.

    The check only applies to environments without BeautifulSoup, so
    the test is skipped whenever the ``bs4`` package can be imported.
    """

    # Enforce the precondition stated in the docstring: the expected
    # error is tied to BeautifulSoup being unavailable, so there is
    # nothing to verify when the package is importable.
    try:
        import bs4  # noqa: F401
    except ImportError:
        pass
    else:
        pytest.skip('BeautifulSoup (bs4) is installed')

    inputter = html.HTMLInputter()
    with pytest.raises(core.OptionalTableImportError):
        inputter.process_lines([])
예제 #2
0
def test_htmldata():
    """
    Check that HTMLData.start_line and HTMLData.end_line locate the
    first and last rows of table data. Reads data/html.html as sample
    input.
    """

    sample_path = 'data/html.html'
    with open(sample_path) as handle:
        markup = handle.read()

    inputter = html.HTMLInputter()
    inputter.html = {}
    data = html.HTMLData()

    # (table_id, first data row, last data row). A table_id of None
    # leaves inputter.html without a 'table_id' entry.
    cases = [
        (None,
         '<tr><td>1</td><td>a</td><td>1.05</td></tr>',
         '<tr><td>3</td><td>c</td><td>-1.25</td></tr>'),
        ('second',
         '<tr><td>4</td><td>d</td><td>10.5</td></tr>',
         '<tr><td>6</td><td>f</td><td>-12.5</td></tr>'),
        (3,
         '<tr><td>7</td><td>g</td><td>105.0</td></tr>',
         '<tr><td>9</td><td>i</td><td>-125.0</td></tr>'),
    ]
    for table_id, first_row, last_row in cases:
        if table_id is not None:
            inputter.html['table_id'] = table_id
        table_lines = inputter.get_lines(markup)
        assert str(table_lines[data.start_line(table_lines)]) == first_row
        # end_line gives the index one past the last data row
        assert str(table_lines[data.end_line(table_lines) - 1]) == last_row

    # Elements that hold no table data at all
    no_data = [
        html.SoupString(BeautifulSoup('<div></div>', 'html.parser').div),
        html.SoupString(BeautifulSoup('<p>Text</p>', 'html.parser').p),
    ]

    # Without table data, start_line raises ...
    with pytest.raises(core.InconsistentTableError):
        data.start_line(no_data)

    # ... whereas end_line returns None
    assert data.end_line(no_data) is None

    # A plain str among the lines must be rejected by both methods
    no_data.append('<tr><td>Data</td></tr>')
    for locate in (data.start_line, data.end_line):
        with pytest.raises(TypeError):
            locate(no_data)
예제 #3
0
def test_htmlinputter():
    """
    Check that HTMLInputter.get_lines turns the sample document into
    a list of SoupStrings, one per row of the selected table. Reads
    data/html.html as sample input.
    """

    sample_path = 'data/html.html'
    with open(sample_path) as handle:
        markup = handle.read()

    inputter = html.HTMLInputter()
    inputter.html = {}

    def rows():
        # String form of every line produced for the current settings
        return [str(line) for line in inputter.get_lines(markup)]

    # With no table_id set, the first table is selected
    assert rows() == [
        '<tr><th>Column 1</th><th>Column 2</th><th>Column 3</th></tr>',
        '<tr><td>1</td><td>a</td><td>1.05</td></tr>',
        '<tr><td>2</td><td>b</td><td>2.75</td></tr>',
        '<tr><td>3</td><td>c</td><td>-1.25</td></tr>',
    ]

    # A table_id that matches no table raises InconsistentTableError
    inputter.html = {'table_id': 4}
    with pytest.raises(core.InconsistentTableError):
        inputter.get_lines(markup)

    # Selection by string ID first, then by integer index
    selections = [
        ('second', [
            '<tr><th>Column A</th><th>Column B</th><th>Column C</th></tr>',
            '<tr><td>4</td><td>d</td><td>10.5</td></tr>',
            '<tr><td>5</td><td>e</td><td>27.5</td></tr>',
            '<tr><td>6</td><td>f</td><td>-12.5</td></tr>',
        ]),
        (3, [
            '<tr><th>C1</th><th>C2</th><th>C3</th></tr>',
            '<tr><td>7</td><td>g</td><td>105.0</td></tr>',
            '<tr><td>8</td><td>h</td><td>275.0</td></tr>',
            '<tr><td>9</td><td>i</td><td>-125.0</td></tr>',
        ]),
    ]
    for table_id, expected_rows in selections:
        inputter.html['table_id'] = table_id
        assert rows() == expected_rows
예제 #4
0
def test_htmlheader_start():
    """
    Check that HTMLHeader.start_line points at the header row of the
    selected table. Reads data/html.html as sample input.
    """

    sample_path = 'data/html.html'
    with open(sample_path) as handle:
        markup = handle.read()

    inputter = html.HTMLInputter()
    inputter.html = {}
    header = html.HTMLHeader()

    # (table_id, expected header row). A table_id of None leaves
    # inputter.html without a 'table_id' entry.
    cases = [
        (None,
         '<tr><th>Column 1</th><th>Column 2</th><th>Column 3</th></tr>'),
        ('second',
         '<tr><th>Column A</th><th>Column B</th><th>Column C</th></tr>'),
        (3,
         '<tr><th>C1</th><th>C2</th><th>C3</th></tr>'),
    ]
    for table_id, header_row in cases:
        if table_id is not None:
            inputter.html['table_id'] = table_id
        table_lines = inputter.get_lines(markup)
        assert str(table_lines[header.start_line(table_lines)]) == header_row

    # With no header row among the lines, start_line returns None
    data_row = BeautifulSoup('<table><tr><td>Data</td></tr></table>',
                             'html.parser').tr
    paragraph = BeautifulSoup('<p>Text</p>', 'html.parser').p
    headerless = [html.SoupString(data_row), html.SoupString(paragraph)]
    assert header.start_line(headerless) is None

    # A plain str among the lines must be rejected
    headerless.append('<tr><th>Header</th></tr>')
    with pytest.raises(TypeError):
        header.start_line(headerless)