def test_htmlinputter_no_bs4():
    """
    Processing lines must fail with an explicit import error.

    Calls ``HTMLInputter.process_lines`` on an empty list and expects
    ``core.OptionalTableImportError`` to be raised. The test is meant for
    environments in which BeautifulSoup is not installed.
    """
    html_inputter = html.HTMLInputter()

    with pytest.raises(core.OptionalTableImportError):
        html_inputter.process_lines([])
def test_htmldata():
    """
    Check the row indices reported by ``HTMLData.start_line``/``end_line``.

    For each table selected from data/html.html, ``start_line`` must index the
    first data row and ``end_line`` must index one past the last data row.
    The error paths for input without table data and for non-SoupString
    entries are exercised as well.
    """
    sample_path = 'data/html.html'
    with open(sample_path) as handle:
        markup = handle.read()

    reader = html.HTMLInputter()
    reader.html = {}
    html_data = html.HTMLData()

    # (table_id, first data row, last data row). ``None`` means that
    # ``table_id`` is left unset for that pass.
    scenarios = (
        (None,
         '<tr><td>1</td><td>a</td><td>1.05</td></tr>',
         '<tr><td>3</td><td>c</td><td>-1.25</td></tr>'),
        ('second',
         '<tr><td>4</td><td>d</td><td>10.5</td></tr>',
         '<tr><td>6</td><td>f</td><td>-12.5</td></tr>'),
        (3,
         '<tr><td>7</td><td>g</td><td>105.0</td></tr>',
         '<tr><td>9</td><td>i</td><td>-125.0</td></tr>'),
    )
    for table_id, first_row, last_row in scenarios:
        if table_id is not None:
            reader.html['table_id'] = table_id
        rows = reader.get_lines(markup)

        first_index = html_data.start_line(rows)
        assert str(rows[first_index]) == first_row

        # end_line is exclusive: it is the index of the last data row + 1
        last_index = html_data.end_line(rows) - 1
        assert str(rows[last_index]) == last_row

    # Elements that carry no table data at all
    div_tag = BeautifulSoup('<div></div>', 'html.parser').div
    p_tag = BeautifulSoup('<p>Text</p>', 'html.parser').p
    rows = [html.SoupString(div_tag), html.SoupString(p_tag)]

    # start_line raises when there is no table data ...
    with pytest.raises(core.InconsistentTableError):
        html_data.start_line(rows)

    # ... whereas end_line reports None
    assert html_data.end_line(rows) is None

    # A plain str among the SoupStrings must be rejected by both methods
    rows.append('<tr><td>Data</td></tr>')
    with pytest.raises(TypeError):
        html_data.start_line(rows)
    with pytest.raises(TypeError):
        html_data.end_line(rows)
def test_htmlinputter():
    """
    Check that ``HTMLInputter.get_lines`` yields the rows of the chosen table.

    The sample document data/html.html is read and the stringified result of
    ``get_lines`` is compared with the expected ``<tr>`` rows for the default
    table, a table selected by string id and a table selected by integer
    index. A ``table_id`` that matches no table must raise
    ``core.InconsistentTableError``.
    """
    sample_path = 'data/html.html'
    with open(sample_path) as handle:
        markup = handle.read()

    reader = html.HTMLInputter()

    def rendered_rows():
        # Stringify whatever get_lines returns for the current settings
        return [str(row) for row in reader.get_lines(markup)]

    # Without a table_id the first table is used
    reader.html = {}
    first_table = [
        '<tr><th>Column 1</th><th>Column 2</th><th>Column 3</th></tr>',
        '<tr><td>1</td><td>a</td><td>1.05</td></tr>',
        '<tr><td>2</td><td>b</td><td>2.75</td></tr>',
        '<tr><td>3</td><td>c</td><td>-1.25</td></tr>',
    ]
    assert rendered_rows() == first_table

    # A table that cannot be found is an InconsistentTableError
    reader.html = {'table_id': 4}
    with pytest.raises(core.InconsistentTableError):
        reader.get_lines(markup)

    # Selection by string ID
    reader.html['table_id'] = 'second'
    second_table = [
        '<tr><th>Column A</th><th>Column B</th><th>Column C</th></tr>',
        '<tr><td>4</td><td>d</td><td>10.5</td></tr>',
        '<tr><td>5</td><td>e</td><td>27.5</td></tr>',
        '<tr><td>6</td><td>f</td><td>-12.5</td></tr>',
    ]
    assert rendered_rows() == second_table

    # Selection by integer index
    reader.html['table_id'] = 3
    third_table = [
        '<tr><th>C1</th><th>C2</th><th>C3</th></tr>',
        '<tr><td>7</td><td>g</td><td>105.0</td></tr>',
        '<tr><td>8</td><td>h</td><td>275.0</td></tr>',
        '<tr><td>9</td><td>i</td><td>-125.0</td></tr>',
    ]
    assert rendered_rows() == third_table
def test_htmlheader_start():
    """
    Check that ``HTMLHeader.start_line`` indexes the header row of a table.

    Sample input is read from data/html.html. The header row is checked for
    the default table, a table selected by string id and a table selected by
    integer index. ``start_line`` must give None when no header row exists
    and raise TypeError when a non-SoupString entry is present.
    """
    sample_path = 'data/html.html'
    with open(sample_path) as handle:
        markup = handle.read()

    reader = html.HTMLInputter()
    reader.html = {}
    html_header = html.HTMLHeader()

    # (table_id, expected header row). ``None`` means that ``table_id`` is
    # left unset for that pass.
    scenarios = (
        (None,
         '<tr><th>Column 1</th><th>Column 2</th><th>Column 3</th></tr>'),
        ('second',
         '<tr><th>Column A</th><th>Column B</th><th>Column C</th></tr>'),
        (3,
         '<tr><th>C1</th><th>C2</th><th>C3</th></tr>'),
    )
    for table_id, header_row in scenarios:
        if table_id is not None:
            reader.html['table_id'] = table_id
        rows = reader.get_lines(markup)
        header_index = html_header.start_line(rows)
        assert str(rows[header_index]) == header_row

    # Without a valid header row, start_line gives None
    data_only_row = BeautifulSoup('<table><tr><td>Data</td></tr></table>',
                                  'html.parser').tr
    paragraph = BeautifulSoup('<p>Text</p>', 'html.parser').p
    rows = [html.SoupString(data_only_row), html.SoupString(paragraph)]
    assert html_header.start_line(rows) is None

    # A plain str among the SoupStrings must be rejected
    rows.append('<tr><th>Header</th></tr>')
    with pytest.raises(TypeError):
        html_header.start_line(rows)