def test_returns_tables_that_span_multiple_pages(self, page_overflow) -> None:
    """Check the shape and column labels of a table extracted from a slice.

    The first three items of the ``page_overflow`` fixture are passed to
    ``extract_table`` together; the result must have 100 rows, 5 columns,
    and the expected column index.
    """
    expected_columns = pd.Index(
        ['IdField', 'NameField', 'TestField1', 'TestField2', 'TestField3']
    )

    table = extract_table(page_overflow[0:3])

    assert table.shape == (100, 5)
    pd.testing.assert_index_equal(table.columns, expected_columns)
def test_returns_correct_table_data_when_working_with_borders(self, borders) -> None:
    """Compare the table extracted from ``borders`` with a fully populated frame.

    The expected frame has five rows of string values under the five
    standard column labels; equality is checked with
    ``pd.testing.assert_frame_equal``.
    """
    column_labels = ['IdField', 'NameField', 'TestField1', 'TestField2', 'TestField3']
    rows = [
        ["1", 'Name1', 'Value1', 'Value2', 'Value3'],
        ["2", 'Name2', 'Value4', 'Value5', 'Value6'],
        ["3", 'Name3', 'Value7', 'Value8', 'Value9'],
        ["4", 'Name4', 'Value10', 'Value11', 'Value12'],
        ["5", 'Name5', 'Value13', 'Value14', 'Value15'],
    ]

    extracted = extract_table(borders)
    wanted = pd.DataFrame(data=rows, columns=column_labels)

    pd.testing.assert_frame_equal(extracted, wanted)
def test_returns_correct_table_when_missing_most_values(self, missing_values) -> None:
    """Compare the table extracted from ``missing_values[4]`` with a sparse frame.

    In the expected frame most value cells are empty strings; only
    ``Value1``, ``Value2`` and ``Value3`` are present, each in a different
    row and column.
    """
    column_labels = ['IdField', 'NameField', 'TestField1', 'TestField2', 'TestField3']
    rows = [
        ["1", 'Name1', 'Value1', '', ''],
        ["2", 'Name2', '', '', 'Value2'],
        ["3", 'Name3', '', '', ''],
        ["4", 'Name4', '', 'Value3', ''],
        ["5", 'Name5', '', '', ''],
    ]

    extracted = extract_table(missing_values[4])
    wanted = pd.DataFrame(data=rows, columns=column_labels)

    pd.testing.assert_frame_equal(extracted, wanted)
def test_returns_dataframe(self, table_borders) -> None:
    """Check that ``extract_table`` returns a ``pd.DataFrame`` for ``table_borders[0]``."""
    assert isinstance(extract_table(table_borders[0]), pd.DataFrame)