Esempio n. 1
0
def test_clean_lattice_emits_multiple_lines(row):
    """Check that ``bout.clean`` keeps lattice input rows as separate lines.

    Two rows of cells are appended to the ``row`` fixture after switching
    its ``extraction_method`` to ``"lattice"``; the cleaned result must
    contain the text of each input row as its own output row, including
    the empty-text cell.
    """
    row["extraction_method"] = "lattice"

    # Append the rows one at a time, building each from its cell texts.
    for texts in (("c1", "c2"), ("", "c4")):
        row["data"].append([_get_cell(text) for text in texts])

    cleaned = bout.clean(row)

    expected = [["c1", "c2"], ["", "c4"]]
    assert cleaned == expected
Esempio n. 2
0
def test_clean_stream_merge_cell_data(row):
    """Check that ``bout.clean`` merges two input rows column by column.

    The ``row`` fixture is used with whatever extraction method it ships
    with (presumably "stream", going by the test name -- confirm against
    the fixture). No cell in the first row has empty text, so the texts
    of the second row are expected to be concatenated onto the first.
    """
    # Neither first-row cell is empty, so "c1"+"c" and "c2"+"c4" are merged.
    for texts in (("c1", "c2"), ("c", "c4")):
        row["data"].append([_get_cell(text) for text in texts])

    cleaned = bout.clean(row)

    expected = [["c1c", "c2c4"]]
    assert cleaned == expected
Esempio n. 3
0
def test_clean_ignores_zero_cell_data(row):
    """Check that ``bout.clean`` drops a cell whose ``width`` is ``0.0``.

    A single row is appended holding an empty-text cell with zero width
    followed by a regular cell; only the regular cell's text is expected
    in the cleaned output.
    """
    zero_width_cell = _get_cell("")
    zero_width_cell["width"] = 0.0
    regular_cell = _get_cell("c2")
    row["data"].append([zero_width_cell, regular_cell])

    cleaned = bout.clean(row)

    expected = [["c2"]]
    assert cleaned == expected
Esempio n. 4
0
def test_clean_stream_not_merge_cell_data_empty_first_line(row):
    """Check ``bout.clean`` when the first row starts with a zero-width cell.

    The first appended row holds an empty-text, zero-width cell followed
    by ``"c2"``; the second row holds ``"cc2"`` and ``"c4"``. The cleaned
    output is expected to be a single row whose first column carries only
    the second row's text and whose second column is the concatenation of
    both rows' texts.

    NOTE(review): the exact rule ``bout.clean`` applies here is not
    visible from this test; only the expected output is pinned.
    """
    zero_width_cell = _get_cell("")
    zero_width_cell["width"] = 0.0
    first_line = [zero_width_cell, _get_cell("c2")]
    row["data"].append(first_line)

    second_line = [_get_cell(text) for text in ("cc2", "c4")]
    row["data"].append(second_line)

    cleaned = bout.clean(row)

    expected = [["cc2", "c2c4"]]
    assert cleaned == expected
Esempio n. 5
0
def test_clean_ignores_zero_row_data(row):
    """Check that ``bout.clean`` returns an empty result for the bare fixture.

    Nothing is appended to the ``row`` fixture here (presumably its
    ``data`` starts out empty -- confirm against the fixture), and the
    cleaned output must have length zero.
    """
    cleaned = bout.clean(row)

    line_count = len(cleaned)
    assert line_count == 0