def test_merge_tables():
    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib_multi_page_tables.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    tbl_id1 = 'fed02fb4-1996-4a15-98dc-29da193cc476'
    tbl_id2 = '47c6097f-02d5-4432-8423-13c05fbfacbd'
    pre_merge_tbl1_cells_no = len(t_document.get_block_by_id(tbl_id1).relationships[0].ids)
    pre_merge_tbl2_cells_no = len(t_document.get_block_by_id(tbl_id2).relationships[0].ids)
    pre_merge_tbl1_lastcell = t_document.get_block_by_id(tbl_id1).relationships[0].ids[-1]
    pre_merge_tbl2_lastcell = t_document.get_block_by_id(tbl_id2).relationships[0].ids[-1]
    pre_merge_tbl1_last_row = t_document.get_block_by_id(pre_merge_tbl1_lastcell).row_index
    pre_merge_tbl2_last_row = t_document.get_block_by_id(pre_merge_tbl2_lastcell).row_index
    t_document.merge_tables([[tbl_id1,tbl_id2]])
    post_merge_tbl1_cells_no = len(t_document.get_block_by_id(tbl_id1).relationships[0].ids)
    post_merge_tbl1_lastcell = t_document.get_block_by_id(tbl_id1).relationships[0].ids[-1]
    post_merge_tbl1_last_row = t_document.get_block_by_id(post_merge_tbl1_lastcell).row_index
    assert post_merge_tbl1_cells_no == pre_merge_tbl1_cells_no + pre_merge_tbl2_cells_no
    assert pre_merge_tbl2_last_row
    assert post_merge_tbl1_last_row == pre_merge_tbl1_last_row + pre_merge_tbl2_last_row
Esempio n. 2
0
def test_get_relationships_for_type(caplog):
    # existing relationships
    p = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(p, "data/gib.json")) as f:
        j = json.load(f)
        t_document: t2.TDocument = t2.TDocumentSchema().load(j)
        page = t_document.pages[0]
        new_block = t2.TBlock(id=str(uuid4()))
        t_document.add_block(new_block)
        page.add_ids_to_relationships([new_block.id])
        assert t_document.get_block_by_id(new_block.id) == new_block

    #empty relationships
    t_document: t2.TDocument = t2.TDocument()
    t_document.add_block(t2.TBlock(id=str(uuid4()), block_type="PAGE"))
    page = t_document.pages[0]
    new_block = t2.TBlock(id=str(uuid4()))
    t_document.add_block(new_block)
    page.add_ids_to_relationships([new_block.id])
    assert t_document.get_block_by_id(new_block.id) == new_block
Esempio n. 3
0
def test_pipeline_merge_tables():
    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib_multi_page_table_merge.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    tbl_id1 = '5685498d-d196-42a7-8b40-594d6d886ca9'
    tbl_id2 = 'a9191a66-0d32-4d36-8fd6-58e6917f4ea6'
    tbl_id3 = 'e0368543-c9c3-4616-bd6c-f25e66c859b2'
    pre_merge_tbl1_cells_no = len(
        t_document.get_block_by_id(
            tbl_id1).relationships[0].ids)  # type: ignore
    pre_merge_tbl2_cells_no = len(
        t_document.get_block_by_id(
            tbl_id2).relationships[0].ids)  # type: ignore
    pre_merge_tbl3_cells_no = len(
        t_document.get_block_by_id(
            tbl_id3).relationships[0].ids)  # type: ignore
    t_document = pipeline_merge_tables(t_document, MergeOptions.MERGE, None,
                                       HeaderFooterType.NONE)
    post_merge_tbl1_cells_no = len(
        t_document.get_block_by_id(
            tbl_id1).relationships[0].ids)  # type: ignore
    assert post_merge_tbl1_cells_no == pre_merge_tbl1_cells_no + pre_merge_tbl2_cells_no + pre_merge_tbl3_cells_no
def test_custom_page_orientation(json_response):
    doc = Document(json_response)
    assert 1 == len(doc.pages)
    lines = [line for line in doc.pages[0].lines]
    assert 22 == len(lines)
    words = [word for line in lines for word in line.words]
    assert 53 == len(words)
    t_document: t2.TDocument = t2.TDocumentSchema().load(json_response)
    t_document.custom = {'orientation': 180}
    new_t_doc_json = t2.TDocumentSchema().dump(t_document)
    assert "Custom" in new_t_doc_json
    assert "orientation" in new_t_doc_json["Custom"]
    assert new_t_doc_json["Custom"]["orientation"] == 180

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert -1 < t_document.pages[0].custom['Orientation'] < 2

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib_10_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert 5 < t_document.pages[0].custom['Orientation'] < 15

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib__15_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert 10 < t_document.pages[0].custom['Orientation'] < 20

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib__25_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert 17 < t_document.pages[0].custom['Orientation'] < 30

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib__180_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert 170 < t_document.pages[0].custom['Orientation'] < 190

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib__270_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert -100 < t_document.pages[0].custom['Orientation'] < -80

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib__90_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert 80 < t_document.pages[0].custom['Orientation'] < 100

    p = os.path.dirname(os.path.realpath(__file__))
    f = open(os.path.join(p, "data/gib__minus_10_degrees.json"))
    j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    assert -10 < t_document.pages[0].custom['Orientation'] < 5

    doc = t1.Document(t2.TDocumentSchema().dump(t_document))
    for page in doc.pages:
        assert page.custom['Orientation']