def test_merge_tables():
    """Merge two tables and verify the first table's cell count and last row.

    Loads ``data/gib_multi_page_tables.json``, records the number of cells
    and the row index of the last cell for both tables, calls
    ``merge_tables`` on the pair, and asserts that the first table now
    reports the sum of both cell counts and the sum of both last-row indices.
    """
    p = os.path.dirname(os.path.realpath(__file__))
    # Context manager so the fixture's file handle is closed after parsing.
    with open(os.path.join(p, "data/gib_multi_page_tables.json")) as f:
        j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    tbl_id1 = 'fed02fb4-1996-4a15-98dc-29da193cc476'
    tbl_id2 = '47c6097f-02d5-4432-8423-13c05fbfacbd'

    # Snapshot counts and last-cell row indices before the merge.
    pre_merge_tbl1_cells_no = len(t_document.get_block_by_id(tbl_id1).relationships[0].ids)
    pre_merge_tbl2_cells_no = len(t_document.get_block_by_id(tbl_id2).relationships[0].ids)
    pre_merge_tbl1_lastcell = t_document.get_block_by_id(tbl_id1).relationships[0].ids[-1]
    pre_merge_tbl2_lastcell = t_document.get_block_by_id(tbl_id2).relationships[0].ids[-1]
    pre_merge_tbl1_last_row = t_document.get_block_by_id(pre_merge_tbl1_lastcell).row_index
    pre_merge_tbl2_last_row = t_document.get_block_by_id(pre_merge_tbl2_lastcell).row_index

    t_document.merge_tables([[tbl_id1, tbl_id2]])

    # Re-read the first table after the merge.
    post_merge_tbl1_cells_no = len(t_document.get_block_by_id(tbl_id1).relationships[0].ids)
    post_merge_tbl1_lastcell = t_document.get_block_by_id(tbl_id1).relationships[0].ids[-1]
    post_merge_tbl1_last_row = t_document.get_block_by_id(post_merge_tbl1_lastcell).row_index

    assert post_merge_tbl1_cells_no == pre_merge_tbl1_cells_no + pre_merge_tbl2_cells_no
    # Truthiness guard: the second table's last row index must be set and non-zero.
    assert pre_merge_tbl2_last_row
    assert post_merge_tbl1_last_row == pre_merge_tbl1_last_row + pre_merge_tbl2_last_row
def test_get_relationships_for_type(caplog):
    """Attach a new block to a page's relationships and look it up by id.

    Exercised twice: once on the first page of a document loaded from
    ``data/gib.json`` (the "existing relationships" case) and once on a
    document containing only a freshly created PAGE block (the
    "empty relationships" case).
    """

    def _attach_and_lookup(document):
        # Add a brand-new block, link it from the first page, and make sure
        # the document returns that same block when asked for its id.
        first_page = document.pages[0]
        fresh_block = t2.TBlock(id=str(uuid4()))
        document.add_block(fresh_block)
        first_page.add_ids_to_relationships([fresh_block.id])
        assert document.get_block_by_id(fresh_block.id) == fresh_block

    # existing relationships
    base_dir = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(base_dir, "data/gib.json")
    with open(fixture_path) as handle:
        payload = json.load(handle)
    loaded_doc: t2.TDocument = t2.TDocumentSchema().load(payload)
    _attach_and_lookup(loaded_doc)

    # empty relationships
    empty_doc: t2.TDocument = t2.TDocument()
    page_block = t2.TBlock(id=str(uuid4()), block_type="PAGE")
    empty_doc.add_block(page_block)
    _attach_and_lookup(empty_doc)
def test_pipeline_merge_tables():
    """Run ``pipeline_merge_tables`` and verify the first table's cell count.

    Loads ``data/gib_multi_page_table_merge.json``, records the cell counts
    of three tables, runs the pipeline with ``MergeOptions.MERGE`` and
    ``HeaderFooterType.NONE``, and asserts that the first table afterwards
    holds the sum of all three pre-merge cell counts.
    """
    p = os.path.dirname(os.path.realpath(__file__))
    # Context manager so the fixture's file handle is closed after parsing.
    with open(os.path.join(p, "data/gib_multi_page_table_merge.json")) as f:
        j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    tbl_id1 = '5685498d-d196-42a7-8b40-594d6d886ca9'
    tbl_id2 = 'a9191a66-0d32-4d36-8fd6-58e6917f4ea6'
    tbl_id3 = 'e0368543-c9c3-4616-bd6c-f25e66c859b2'

    def cells_no(document, table_id):
        # Number of ids in the first relationship of the given table block.
        return len(document.get_block_by_id(table_id).relationships[0].ids)  # type: ignore

    pre_merge_tbl1_cells_no = cells_no(t_document, tbl_id1)
    pre_merge_tbl2_cells_no = cells_no(t_document, tbl_id2)
    pre_merge_tbl3_cells_no = cells_no(t_document, tbl_id3)

    t_document = pipeline_merge_tables(t_document, MergeOptions.MERGE, None, HeaderFooterType.NONE)

    # Re-read from the document returned by the pipeline.
    post_merge_tbl1_cells_no = cells_no(t_document, tbl_id1)
    assert post_merge_tbl1_cells_no == (
        pre_merge_tbl1_cells_no + pre_merge_tbl2_cells_no + pre_merge_tbl3_cells_no
    )
def test_custom_page_orientation(json_response):
    """Check custom-field serialization and ``add_page_orientation`` output.

    Three parts:

    1. Sanity-check the ``json_response`` fixture (1 page, 22 lines,
       53 words), set a document-level ``custom`` dict, and verify that it
       is dumped under the ``"Custom"`` key.
    2. For each fixture file, load it, run ``add_page_orientation``, and
       assert that the first page's ``custom['Orientation']`` lies strictly
       inside the expected open interval.
    3. Round-trip the last processed document through ``t1.Document`` and
       check that every page carries a truthy ``custom['Orientation']``.
    """
    doc = Document(json_response)
    assert 1 == len(doc.pages)
    lines = list(doc.pages[0].lines)
    assert 22 == len(lines)
    words = [word for line in lines for word in line.words]
    assert 53 == len(words)

    t_document: t2.TDocument = t2.TDocumentSchema().load(json_response)
    t_document.custom = {'orientation': 180}
    new_t_doc_json = t2.TDocumentSchema().dump(t_document)
    assert "Custom" in new_t_doc_json
    assert "orientation" in new_t_doc_json["Custom"]
    assert new_t_doc_json["Custom"]["orientation"] == 180

    p = os.path.dirname(os.path.realpath(__file__))
    # (fixture path, exclusive lower bound, exclusive upper bound).
    # Order matters: the last entry's document is reused for the round-trip below.
    expected_orientation_ranges = (
        ("data/gib.json", -1, 2),
        ("data/gib_10_degrees.json", 5, 15),
        ("data/gib__15_degrees.json", 10, 20),
        ("data/gib__25_degrees.json", 17, 30),
        ("data/gib__180_degrees.json", 170, 190),
        ("data/gib__270_degrees.json", -100, -80),
        ("data/gib__90_degrees.json", 80, 100),
        ("data/gib__minus_10_degrees.json", -10, 5),
    )
    for fixture_path, lower, upper in expected_orientation_ranges:
        # Context manager so each fixture's file handle is closed after parsing.
        with open(os.path.join(p, fixture_path)) as f:
            j = json.load(f)
        t_document = t2.TDocumentSchema().load(j)
        t_document = add_page_orientation(t_document)
        orientation = t_document.pages[0].custom['Orientation']
        assert lower < orientation < upper, (
            f"{fixture_path}: orientation {orientation} not in ({lower}, {upper})"
        )

    # t_document is now the document loaded from the last fixture in the table.
    doc = t1.Document(t2.TDocumentSchema().dump(t_document))
    for page in doc.pages:
        # Truthiness check: an orientation of exactly 0 would fail here.
        assert page.custom['Orientation']