def test_tblock_order_block_by_geo_multi_page():
    """Check that geometric ordering keeps the tables of page 1 in sequence.

    Loads the ``data/gib_multi_page_tables.json`` fixture, runs it through
    ``order_blocks_by_geo`` and asserts the text of the first cell of the
    first two tables on the first page.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(here, "data/gib_multi_page_tables.json")
    # Use a context manager so the fixture file handle is always closed.
    with open(fixture_path) as f:
        j = json.load(f)

    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = order_blocks_by_geo(t_document)
    doc = t1.Document(t2.TDocumentSchema().dump(t_document))

    first_page_tables = doc.pages[0].tables
    assert "Page 1 - Value 1.1.1" == first_page_tables[0].rows[0].cells[0].text.strip()
    assert "Page 1 - Value 2.1.1" == first_page_tables[1].rows[0].cells[0].text.strip()
def ExecuteTableValidations(t_doc: t2.TDocument, header_footer_type: HeaderFooterType, accuracy_percentage: float):
    """
    Find tables that continue from one page onto the following page.

    The document is first re-ordered with ``order_blocks_by_geo``. Then, for
    each pair of consecutive pages, the last table of the earlier page is
    compared with the first table of the later page. The pair is recorded
    as mergeable when all of the following hold:

    1. ``__validate_objects_between_tables`` accepts the two tables,
    2. ``__compare_table_column_numbers`` or ``__compare_table_headers``
       accepts them, and
    3. ``__compare_table_dimensions`` accepts them for the given
       ``accuracy_percentage``.

    Args:
        t_doc: The document to analyse.
        header_footer_type: Passed through to
            ``__validate_objects_between_tables``.
        accuracy_percentage: Passed through to
            ``__compare_table_dimensions``.

    Returns:
        A list of merge groups. Each group is a list of table ids in page
        order: the first id is the table that starts the group and every
        further id is a table on a following page that continues it.
    """
    # Kept as a function-local import, exactly as in the original code
    # (presumably to avoid a circular import with trp.t_pipeline - confirm).
    from trp.t_pipeline import order_blocks_by_geo

    ordered_doc = order_blocks_by_geo(t_doc)
    trp_doc = trp.Document(TDocumentSchema().dump(ordered_doc))
    pages = list(trp_doc.pages)

    table_ids_merge_list = []
    # Walk over consecutive (page, following page) pairs; the last page has
    # no successor and is therefore never the "current" page.
    for current_page, next_page in zip(pages, pages[1:]):
        # Both pages need at least one table. The original tested the current
        # page twice and never the next page (IndexError on ``tables[0]``),
        # and aborted the whole scan instead of moving on to the next pair.
        if not current_page.tables or not next_page.tables:
            continue

        current_page_table = current_page.tables[-1]
        next_page_table = next_page.tables[0]

        if not __validate_objects_between_tables(current_page, current_page_table, next_page, next_page_table,
                                                 header_footer_type):
            continue

        # Both comparisons are evaluated unconditionally, as before.
        same_column_count = __compare_table_column_numbers(current_page_table, next_page_table)
        same_headers = __compare_table_headers(current_page_table, next_page_table)
        if not (same_column_count or same_headers):
            continue

        if not __compare_table_dimensions(current_page_table, next_page_table, accuracy_percentage):
            continue

        # Extend the latest group when the current table is its tail (a table
        # spanning three or more pages); otherwise start a new group. The
        # original only started a group while the list was empty and only
        # looked at index 1 of each group, so later groups and chains longer
        # than three pages were silently lost.
        if table_ids_merge_list and table_ids_merge_list[-1][-1] == current_page_table.id:
            table_ids_merge_list[-1].append(next_page_table.id)
        else:
            table_ids_merge_list.append([current_page_table.id, next_page_table.id])

    return table_ids_merge_list
def test_adjust_bounding_boxes_and_polygons_to_orientation():
    """Run page-orientation detection and geometric ordering on a rotated fixture.

    Loads ``data/gib__180_degrees.json``, applies ``add_page_orientation``
    and ``order_blocks_by_geo`` and builds a ``t1.Document`` from the result
    of ``add_page_orientation``.
    """
    # Disabled exploratory code against the "data/gib.json" fixture, kept
    # for reference:
    # p = os.path.dirname(os.path.realpath(__file__))
    # f = open(os.path.join(p, "data/gib.json"))
    # j = json.load(f)
    # t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    # t_document = add_page_orientation(t_document)
    # doc = t1.Document(t2.TDocumentSchema().dump(t_document))
    # key = "Date:"
    # fields = doc.pages[0].form.searchFieldsByKey(key)
    # for field in fields:
    #     print(f"Field: Key: {field.key}, Value: {field.value}, Geo: {field.geometry} ")

    p = os.path.dirname(os.path.realpath(__file__))
    # Use a context manager so the fixture file handle is always closed.
    with open(os.path.join(p, "data/gib__180_degrees.json")) as f:
        j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    # NOTE(review): the ordered result is bound to ``new_order`` but ``doc``
    # below is built from ``t_document`` - confirm this is intended.
    new_order = order_blocks_by_geo(t_document)
    doc = t1.Document(t2.TDocumentSchema().dump(t_document))