コード例 #1
0
def test_tblock_order_block_by_geo_multi_page():
    """Check table order on the first page after ordering blocks by geometry.

    Loads the multi-page table fixture, runs ``order_blocks_by_geo`` on it,
    rebuilds a ``t1.Document`` from the dumped result and asserts the text of
    the first cell of the first two tables on page 1.
    """
    p = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(p, "data/gib_multi_page_tables.json")
    # Context manager so the fixture handle is closed even if parsing fails.
    with open(fixture_path, encoding="utf-8") as f:
        j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = order_blocks_by_geo(t_document)
    doc = t1.Document(t2.TDocumentSchema().dump(t_document))
    first_page_tables = doc.pages[0].tables
    assert "Page 1 - Value 1.1.1" == first_page_tables[0].rows[0].cells[0].text.strip()
    assert "Page 1 - Value 2.1.1" == first_page_tables[1].rows[0].cells[0].text.strip()
コード例 #2
0
def ExecuteTableValidations(t_doc: t2.TDocument,
                            header_footer_type: HeaderFooterType,
                            accuracy_percentage: float):
    """
    Find tables that continue from one page onto the next.

    The document is first ordered with ``order_blocks_by_geo``. Then, for
    every pair of consecutive pages, the last table of the earlier page is
    compared with the first table of the later page. The pair is recorded as
    a merge candidate when all of the following hold:

    1. ``__validate_objects_between_tables`` accepts the pair,
    2. ``__compare_table_column_numbers`` or ``__compare_table_headers``
       accepts the pair,
    3. ``__compare_table_dimensions`` accepts the pair for the given
       ``accuracy_percentage``.

    Page pairs where either page has no tables are skipped; they do not stop
    the scan of the remaining pages.

    Args:
        t_doc: document to analyse.
        header_footer_type: passed through to
            ``__validate_objects_between_tables``.
        accuracy_percentage: passed through to
            ``__compare_table_dimensions``.

    Returns:
        A list of lists of table ids. Each inner list is one chain of tables,
        in page order, that were judged to belong together. A chain is
        extended when its last table is the "current" table of a new match;
        otherwise a new two-element chain is started.
    """
    # Kept as a function-level import, as in the original code.
    # NOTE(review): presumably avoids a circular import with trp.t_pipeline
    # -- confirm before moving it to module level.
    from trp.t_pipeline import order_blocks_by_geo

    ordered_doc = order_blocks_by_geo(t_doc)
    trp_doc = trp.Document(TDocumentSchema().dump(ordered_doc))
    pages = list(trp_doc.pages)

    table_ids_merge_list = []
    for current_page, next_page in zip(pages, pages[1:]):
        # Both pages need at least one table, otherwise there is nothing to
        # compare (and indexing the next page's first table would fail).
        if not current_page.tables or not next_page.tables:
            continue

        current_page_table = current_page.tables[-1]
        next_page_table = next_page.tables[0]

        if not __validate_objects_between_tables(current_page,
                                                 current_page_table,
                                                 next_page,
                                                 next_page_table,
                                                 header_footer_type):
            continue

        # Both checks are always evaluated, as before; either one passing
        # is sufficient.
        same_column_count = __compare_table_column_numbers(
            current_page_table, next_page_table)
        same_headers = __compare_table_headers(current_page_table,
                                               next_page_table)
        if not (same_column_count or same_headers):
            continue

        if not __compare_table_dimensions(current_page_table,
                                          next_page_table,
                                          accuracy_percentage):
            continue

        # Extend the chain that currently ends with this table; if there is
        # none, start a new chain so that the pair is never dropped.
        for merge_group in table_ids_merge_list:
            if merge_group[-1] == current_page_table.id:
                merge_group.append(next_page_table.id)
                break
        else:
            table_ids_merge_list.append(
                [current_page_table.id, next_page_table.id])

    return table_ids_merge_list
コード例 #3
0
def test_adjust_bounding_boxes_and_polygons_to_orientation():
    """Run orientation detection and geometric ordering on a rotated fixture.

    Loads ``data/gib__180_degrees.json``, applies ``add_page_orientation``
    and ``order_blocks_by_geo``, and rebuilds a ``t1.Document`` from the
    dumped document. No assertions are made in the visible part of this test.
    """
    # Disabled scenario using the unrotated fixture, kept for reference:
    # p = os.path.dirname(os.path.realpath(__file__))
    # f = open(os.path.join(p, "data/gib.json"))
    # j = json.load(f)
    # t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    # t_document = add_page_orientation(t_document)
    # doc = t1.Document(t2.TDocumentSchema().dump(t_document))
    # key = "Date:"
    # fields = doc.pages[0].form.searchFieldsByKey(key)
    # for field in fields:
    #     print(f"Field: Key: {field.key}, Value: {field.value}, Geo: {field.geometry} ")

    p = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(p, "data/gib__180_degrees.json")
    # Context manager so the fixture handle is closed even if parsing fails.
    with open(fixture_path, encoding="utf-8") as f:
        j = json.load(f)
    t_document: t2.TDocument = t2.TDocumentSchema().load(j)
    t_document = add_page_orientation(t_document)
    # NOTE(review): the return value is not used below; the dump is taken
    # from t_document -- confirm that order_blocks_by_geo updates it in place.
    new_order = order_blocks_by_geo(t_document)
    doc = t1.Document(t2.TDocumentSchema().dump(t_document))