Example #1
0
def form_line(bounding_box, form_word):
    """Build a two-word ``FormLine`` together with the repr it must produce.

    Both arguments are indexed as pairs: element ``0`` is handed to the model
    and element ``1`` is interpolated into the expected repr string.

    Returns:
        A ``(line, expected_repr)`` tuple; the repr is checked before returning.
    """
    word = form_word[0]
    line = _models.FormLine(
        text="Word Word",
        bounding_box=bounding_box[0],
        words=[word] * 2,  # the same word object appears twice in the line
        page_number=1,
    )

    template = "FormLine(text=Word Word, bounding_box={}, words=[{}, {}], page_number=1)"
    word_repr = form_word[1]
    # The expected text is capped at 1024 characters before comparison.
    expected_repr = template.format(bounding_box[1], word_repr, word_repr)[:1024]

    assert repr(line) == expected_repr
    return line, expected_repr
Example #2
0
def form_line(bounding_box, form_word):
    """Build a two-word ``FormLine`` with an appearance, plus its expected repr.

    Both arguments are indexed as pairs: element ``0`` is handed to the model
    and element ``1`` is interpolated into the expected repr string.

    Returns:
        A ``(line, expected_repr)`` tuple; the repr is checked before returning.
    """
    text_style = Style(name="other", confidence=1.0)
    appearance = Appearance(style=text_style)

    word = form_word[0]
    line = _models.FormLine(
        text="Word Word",
        bounding_box=bounding_box[0],
        words=[word] * 2,  # the same word object appears twice in the line
        page_number=1,
        appearance=appearance,
    )

    template = "FormLine(text=Word Word, bounding_box={}, words=[{}, {}], page_number=1, kind=line, appearance={})"
    word_repr = form_word[1]
    # The expected text is capped at 1024 characters before comparison.
    expected_repr = template.format(
        bounding_box[1], word_repr, word_repr, appearance
    )[:1024]

    assert repr(line) == expected_repr
    return line, expected_repr
    def test_form_line_to_dict(self):
        """``FormLine.to_dict()`` must equal the hand-written nested dict below."""

        # Each factory returns a brand-new object so no two models or expected
        # entries share a list instance.
        def corners():
            return [
                _models.Point(1427.0, 1669.0),
                _models.Point(1527.0, 1669.0),
                _models.Point(1527.0, 1698.0),
                _models.Point(1427.0, 1698.0),
            ]

        def corner_dicts():
            return [
                {"x": 1427.0, "y": 1669.0},
                {"x": 1527.0, "y": 1669.0},
                {"x": 1527.0, "y": 1698.0},
                {"x": 1427.0, "y": 1698.0},
            ]

        def make_word(text):
            return _models.FormWord(
                text=text,
                confidence=0.92,
                page_number=1,
                bounding_box=corners(),
            )

        def word_dict(text):
            return {
                "text": text,
                "bounding_box": corner_dicts(),
                "confidence": 0.92,
                "page_number": 1,
                "kind": "word",
            }

        line = _models.FormLine(
            text="sample line",
            bounding_box=corners(),
            words=[make_word("sample"), make_word("line")],
            page_number=2,
            appearance=_models.TextAppearance(
                style_name="other", style_confidence=0.90
            ),
        )

        expected = {
            "text": "sample line",
            "bounding_box": corner_dicts(),
            "words": [word_dict("sample"), word_dict("line")],
            "page_number": 2,
            "kind": "line",
            "appearance": {"style_name": "other", "style_confidence": 0.90},
        }

        actual = line.to_dict()
        assert actual == expected
 def test_form_page_to_dict(self):
     """``FormPage.to_dict()`` must serialize tables, lines and selection marks."""

     # Each factory returns a brand-new object so no two models or expected
     # entries share a list instance.
     def corners():
         return [
             _models.Point(1427.0, 1669.0),
             _models.Point(1527.0, 1669.0),
             _models.Point(1527.0, 1698.0),
             _models.Point(1427.0, 1698.0),
         ]

     def corner_dicts():
         return [
             {"x": 1427.0, "y": 1669.0},
             {"x": 1527.0, "y": 1669.0},
             {"x": 1527.0, "y": 1698.0},
             {"x": 1427.0, "y": 1698.0},
         ]

     def make_word(text):
         return _models.FormWord(
             text=text,
             confidence=0.92,
             page_number=1,
             bounding_box=corners(),
         )

     def word_dict(text):
         return {
             "text": text,
             "bounding_box": corner_dicts(),
             "confidence": 0.92,
             "page_number": 1,
             "kind": "word",
         }

     # --- input model -------------------------------------------------
     cell = _models.FormTableCell(
         text="info",
         row_index=1,
         column_index=3,
         row_span=1,
         column_span=2,
         bounding_box=corners(),
         confidence=0.87,
         is_header=False,
         is_footer=True,
         page_number=1,
         field_elements=[make_word("word")],
     )
     table = _models.FormTable(
         page_number=2,
         cells=[cell],
         row_count=10,
         column_count=5,
         bounding_box=corners(),
     )
     line = _models.FormLine(
         text="sample line",
         bounding_box=corners(),
         words=[make_word("sample"), make_word("line")],
         page_number=2,
         appearance=_models.TextAppearance(
             style_name="other", style_confidence=0.90
         ),
     )
     mark = _models.FormSelectionMark(
         text="checkbox",
         state="selected",
         confidence=0.92,
         page_number=1,
         bounding_box=corners(),
     )
     page = _models.FormPage(
         page_number=1,
         text_angle=180.0,
         width=5.5,
         height=8.0,
         unit="pixel",
         tables=[table],
         lines=[line],
         selection_marks=[mark],
     )

     # --- expected serialization --------------------------------------
     cell_dict = {
         "text": "info",
         "bounding_box": corner_dicts(),
         "row_index": 1,
         "column_index": 3,
         "row_span": 1,
         "column_span": 2,
         "confidence": 0.87,
         "is_header": False,
         "is_footer": True,
         "page_number": 1,
         "field_elements": [word_dict("word")],
     }
     table_dict = {
         "cells": [cell_dict],
         "page_number": 2,
         "row_count": 10,
         "column_count": 5,
         "bounding_box": corner_dicts(),
     }
     line_dict = {
         "text": "sample line",
         "bounding_box": corner_dicts(),
         "words": [word_dict("sample"), word_dict("line")],
         "page_number": 2,
         "kind": "line",
         "appearance": {"style_name": "other", "style_confidence": 0.90},
     }
     mark_dict = {
         "text": "checkbox",
         "state": "selected",
         "bounding_box": corner_dicts(),
         "confidence": 0.92,
         "page_number": 1,
         "kind": "selectionMark",
     }
     expected = {
         "page_number": 1,
         "text_angle": 180.0,
         "width": 5.5,
         "height": 8.0,
         "unit": "pixel",
         "tables": [table_dict],
         "lines": [line_dict],
         "selection_marks": [mark_dict],
     }

     actual = page.to_dict()
     assert actual == expected
    def test_recognized_form_to_dict(self):
        """``RecognizedForm.to_dict()`` must serialize fields, page range and pages."""

        # Each factory returns a brand-new object so no two models or expected
        # entries share a list instance.
        def corners():
            return [
                _models.Point(1427.0, 1669.0),
                _models.Point(1527.0, 1669.0),
                _models.Point(1527.0, 1698.0),
                _models.Point(1427.0, 1698.0),
            ]

        def corner_dicts():
            return [
                {"x": 1427.0, "y": 1669.0},
                {"x": 1527.0, "y": 1669.0},
                {"x": 1527.0, "y": 1698.0},
                {"x": 1427.0, "y": 1698.0},
            ]

        def make_word(text):
            return _models.FormWord(
                text=text,
                confidence=0.92,
                page_number=1,
                bounding_box=corners(),
            )

        def word_dict(text):
            return {
                "text": text,
                "bounding_box": corner_dicts(),
                "confidence": 0.92,
                "page_number": 1,
                "kind": "word",
            }

        def make_field_data(text):
            return _models.FieldData(
                text=text,
                page_number=1,
                bounding_box=corners(),
            )

        def field_data_dict(text):
            # No field elements are supplied above; the expected output lists
            # them as an empty list.
            return {
                "text": text,
                "bounding_box": corner_dicts(),
                "page_number": 1,
                "field_elements": [],
            }

        # --- input model -------------------------------------------------
        phone_field = _models.FormField(
            value_type="phoneNumber",
            label_data=make_field_data("phone"),
            value_data=make_field_data("55554444"),
            name="phone",
            value="55554444",
            confidence=0.99,
        )
        line = _models.FormLine(
            text="sample line",
            bounding_box=corners(),
            words=[make_word("sample"), make_word("line")],
            page_number=2,
            appearance=_models.TextAppearance(
                style_name="other", style_confidence=0.90
            ),
        )
        page = _models.FormPage(
            page_number=1,
            text_angle=180.0,
            width=5.5,
            height=8.0,
            unit="pixel",
            lines=[line],
        )
        form = _models.RecognizedForm(
            form_type="test_form",
            # NOTE(review): the confidence is given as the string "0.84" rather
            # than a float; the expected dict mirrors it. Confirm this is intended.
            form_type_confidence="0.84",
            model_id="examplemodel123",
            page_range=_models.FormPageRange(1, 1),
            fields={"example": phone_field},
            pages=[page],
        )

        # --- expected serialization --------------------------------------
        field_dict = {
            "value_type": "phoneNumber",
            "label_data": field_data_dict("phone"),
            "value_data": field_data_dict("55554444"),
            "name": "phone",
            "value": "55554444",
            "confidence": 0.99,
        }
        line_dict = {
            "text": "sample line",
            "bounding_box": corner_dicts(),
            "words": [word_dict("sample"), word_dict("line")],
            "page_number": 2,
            "kind": "line",
            "appearance": {"style_name": "other", "style_confidence": 0.90},
        }
        page_dict = {
            "page_number": 1,
            "text_angle": 180.0,
            "width": 5.5,
            "height": 8.0,
            "unit": "pixel",
            "lines": [line_dict],
            # Collections not passed to FormPage are expected as empty lists.
            "selection_marks": [],
            "tables": [],
        }
        expected = {
            "form_type": "test_form",
            "form_type_confidence": "0.84",
            "model_id": "examplemodel123",
            "page_range": {"first_page_number": 1, "last_page_number": 1},
            "fields": {"example": field_dict},
            "pages": [page_dict],
        }

        actual = form.to_dict()
        assert actual == expected