def test_json():
    """Round-trip elements through ``to_dict``/``load_dict`` and the JSON fixtures.

    Each element (and each layout) must equal its ``load_dict`` round trip,
    and that round trip must equal what ``load_json`` reads from the
    matching fixture file.
    """

    interval = Interval(1, 2, "y", canvas_height=5)
    rectangle = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    plain_layout = Layout(
        [interval, rectangle, quad],
        page_data={"width": 200, "height": 200},
    )

    interval_block = TextBlock(interval, "")
    rectangle_block = TextBlock(rectangle, id=24)
    quad_block = TextBlock(quad, text="test", parent=45)
    block_layout = Layout([interval_block, rectangle_block, quad_block])

    # Text blocks whose optional features are all explicitly None.
    none_layout = Layout(
        [
            TextBlock(interval, None),
            TextBlock(rectangle, id=None),
            TextBlock(quad, text=None, parent=None),
        ],
        page_data={"width": 200, "height": 200},
    )

    fixture_cases = [
        (interval, "tests/fixtures/io/interval.json"),
        (rectangle, "tests/fixtures/io/rectangle.json"),
        (quad, "tests/fixtures/io/quadrilateral.json"),
        (plain_layout, "tests/fixtures/io/layout.json"),
        (interval_block, "tests/fixtures/io/interval_textblock.json"),
        (rectangle_block, "tests/fixtures/io/rectangle_textblock.json"),
        (quad_block, "tests/fixtures/io/quadrilateral_textblock.json"),
        (block_layout, "tests/fixtures/io/layout_textblock.json"),
    ]
    for element, fixture_path in fixture_cases:
        restored = load_dict(element.to_dict())
        assert element == restored
        assert restored == load_json(fixture_path)

    # Test if LP can ignore the unused None features
    assert plain_layout == load_dict(none_layout.to_dict())
Example #2
0
def test_csv():
    """Compare layouts read by ``load_csv`` with equivalent hand-built layouts."""
    interval = Interval(1, 2, "y", canvas_height=5)
    rectangle = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    expected_layout = Layout(
        [interval, rectangle, quad],
        page_data={"width": 200, "height": 200},
    )

    loaded_layout = load_csv("tests/fixtures/io/layout.csv")
    # The freshly loaded layout is expected to differ from the reference
    # until the same page data is assigned to it.
    assert loaded_layout != expected_layout
    loaded_layout.page_data = {"width": 200, "height": 200}
    assert loaded_layout == expected_layout

    expected_block_layout = Layout(
        [
            TextBlock(interval, ""),
            TextBlock(rectangle, id=24),
            TextBlock(quad, text="test", parent=45),
        ]
    )

    loaded_block_layout = load_csv("tests/fixtures/io/layout_textblock.csv")
    assert loaded_block_layout == expected_block_layout
Example #3
0
import json
import numpy as np
from layoutparser.elements import Interval, Rectangle, Quadrilateral, TextBlock, Layout

# Script entry point: builds a small set of layout elements and dumps their
# serialized forms to files in the current working directory.
# NOTE(review): the file names look like the fixtures read by the I/O tests —
# confirm where the generated files are expected to live.
if __name__ == "__main__":

    # One element of each basic kind, plus a layout that groups them together
    # with page data.
    i = Interval(1, 2, "y", canvas_height=5)
    r = Rectangle(1, 2, 3, 4)
    q = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    l = Layout([i, r, q], page_data={"width": 200, "height": 200})

    # Write the ``to_dict()`` form of each object as JSON.
    with open("interval.json", "w") as fp:
        json.dump(i.to_dict(), fp)
    with open("rectangle.json", "w") as fp:
        json.dump(r.to_dict(), fp)
    with open("quadrilateral.json", "w") as fp:
        json.dump(q.to_dict(), fp)
    with open("layout.json", "w") as fp:
        json.dump(l.to_dict(), fp)
    # Also export the layout as CSV through its dataframe form.
    # NOTE(review): ``index=None`` presumably suppresses the index column like
    # ``index=False`` would — confirm against the pandas ``to_csv`` docs.
    l.to_dataframe().to_csv("layout.csv", index=None)

    # The same elements wrapped in TextBlocks carrying text/id/parent values.
    i2 = TextBlock(i, "")
    r2 = TextBlock(r, id=24)
    q2 = TextBlock(q, text="test", parent=45)
    l2 = Layout([i2, r2, q2])

    # Write the ``to_dict()`` form of each TextBlock as JSON.
    with open("interval_textblock.json", "w") as fp:
        json.dump(i2.to_dict(), fp)
    with open("rectangle_textblock.json", "w") as fp:
        json.dump(r2.to_dict(), fp)
    with open("quadrilateral_textblock.json", "w") as fp:
Example #4
0
def test_dict():
    """Check ``to_dict``/``from_dict`` for every element type and for layouts.

    The expected dictionaries of the plain elements are extended in place
    with the extra TextBlock fields, so the order of the checks matters.
    """

    def check_roundtrip(element, expected, loader):
        # The element must serialize to ``expected`` and be rebuilt from it.
        assert element.to_dict() == expected
        assert element == loader.from_dict(expected)

    interval = Interval(1, 2, "y", canvas_height=5)
    interval_dict = dict(
        block_type="interval",
        start=1,
        end=2,
        axis="y",
        canvas_height=5,
        canvas_width=0,
    )
    check_roundtrip(interval, interval_dict, Interval)

    rectangle = Rectangle(1, 2, 3, 4)
    rectangle_dict = dict(block_type="rectangle", x_1=1, y_1=2, x_2=3, y_2=4)
    check_roundtrip(rectangle, rectangle_dict, Rectangle)

    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    quad_dict = dict(
        block_type="quadrilateral",
        points=[0, 1, 2, 3, 4, 5, 6, 7],
        height=200,
        width=400,
    )
    check_roundtrip(quad, quad_dict, Quadrilateral)

    plain_layout = Layout(
        [interval, rectangle, quad],
        page_data={"width": 200, "height": 200},
    )
    expected_plain_layout = dict(
        page_data=dict(width=200, height=200),
        blocks=[interval_dict, rectangle_dict, quad_dict],
    )
    assert plain_layout.to_dict() == expected_plain_layout

    # From here on the element dictionaries gain the TextBlock fields.
    interval_block = TextBlock(interval, "")
    interval_dict.update(text="")
    check_roundtrip(interval_block, interval_dict, TextBlock)

    rectangle_block = TextBlock(rectangle, id=24)
    rectangle_dict.update(id=24)
    check_roundtrip(rectangle_block, rectangle_dict, TextBlock)

    quad_block = TextBlock(quad, text="test", parent=45)
    quad_dict.update(text="test", parent=45)
    check_roundtrip(quad_block, quad_dict, TextBlock)

    block_layout = Layout([interval_block, rectangle_block, quad_block])
    expected_block_layout = dict(
        page_data={},
        blocks=[interval_dict, rectangle_dict, quad_dict],
    )
    assert block_layout.to_dict() == expected_block_layout
Example #5
0
def test_layout():
    """Exercise the ``Layout`` container on plain elements and TextBlocks.

    Most query and transform calls are smoke checks: they are invoked and
    their results discarded, so the test only verifies that they run. Slicing,
    ``+``, ``append`` and ``extend`` are checked against explicit expected
    layouts, and adding two layouts with conflicting page data must raise
    ``ValueError``.
    """
    i = Interval(4, 5, axis="y")
    q = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    r = Rectangle(3, 3, 5, 6)
    t = TextBlock(i, id=1, type=2, text="12")

    # Smoke-check the query API on a layout of bare elements.
    l = Layout([i, q, r])
    l.get_texts()
    l.condition_on(i)
    l.relative_to(q)
    l.filter_by(t)
    l.is_in(r)
    assert l.get_homogeneous_blocks() == [
        i.to_quadrilateral(), q, r.to_quadrilateral()
    ]

    i2 = TextBlock(i, id=1, type=2, text="12")
    r2 = TextBlock(r, id=1, type=2, parent="a")
    q2 = TextBlock(q, id=1, type=2, next="a")
    l2 = Layout([i2, r2, q2], page_data={"width": 200, "height": 200})

    # Smoke-check the same API (plus get_info) on a layout of TextBlocks.
    l2.get_texts()
    l2.get_info("next")
    l2.condition_on(i)
    l2.relative_to(q)
    l2.filter_by(t)
    l2.is_in(r)

    # Results are discarded here; the asserts below still compare against the
    # untransformed elements.
    l2.scale(4)
    l2.shift(4)
    l2.pad(left=2)

    # Test slicing function
    homogeneous_blocks = l2[:2].get_homogeneous_blocks()
    assert homogeneous_blocks[0].block == i.to_rectangle()
    assert homogeneous_blocks[1].block == r

    # Test appending and extending
    assert l + [i2] == Layout([i, q, r, i2])
    assert l + l == Layout([i, q, r] * 2)
    l.append(i)
    assert l == Layout([i, q, r, i])
    l2.extend([q])
    assert l2 == Layout([i2, r2, q2, q],
                        page_data={
                            "width": 200,
                            "height": 200
                        })

    # Test addition: this first sum is expected to succeed.
    l + l2
    # Assign the conflicting page data outside the ``raises`` block so that
    # only the addition itself is allowed to produce the ValueError; an error
    # from the assignment would otherwise let the test pass spuriously.
    l.page_data = {"width": 200, "height": 400}
    with pytest.raises(ValueError):
        l + l2
Example #6
0
def test_df():
    """Build layouts with ``Layout.from_dataframe`` from several column sets.

    The frames cover: rows tagged through an ``_identifier`` column, the same
    with extra ``next``/``parent`` columns, untagged rows, and frames holding
    only coordinate, coordinate-plus-canvas, or point columns.
    """

    # Column groups shared by the frames below.
    coord_cols = ["x_1", "y_1", "x_2", "y_2"]
    canvas_cols = ["height", "width"]
    point_cols = ["p11", "p12", "p21", "p22", "p31", "p32", "p41", "p42"]
    relation_cols = ["next", "parent"]

    # Recurring runs of cell values.
    no_coords = [None] * 4
    no_points = [None] * 8
    quad_points = [1, 2, 3, 2, 3, 6, 1, 4]

    def expected_quad(**extra):
        # Fresh quadrilateral built from the points stored in ``quad_points``.
        return Quadrilateral(np.array([[1, 2], [3, 2], [3, 6], [1, 4]]), **extra)

    def expected_rectangle():
        return Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)

    # --- Rows tagged with an explicit ``_identifier`` column ---------------
    tagged_frame = pd.DataFrame(
        columns=["_identifier"] + coord_cols + canvas_cols + point_cols,
        data=[
            ["_interval", None, 10, None, 12, 240, None] + no_points,
            ["_interval", 12, None, 24, None, 120, 50] + no_points,
            ["_interval", 0, 10, 0, 12, 120, 50] + no_points,  # for fillna with 0
            ["_rectangle", 12, 32, 24, 55, None, None] + no_points,
            ["_rectangle", 12, 32, 24, 55, 0, 0] + no_points,
            ["_quadrilateral"] + no_coords + [None, None] + quad_points,
            ["_quadrilateral"] + no_coords + [0, 0] + quad_points,
            ["_quadrilateral", 0, 0, 0, 0, 0, 0] + quad_points,
        ],
    )

    tagged_layout = Layout.from_dataframe(tagged_frame)
    assert tagged_layout[0] == Interval(10, 12, "y", canvas_height=240)
    assert tagged_layout[2] == Interval(
        10, 12, "y", canvas_height=120, canvas_width=50
    )

    assert tagged_layout[3] == expected_rectangle()
    assert tagged_layout[3] == tagged_layout[4]

    assert not tagged_layout[5] == Quadrilateral(np.arange(8).reshape(4, -1))
    assert tagged_layout[6] == expected_quad()

    # --- Tagged rows with additional ``next``/``parent`` columns -----------
    tagged_relation_frame = pd.DataFrame(
        columns=["_identifier"] + coord_cols + canvas_cols + point_cols + relation_cols,
        data=[
            ["_interval", None, 10, None, 12, 240, None] + no_points + [None, None],
            ["_interval", 12, None, 24, None, 120, 50] + no_points + [None, None],
            # for fillna with 0
            ["_interval", 0, 10, 0, 12, 120, 50] + no_points + [None, 24],
            ["_rectangle", 12, 32, 24, 55, None, None] + no_points + [None, None],
            ["_rectangle", 12, 32, 24, 55, 0, 0] + no_points + [12, None],
            ["_quadrilateral"] + no_coords + [None, None] + quad_points + [None, None],
            ["_quadrilateral"] + no_coords + [0, 0] + quad_points + [None, None],
            ["_textblock"] + no_coords + [0, 0] + quad_points + [None, 28],
        ],
    )

    tagged_relation_layout = Layout.from_dataframe(tagged_relation_frame)
    assert tagged_relation_layout[0] == Interval(10, 12, "y", canvas_height=240)
    assert tagged_relation_layout[2] == Interval(
        10, 12, "y", canvas_height=120, canvas_width=50
    )

    assert tagged_relation_layout[3] == expected_rectangle()
    assert tagged_relation_layout[3] == tagged_relation_layout[4]
    assert tagged_relation_layout[6] == expected_quad()

    # The last row is tagged ``_textblock``: its shape sits under ``.block``.
    assert tagged_relation_layout[-1].block == expected_quad()
    assert tagged_relation_layout[-1].parent == 28

    # --- Same data without the ``_identifier`` column ----------------------
    untagged_frame = pd.DataFrame(
        columns=coord_cols + canvas_cols + point_cols + relation_cols,
        data=[
            [None, 10, None, 12, 240, None] + no_points + [None, None],
            [12, None, 24, None, 120, 50] + no_points + [None, None],
            # for fillna with 0
            [0, 10, 0, 12, 120, 50] + no_points + [None, 24],
            [12, 32, 24, 55, None, None] + no_points + [None, None],
            [12, 32, 24, 55, None, None] + no_points + [12, None],
            no_coords + [None, None] + quad_points + [None, None],
            no_coords + [0, 0] + quad_points + [None, None],
            no_coords + [0, 0] + quad_points + [None, 28],
        ],
    )

    # Every element of this layout is compared through ``.block``.
    untagged_layout = Layout.from_dataframe(untagged_frame)
    assert untagged_layout[0].block == Interval(10, 12, "y", canvas_height=240)
    assert untagged_layout[2].block == Interval(
        10, 12, "y", canvas_height=120, canvas_width=50
    )

    assert untagged_layout[3].block == expected_rectangle()
    # Rows 3 and 4 carry the same rectangle but differ in their ``next`` cell.
    assert not untagged_layout[3] == untagged_layout[4]
    assert untagged_layout[6].block == expected_quad()

    assert untagged_layout[-1].block == expected_quad()
    assert untagged_layout[-1].parent == 28

    # --- Coordinate columns only -------------------------------------------
    coords_only_frame = pd.DataFrame(
        columns=list(coord_cols),
        data=[
            [0, 10, 0, 12],
            [12, 32, 24, 55],
        ],
    )

    coords_only_layout = Layout.from_dataframe(coords_only_frame)
    assert coords_only_layout[1] == expected_rectangle()

    # --- Coordinate and canvas columns -------------------------------------
    coords_canvas_frame = pd.DataFrame(
        columns=coord_cols + canvas_cols,
        data=[
            [0, 10, 0, 12, 240, 520],
            [12, None, 24, None, 240, None],
        ],
    )

    coords_canvas_layout = Layout.from_dataframe(coords_canvas_frame)
    assert coords_canvas_layout[1] == Interval(12, 24, "x", canvas_height=240)

    # --- Point columns followed by width/height ----------------------------
    points_frame = pd.DataFrame(
        columns=point_cols + ["width", "height"],
        data=[
            quad_points + [None, None],
            quad_points + [None, None],
            quad_points + [None, 28],
        ],
    )

    points_layout = Layout.from_dataframe(points_frame)
    assert points_layout[1] == expected_quad()
    assert points_layout[2] == expected_quad(height=28)
Example #7
0
def test_layout():
    """Smoke-test the ``Layout`` API on plain elements and on TextBlocks.

    Every call below discards its result and nothing is asserted, so within
    the visible lines the test only checks that the calls run.
    """
    i = Interval(4, 5, axis='y')
    q = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    r = Rectangle(3, 3, 5, 6)
    t = TextBlock(i, id=1, type=2, text="12")

    # Layout made of bare elements.
    l = Layout([i, q, r])
    l.get_texts()
    l.condition_on(i)
    l.relative_to(q)
    l.filter_by(t)
    l.is_in(r)

    # Rebind ``l`` to a layout made of TextBlocks wrapping the same elements.
    l = Layout([
        TextBlock(i, id=1, type=2, text="12"),
        TextBlock(r, id=1, type=2, parent="a"),
        TextBlock(q, id=1, type=2, next="a")
    ])
    l.get_texts()
    l.get_info('next')
    l.condition_on(i)
    l.relative_to(q)
    l.filter_by(t)
    l.is_in(r)

    # Transformations; their results are discarded here as well.
    l.scale(4)
    l.shift(4)
    l.pad(left=2)