def test_json():
    """Round-trip elements, text blocks and layouts through dict and JSON.

    Every object must equal both its own ``to_dict`` output reloaded with
    ``load_dict`` and the corresponding stored fixture read by ``load_json``.
    """
    interval = Interval(1, 2, "y", canvas_height=5)
    rectangle = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    plain_layout = Layout(
        [interval, rectangle, quad],
        page_data={"width": 200, "height": 200},
    )

    interval_text = TextBlock(interval, "")
    rectangle_text = TextBlock(rectangle, id=24)
    quad_text = TextBlock(quad, text="test", parent=45)
    text_layout = Layout([interval_text, rectangle_text, quad_text])

    # Text blocks whose optional features are all explicitly None.
    interval_none = TextBlock(interval, None)
    rectangle_none = TextBlock(rectangle, id=None)
    quad_none = TextBlock(quad, text=None, parent=None)
    none_layout = Layout(
        [interval_none, rectangle_none, quad_none],
        page_data={"width": 200, "height": 200},
    )

    round_trip_cases = [
        (interval, "tests/fixtures/io/interval.json"),
        (rectangle, "tests/fixtures/io/rectangle.json"),
        (quad, "tests/fixtures/io/quadrilateral.json"),
        (plain_layout, "tests/fixtures/io/layout.json"),
        (interval_text, "tests/fixtures/io/interval_textblock.json"),
        (rectangle_text, "tests/fixtures/io/rectangle_textblock.json"),
        (quad_text, "tests/fixtures/io/quadrilateral_textblock.json"),
        (text_layout, "tests/fixtures/io/layout_textblock.json"),
    ]
    for element, fixture_path in round_trip_cases:
        assert element == load_dict(element.to_dict()) == load_json(fixture_path)

    # Test if LP can ignore the unused None features
    assert plain_layout == load_dict(none_layout.to_dict())
def test_csv():
    """Compare layouts read from the CSV fixtures with hand-built ones."""
    interval = Interval(1, 2, "y", canvas_height=5)
    rectangle = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    expected = Layout(
        [interval, rectangle, quad],
        page_data={"width": 200, "height": 200},
    )

    loaded = load_csv("tests/fixtures/io/layout.csv")
    # The loaded layout is expected to match only once page_data is assigned.
    assert loaded != expected
    loaded.page_data = {"width": 200, "height": 200}
    assert loaded == expected

    expected_text = Layout(
        [
            TextBlock(interval, ""),
            TextBlock(rectangle, id=24),
            TextBlock(quad, text="test", parent=45),
        ]
    )
    loaded_text = load_csv("tests/fixtures/io/layout_textblock.csv")
    assert loaded_text == expected_text
import json import numpy as np from layoutparser.elements import Interval, Rectangle, Quadrilateral, TextBlock, Layout if __name__ == "__main__": i = Interval(1, 2, "y", canvas_height=5) r = Rectangle(1, 2, 3, 4) q = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400) l = Layout([i, r, q], page_data={"width": 200, "height": 200}) with open("interval.json", "w") as fp: json.dump(i.to_dict(), fp) with open("rectangle.json", "w") as fp: json.dump(r.to_dict(), fp) with open("quadrilateral.json", "w") as fp: json.dump(q.to_dict(), fp) with open("layout.json", "w") as fp: json.dump(l.to_dict(), fp) l.to_dataframe().to_csv("layout.csv", index=None) i2 = TextBlock(i, "") r2 = TextBlock(r, id=24) q2 = TextBlock(q, text="test", parent=45) l2 = Layout([i2, r2, q2]) with open("interval_textblock.json", "w") as fp: json.dump(i2.to_dict(), fp) with open("rectangle_textblock.json", "w") as fp: json.dump(r2.to_dict(), fp) with open("quadrilateral_textblock.json", "w") as fp:
def test_dict():
    """Check ``to_dict`` / ``from_dict`` for each element, Layout and TextBlock.

    The element dicts are extended in place for the TextBlock section, so
    the statement order below is significant.
    """
    # --- Bare elements -----------------------------------------------------
    interval = Interval(1, 2, "y", canvas_height=5)
    interval_dict = {
        "block_type": "interval",
        "start": 1,
        "end": 2,
        "axis": "y",
        "canvas_height": 5,
        "canvas_width": 0,
    }
    assert interval.to_dict() == interval_dict
    assert interval == Interval.from_dict(interval_dict)

    rectangle = Rectangle(1, 2, 3, 4)
    rectangle_dict = {
        "block_type": "rectangle",
        "x_1": 1,
        "y_1": 2,
        "x_2": 3,
        "y_2": 4,
    }
    assert rectangle.to_dict() == rectangle_dict
    assert rectangle == Rectangle.from_dict(rectangle_dict)

    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    quad_dict = {
        "block_type": "quadrilateral",
        "points": [0, 1, 2, 3, 4, 5, 6, 7],
        "height": 200,
        "width": 400,
    }
    assert quad.to_dict() == quad_dict
    assert quad == Quadrilateral.from_dict(quad_dict)

    # --- Layout of bare elements -------------------------------------------
    layout = Layout(
        [interval, rectangle, quad],
        page_data={"width": 200, "height": 200},
    )
    layout_dict = {
        "page_data": {"width": 200, "height": 200},
        "blocks": [interval_dict, rectangle_dict, quad_dict],
    }
    # Must run before the element dicts gain TextBlock keys below.
    assert layout.to_dict() == layout_dict

    # --- TextBlocks: reuse the element dicts plus the extra features -------
    interval_text = TextBlock(interval, "")
    interval_dict["text"] = ""
    assert interval_text.to_dict() == interval_dict
    assert interval_text == TextBlock.from_dict(interval_dict)

    rectangle_text = TextBlock(rectangle, id=24)
    rectangle_dict["id"] = 24
    assert rectangle_text.to_dict() == rectangle_dict
    assert rectangle_text == TextBlock.from_dict(rectangle_dict)

    quad_text = TextBlock(quad, text="test", parent=45)
    quad_dict["text"] = "test"
    quad_dict["parent"] = 45
    assert quad_text.to_dict() == quad_dict
    assert quad_text == TextBlock.from_dict(quad_dict)

    # --- Layout of TextBlocks, built without page_data ---------------------
    text_layout = Layout([interval_text, rectangle_text, quad_text])
    text_layout_dict = {
        "page_data": {},
        "blocks": [interval_dict, rectangle_dict, quad_dict],
    }
    assert text_layout.to_dict() == text_layout_dict
def test_layout():
    """Exercise the Layout API on bare elements and on TextBlocks.

    Covers the query/transform methods (called only to check they run),
    ``get_homogeneous_blocks``, slicing, ``append`` / ``extend``, and ``+``
    between layouts, including the ``ValueError`` expected when the two
    layouts carry different ``page_data``.
    """
    i = Interval(4, 5, axis="y")
    q = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    r = Rectangle(3, 3, 5, 6)
    t = TextBlock(i, id=1, type=2, text="12")

    # Layout of bare elements: results are discarded, the calls must not raise.
    layout = Layout([i, q, r])
    layout.get_texts()
    layout.condition_on(i)
    layout.relative_to(q)
    layout.filter_by(t)
    layout.is_in(r)
    assert layout.get_homogeneous_blocks() == [
        i.to_quadrilateral(),
        q,
        r.to_quadrilateral(),
    ]

    # Layout of TextBlocks with page data.
    i2 = TextBlock(i, id=1, type=2, text="12")
    r2 = TextBlock(r, id=1, type=2, parent="a")
    q2 = TextBlock(q, id=1, type=2, next="a")
    text_layout = Layout([i2, r2, q2], page_data={"width": 200, "height": 200})
    text_layout.get_texts()
    text_layout.get_info("next")
    text_layout.condition_on(i)
    text_layout.relative_to(q)
    text_layout.filter_by(t)
    text_layout.is_in(r)
    text_layout.scale(4)
    text_layout.shift(4)
    text_layout.pad(left=2)

    # Test slicing function
    homogeneous_blocks = text_layout[:2].get_homogeneous_blocks()
    assert homogeneous_blocks[0].block == i.to_rectangle()
    assert homogeneous_blocks[1].block == r

    # Test appending and extending
    assert layout + [i2] == Layout([i, q, r, i2])
    assert layout + layout == Layout([i, q, r] * 2)
    layout.append(i)
    assert layout == Layout([i, q, r, i])
    text_layout.extend([q])
    assert text_layout == Layout(
        [i2, r2, q2, q],
        page_data={"width": 200, "height": 200},
    )

    # Test addition: with no page_data set on `layout`, adding must not raise.
    layout + text_layout

    # Assign the conflicting page_data outside the raises block so that only
    # the addition itself can satisfy the expected ValueError.
    layout.page_data = {"width": 200, "height": 400}
    with pytest.raises(ValueError):
        layout + text_layout
def test_df():
    """Check ``Layout.from_dataframe`` on several column layouts.

    The frames cover, in order: rows tagged through an ``_identifier``
    column; the same with ``next`` / ``parent`` columns and a ``_textblock``
    row; the same columns without ``_identifier``; and three minimal frames
    holding only rectangle, interval-style and quadrilateral columns.
    """
    rect_cols = ["x_1", "y_1", "x_2", "y_2"]
    canvas_cols = ["height", "width"]
    point_cols = ["p11", "p12", "p21", "p22", "p31", "p32", "p41", "p42"]
    link_cols = ["next", "parent"]

    # Cell runs repeated across many rows of the frames below.
    no_points = [None] * 8
    corners = [1, 2, 3, 2, 3, 6, 1, 4]

    def expected_quad(**kwargs):
        # Fresh quadrilateral matching the `corners` cells above.
        return Quadrilateral(np.array([[1, 2], [3, 2], [3, 6], [1, 4]]), **kwargs)

    # --- Frame with an `_identifier` column --------------------------------
    frame = pd.DataFrame(
        data=[
            ["_interval", None, 10, None, 12, 240, None] + no_points,
            ["_interval", 12, None, 24, None, 120, 50] + no_points,
            ["_interval", 0, 10, 0, 12, 120, 50] + no_points,  # for fillna with 0
            ["_rectangle", 12, 32, 24, 55, None, None] + no_points,
            ["_rectangle", 12, 32, 24, 55, 0, 0] + no_points,
            ["_quadrilateral", None, None, None, None, None, None] + corners,
            ["_quadrilateral", None, None, None, None, 0, 0] + corners,
            ["_quadrilateral", 0, 0, 0, 0, 0, 0] + corners,
        ],
        columns=["_identifier"] + rect_cols + canvas_cols + point_cols,
    )
    parsed = Layout.from_dataframe(frame)
    assert parsed[0] == Interval(10, 12, "y", canvas_height=240)
    assert parsed[2] == Interval(10, 12, "y", canvas_height=120, canvas_width=50)
    assert parsed[3] == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)
    assert parsed[3] == parsed[4]
    assert not parsed[5] == Quadrilateral(np.arange(8).reshape(4, -1))
    assert parsed[6] == expected_quad()

    # --- Same, plus `next` / `parent` columns and a `_textblock` row -------
    frame = pd.DataFrame(
        data=[
            ["_interval", None, 10, None, 12, 240, None] + no_points + [None, None],
            ["_interval", 12, None, 24, None, 120, 50] + no_points + [None, None],
            ["_interval", 0, 10, 0, 12, 120, 50] + no_points + [None, 24],  # for fillna with 0
            ["_rectangle", 12, 32, 24, 55, None, None] + no_points + [None, None],
            ["_rectangle", 12, 32, 24, 55, 0, 0] + no_points + [12, None],
            ["_quadrilateral", None, None, None, None, None, None] + corners + [None, None],
            ["_quadrilateral", None, None, None, None, 0, 0] + corners + [None, None],
            ["_textblock", None, None, None, None, 0, 0] + corners + [None, 28],
        ],
        columns=["_identifier"] + rect_cols + canvas_cols + point_cols + link_cols,
    )
    parsed = Layout.from_dataframe(frame)
    assert parsed[0] == Interval(10, 12, "y", canvas_height=240)
    assert parsed[2] == Interval(10, 12, "y", canvas_height=120, canvas_width=50)
    assert parsed[3] == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)
    assert parsed[3] == parsed[4]
    assert parsed[6] == expected_quad()
    assert parsed[-1].block == expected_quad()
    assert parsed[-1].parent == 28

    # --- No `_identifier` column: elements are compared through `.block` ---
    frame = pd.DataFrame(
        data=[
            [None, 10, None, 12, 240, None] + no_points + [None, None],
            [12, None, 24, None, 120, 50] + no_points + [None, None],
            [0, 10, 0, 12, 120, 50] + no_points + [None, 24],  # for fillna with 0
            [12, 32, 24, 55, None, None] + no_points + [None, None],
            [12, 32, 24, 55, None, None] + no_points + [12, None],
            [None, None, None, None, None, None] + corners + [None, None],
            [None, None, None, None, 0, 0] + corners + [None, None],
            [None, None, None, None, 0, 0] + corners + [None, 28],
        ],
        columns=rect_cols + canvas_cols + point_cols + link_cols,
    )
    parsed = Layout.from_dataframe(frame)
    assert parsed[0].block == Interval(10, 12, "y", canvas_height=240)
    assert parsed[2].block == Interval(10, 12, "y", canvas_height=120, canvas_width=50)
    assert parsed[3].block == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)
    assert not parsed[3] == parsed[4]
    assert parsed[6].block == expected_quad()
    assert parsed[-1].block == expected_quad()
    assert parsed[-1].parent == 28

    # --- Rectangle coordinate columns only ---------------------------------
    frame = pd.DataFrame(
        data=[
            [0, 10, 0, 12],
            [12, 32, 24, 55],
        ],
        columns=rect_cols,
    )
    parsed = Layout.from_dataframe(frame)
    assert parsed[1] == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)

    # --- Coordinate and canvas columns, second row missing the y values ----
    frame = pd.DataFrame(
        data=[
            [0, 10, 0, 12, 240, 520],
            [12, None, 24, None, 240, None],
        ],
        columns=rect_cols + canvas_cols,
    )
    parsed = Layout.from_dataframe(frame)
    assert parsed[1] == Interval(12, 24, "x", canvas_height=240)

    # --- Quadrilateral point columns with width / height -------------------
    frame = pd.DataFrame(
        data=[
            corners + [None, None],
            corners + [None, None],
            corners + [None, 28],
        ],
        columns=point_cols + ["width", "height"],
    )
    parsed = Layout.from_dataframe(frame)
    assert parsed[1] == expected_quad()
    assert parsed[2] == expected_quad(height=28)
def test_layout():
    """Smoke-test Layout query and transform methods.

    Nothing is asserted; the test passes when none of the calls raise.
    """
    interval = Interval(4, 5, axis="y")
    quad = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    rect = Rectangle(3, 3, 5, 6)
    probe = TextBlock(interval, id=1, type=2, text="12")

    # Layout made of bare elements.
    plain = Layout([interval, quad, rect])
    plain.get_texts()
    plain.condition_on(interval)
    plain.relative_to(quad)
    plain.filter_by(probe)
    plain.is_in(rect)

    # Layout made of TextBlocks wrapping the same elements.
    text_blocks = [
        TextBlock(interval, id=1, type=2, text="12"),
        TextBlock(rect, id=1, type=2, parent="a"),
        TextBlock(quad, id=1, type=2, next="a"),
    ]
    textual = Layout(text_blocks)
    textual.get_texts()
    textual.get_info("next")
    textual.condition_on(interval)
    textual.relative_to(quad)
    textual.filter_by(probe)
    textual.is_in(rect)
    textual.scale(4)
    textual.shift(4)
    textual.pad(left=2)