def test_textblock():
    """Round-trip TextBlocks of each shape through relative_to/condition_on."""
    interval = Interval(4, 5, axis="y")
    quad = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    rect = Rectangle(3, 3, 5, 6)

    # Interval-backed block, round-tripped against the quadrilateral.
    block = TextBlock(interval, id=1, type=2, text="12")
    round_trip = block.relative_to(quad).condition_on(quad)
    assert round_trip.block == interval.put_on_canvas(quad).to_quadrilateral()
    block.area  # result unused; only checks that `area` can be read

    # Rectangle-backed block, round-tripped against the interval.
    block = TextBlock(rect, id=1, type=2, parent="a")
    round_trip = block.relative_to(interval).condition_on(interval)
    assert round_trip.block == rect
    block.area

    # Quadrilateral-backed block, round-tripped against the rectangle.
    block = TextBlock(quad, id=1, type=2, parent="a")
    round_trip = block.relative_to(rect).condition_on(rect)
    assert round_trip.block == quad
    block.area

    # Ensure the operations did not change the object itself
    assert block == TextBlock(quad, id=1, type=2, parent="a")

    quad_block = TextBlock(quad, id=1, type=2, parent="a")
    interval_block = TextBlock(interval, id=1, type=2, text="12")
    quad_block.relative_to(interval_block)
    assert interval_block.is_in(quad_block)

    # Constructing a block with only a `score` keyword must not fail.
    TextBlock(quad, score=0.2)
def test_json():
    """Compare elements with their dict round-trip and the stored JSON fixtures."""
    interval = Interval(1, 2, "y", canvas_height=5)
    rect = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    layout = Layout(
        [interval, rect, quad], page_data={"width": 200, "height": 200}
    )

    interval_tb = TextBlock(interval, "")
    rect_tb = TextBlock(rect, id=24)
    quad_tb = TextBlock(quad, text="test", parent=45)
    layout_tb = Layout([interval_tb, rect_tb, quad_tb])

    # Same shapes, with the optional features explicitly passed as None.
    interval_none = TextBlock(interval, None)
    rect_none = TextBlock(rect, id=None)
    quad_none = TextBlock(quad, text=None, parent=None)
    layout_none = Layout(
        [interval_none, rect_none, quad_none],
        page_data={"width": 200, "height": 200},
    )

    # Each element must equal both its own dict round-trip and its fixture.
    fixture_cases = [
        (interval, "tests/fixtures/io/interval.json"),
        (rect, "tests/fixtures/io/rectangle.json"),
        (quad, "tests/fixtures/io/quadrilateral.json"),
        (layout, "tests/fixtures/io/layout.json"),
        (interval_tb, "tests/fixtures/io/interval_textblock.json"),
        (rect_tb, "tests/fixtures/io/rectangle_textblock.json"),
        (quad_tb, "tests/fixtures/io/quadrilateral_textblock.json"),
        (layout_tb, "tests/fixtures/io/layout_textblock.json"),
    ]
    for element, fixture_path in fixture_cases:
        assert element == load_dict(element.to_dict()) == load_json(fixture_path)

    # Test if LP can ignore the unused None features
    assert layout == load_dict(layout_none.to_dict())
def test_layout():
    """Exercise Layout queries, transforms, slicing, appending and addition."""
    i = Interval(4, 5, axis="y")
    q = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    r = Rectangle(3, 3, 5, 6)
    t = TextBlock(i, id=1, type=2, text="12")

    # Layout of bare coordinate elements; the bare calls are smoke checks
    # whose return values are not inspected.
    l = Layout([i, q, r])
    l.get_texts()
    l.condition_on(i)
    l.relative_to(q)
    l.filter_by(t)
    l.is_in(r)
    assert l.get_homogeneous_blocks() == [
        i.to_quadrilateral(),
        q,
        r.to_quadrilateral(),
    ]

    # Layout of text blocks carrying page data.
    i2 = TextBlock(i, id=1, type=2, text="12")
    r2 = TextBlock(r, id=1, type=2, parent="a")
    q2 = TextBlock(q, id=1, type=2, next="a")
    l2 = Layout([i2, r2, q2], page_data={"width": 200, "height": 200})

    l2.get_texts()
    l2.get_info("next")
    l2.condition_on(i)
    l2.relative_to(q)
    l2.filter_by(t)
    l2.is_in(r)

    l2.scale(4)
    l2.shift(4)
    l2.pad(left=2)

    # Test slicing function
    homogeneous_blocks = l2[:2].get_homogeneous_blocks()
    assert homogeneous_blocks[0].block == i.to_rectangle()
    assert homogeneous_blocks[1].block == r

    # Test appending and extending
    assert l + [i2] == Layout([i, q, r, i2])
    assert l + l == Layout([i, q, r] * 2)
    l.append(i)
    assert l == Layout([i, q, r, i])
    l2.extend([q])
    assert l2 == Layout(
        [i2, r2, q2, q], page_data={"width": 200, "height": 200}
    )

    # Test addition
    l + l2
    # Give `l` page data that differs from `l2`'s. The assignment stays
    # outside the `raises` block so that only the addition itself is
    # allowed to produce the expected ValueError.
    l.page_data = {"width": 200, "height": 400}
    with pytest.raises(ValueError):
        l + l2
def test_quadrilateral_relations():
    """Check containment and coordinate round-trips for a Quadrilateral."""
    interval = Interval(4, 5, axis="y")
    quad = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    rect = Rectangle(3, 3, 5, 6)

    assert not quad.is_in(rect)
    assert quad.is_in(interval, soft_margin={"top": 2, "bottom": 2})
    unit_margin = {"left": 1, "top": 1, "right": 1, "bottom": 1}
    assert quad.is_in(rect, soft_margin=unit_margin)
    assert quad.is_in(quad)

    references = (interval, rect, quad)

    # convert to absolute then convert back to relative
    for ref in references:
        assert quad.condition_on(ref).relative_to(ref) == quad

    # convert to relative then convert back to absolute
    for ref in references:
        assert quad.relative_to(ref).condition_on(ref) == quad
def test_layout():
    """Smoke-test Layout queries and transforms on plain and text-block layouts."""
    interval = Interval(4, 5, axis='y')
    quad = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    rect = Rectangle(3, 3, 5, 6)
    probe = TextBlock(interval, id=1, type=2, text="12")

    # Layout made of bare coordinate elements; results are not inspected.
    plain_layout = Layout([interval, quad, rect])
    plain_layout.get_texts()
    plain_layout.condition_on(interval)
    plain_layout.relative_to(quad)
    plain_layout.filter_by(probe)
    plain_layout.is_in(rect)

    # Layout made of text blocks wrapping the same shapes.
    text_blocks = [
        TextBlock(interval, id=1, type=2, text="12"),
        TextBlock(rect, id=1, type=2, parent="a"),
        TextBlock(quad, id=1, type=2, next="a"),
    ]
    text_layout = Layout(text_blocks)
    text_layout.get_texts()
    text_layout.get_info('next')
    text_layout.condition_on(interval)
    text_layout.relative_to(quad)
    text_layout.filter_by(probe)
    text_layout.is_in(rect)

    text_layout.scale(4)
    text_layout.shift(4)
    text_layout.pad(left=2)
def test_csv():
    """Compare layouts loaded from the CSV fixtures with hand-built ones."""
    interval = Interval(1, 2, "y", canvas_height=5)
    rect = Rectangle(1, 2, 3, 4)
    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    page = {"width": 200, "height": 200}
    expected = Layout([interval, rect, quad], page_data=page)

    loaded = load_csv("tests/fixtures/io/layout.csv")
    # The freshly loaded layout is expected to differ until page data is set.
    assert loaded != expected
    loaded.page_data = {"width": 200, "height": 200}
    assert loaded == expected

    text_blocks = [
        TextBlock(interval, ""),
        TextBlock(rect, id=24),
        TextBlock(quad, text="test", parent=45),
    ]
    expected_tb = Layout(text_blocks)

    loaded_tb = load_csv("tests/fixtures/io/layout_textblock.csv")
    assert loaded_tb == expected_tb
def test_rectangle_relations():
    """Check containment and coordinate round-trips for a Rectangle."""
    interval = Interval(4, 5, axis="y")
    quad = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    rect = Rectangle(3, 3, 5, 6)

    assert not rect.is_in(quad)
    assert rect.is_in(quad, soft_margin={"bottom": 1})
    assert rect.is_in(quad.to_rectangle())
    assert rect.is_in(quad.to_interval())

    # convert to absolute then convert back to relative
    for ref in (interval, rect):
        assert rect.condition_on(ref).relative_to(ref) == rect
    via_quad = rect.condition_on(quad).relative_to(quad)
    assert via_quad == rect.to_quadrilateral()

    # convert to relative then convert back to absolute
    for ref in (interval, rect):
        assert rect.relative_to(ref).condition_on(ref) == rect
    via_quad = rect.relative_to(quad).condition_on(quad)
    assert via_quad == rect.to_quadrilateral()
import json import numpy as np from layoutparser.elements import Interval, Rectangle, Quadrilateral, TextBlock, Layout if __name__ == "__main__": i = Interval(1, 2, "y", canvas_height=5) r = Rectangle(1, 2, 3, 4) q = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400) l = Layout([i, r, q], page_data={"width": 200, "height": 200}) with open("interval.json", "w") as fp: json.dump(i.to_dict(), fp) with open("rectangle.json", "w") as fp: json.dump(r.to_dict(), fp) with open("quadrilateral.json", "w") as fp: json.dump(q.to_dict(), fp) with open("layout.json", "w") as fp: json.dump(l.to_dict(), fp) l.to_dataframe().to_csv("layout.csv", index=None) i2 = TextBlock(i, "") r2 = TextBlock(r, id=24) q2 = TextBlock(q, text="test", parent=45) l2 = Layout([i2, r2, q2]) with open("interval_textblock.json", "w") as fp: json.dump(i2.to_dict(), fp) with open("rectangle_textblock.json", "w") as fp: json.dump(r2.to_dict(), fp) with open("quadrilateral_textblock.json", "w") as fp:
def test_interval_relations():
    """Check containment and coordinate round-trips for an Interval."""
    interval = Interval(4, 5, axis="y")
    rect = Rectangle(3, 3, 5, 6)
    quad = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))

    for container in (interval, rect, quad):
        assert interval.is_in(container)

    # convert to absolute then convert back to relative
    assert interval.condition_on(interval).relative_to(interval) == interval
    via_rect = interval.condition_on(rect).relative_to(rect)
    assert via_rect == interval.put_on_canvas(rect).to_rectangle()
    via_quad = interval.condition_on(quad).relative_to(quad)
    assert via_quad == interval.put_on_canvas(quad).to_quadrilateral()

    # convert to relative then convert back to absolute
    assert interval.relative_to(interval).condition_on(interval) == interval
    via_rect = interval.relative_to(rect).condition_on(rect)
    assert via_rect == interval.put_on_canvas(rect).to_rectangle()
    via_quad = interval.relative_to(quad).condition_on(quad)
    assert via_quad == interval.put_on_canvas(quad).to_quadrilateral()
def test_interval():
    """Check Interval conversion, shift/scale/pad arithmetic, area and cropping."""
    i = Interval(1, 2, axis="y", canvas_height=30, canvas_width=400)
    # Conversions are smoke checks only; their results are not inspected.
    i.to_rectangle()
    i.to_quadrilateral()
    assert i.shift(1) == Interval(2, 3, axis="y", canvas_height=30, canvas_width=400)
    assert i.area == 1 * 400

    i = Interval(1, 2, axis="x")
    assert i.shift([1, 2]) == Interval(2, 3, axis="x")
    assert i.scale([2, 1]) == Interval(2, 4, axis="x")
    # Test the safe_mode: by default the padded start is 0, and with
    # safe_mode=False it is -9.
    assert i.pad(left=10, right=20) == Interval(0, 22, axis="x")
    assert i.pad(left=10, right=20, safe_mode=False) == Interval(-9, 22, axis="x")
    assert i.area == 0

    # Background values come from [12, 24) and are therefore never 0;
    # columns 10..19 are then zeroed.
    img = np.random.randint(12, 24, (40, 40))
    img[:, 10:20] = 0

    i = Interval(5, 11, axis="x")
    # The last column of the crop must contain only zeros. `np.array_equal`
    # yields a plain bool, so a wrong crop fails with an AssertionError
    # instead of an ambiguous-truth-value ValueError from comparing arrays
    # of different lengths element-wise.
    last_column_values = np.unique(i.crop_image(img)[:, -1])
    assert np.array_equal(last_column_values, np.array([0]))
def test_dict():
    """Check to_dict/from_dict for every element type and for layouts."""
    interval = Interval(1, 2, "y", canvas_height=5)
    interval_dict = {
        "block_type": "interval",
        "start": 1,
        "end": 2,
        "axis": "y",
        "canvas_height": 5,
        "canvas_width": 0,
    }
    assert interval.to_dict() == interval_dict
    assert interval == Interval.from_dict(interval_dict)

    rect = Rectangle(1, 2, 3, 4)
    rect_dict = {
        "block_type": "rectangle",
        "x_1": 1,
        "y_1": 2,
        "x_2": 3,
        "y_2": 4,
    }
    assert rect.to_dict() == rect_dict
    assert rect == Rectangle.from_dict(rect_dict)

    quad = Quadrilateral(np.arange(8).reshape(4, 2), 200, 400)
    quad_dict = {
        "block_type": "quadrilateral",
        "points": [0, 1, 2, 3, 4, 5, 6, 7],
        "height": 200,
        "width": 400,
    }
    assert quad.to_dict() == quad_dict
    assert quad == Quadrilateral.from_dict(quad_dict)

    layout = Layout(
        [interval, rect, quad], page_data={"width": 200, "height": 200}
    )
    layout_dict = {
        "page_data": {"width": 200, "height": 200},
        "blocks": [interval_dict, rect_dict, quad_dict],
    }
    assert layout.to_dict() == layout_dict

    # From here on the block dicts are extended in place with the text-block
    # features, so they describe the TextBlock wrappers instead.
    interval_tb = TextBlock(interval, "")
    interval_dict["text"] = ""
    assert interval_tb.to_dict() == interval_dict
    assert interval_tb == TextBlock.from_dict(interval_dict)

    rect_tb = TextBlock(rect, id=24)
    rect_dict["id"] = 24
    assert rect_tb.to_dict() == rect_dict
    assert rect_tb == TextBlock.from_dict(rect_dict)

    quad_tb = TextBlock(quad, text="test", parent=45)
    quad_dict.update(text="test", parent=45)
    assert quad_tb.to_dict() == quad_dict
    assert quad_tb == TextBlock.from_dict(quad_dict)

    layout_tb = Layout([interval_tb, rect_tb, quad_tb])
    layout_tb_dict = {
        "page_data": {},
        "blocks": [interval_dict, rect_dict, quad_dict],
    }
    assert layout_tb.to_dict() == layout_tb_dict
def test_shape_operations():
    """Check intersect/union across Interval (I), Rectangle (R) and Quadrilateral (Q).

    Every call that is expected to raise gets its own ``pytest.raises``
    context: inside a shared context the first raising call ends the block,
    so any later call would never be executed.
    """
    i_1 = Interval(1, 2, axis="y", canvas_height=30, canvas_width=400)
    i_2 = TextBlock(Interval(1, 2, axis="x"))
    i_3 = Interval(1, 2, axis="y")

    r_1 = Rectangle(0.5, 0.5, 2.5, 1.5)
    r_2 = TextBlock(Rectangle(0.5, 0.5, 2, 2.5))

    q_1 = Quadrilateral([[1, 1], [2.5, 1.2], [2.5, 3], [1.5, 3]])
    q_2 = TextBlock(Quadrilateral([[0.5, 0.5], [2, 1], [1.5, 2.5], [0.5, 2]]))

    # I and I in different axes
    assert i_1.intersect(i_1) == i_1
    assert i_1.intersect(i_2) == Rectangle(1, 1, 2, 2)
    assert i_1.intersect(i_3) == i_1  # Ensure intersect copy the canvas size

    assert i_1.union(i_1) == i_1
    # The call itself must raise, so there is no result to compare.
    with pytest.raises(InvalidShapeError):
        i_1.union(i_2)

    # I and R in different axes
    assert i_1.intersect(r_1) == Rectangle(0.5, 1, 2.5, 1.5)
    assert i_2.intersect(r_1).block == Rectangle(1, 0.5, 2, 1.5)
    assert i_1.union(r_1) == Rectangle(0.5, 0.5, 2.5, 2)
    assert i_2.union(r_1).block == r_1

    # I and Q in strict mode
    with pytest.raises(NotSupportedShapeError):
        i_1.intersect(q_1)
    with pytest.raises(NotSupportedShapeError):
        i_1.union(q_1)

    # I and Q in different axes
    assert i_1.intersect(q_1, strict=False) == Rectangle(1, 1, 2.5, 2)
    assert i_1.union(q_1, strict=False) == Rectangle(1, 1, 2.5, 3)
    assert i_2.intersect(q_1, strict=False).block == Rectangle(1, 1, 2, 3)
    assert i_2.union(q_1, strict=False).block == Rectangle(1, 1, 2.5, 3)

    # R and I
    assert r_1.intersect(i_1) == i_1.intersect(r_1)

    # R and R
    assert r_1.intersect(r_2) == r_2.intersect(r_1).block == Rectangle(
        0.5, 0.5, 2, 1.5)
    assert r_1.union(r_2) == r_2.union(r_1).block == Rectangle(
        0.5, 0.5, 2.5, 2.5)

    # R and Q
    with pytest.raises(NotSupportedShapeError):
        r_1.intersect(q_1)
    with pytest.raises(NotSupportedShapeError):
        r_1.union(q_1)

    assert r_1.intersect(q_1, strict=False) == Rectangle(1, 1, 2.5, 1.5)
    assert r_1.union(q_1, strict=False) == Rectangle(0.5, 0.5, 2.5, 3)
    assert r_1.intersect(q_2, strict=False) == r_1.intersect(q_2.to_rectangle())
    assert r_1.union(q_2, strict=False) == r_1.union(q_2.to_rectangle())

    # Q and others in strict mode
    for other in (i_1, r_1, q_2):
        with pytest.raises(NotSupportedShapeError):
            q_1.intersect(other)

    # Q and I
    assert q_1.intersect(i_1, strict=False) == i_1.intersect(q_1, strict=False)
    assert q_1.union(i_1, strict=False) == i_1.union(q_1, strict=False)

    # Q and R
    assert q_1.intersect(r_1, strict=False) == r_1.intersect(q_1, strict=False)
    assert q_1.union(r_1, strict=False) == r_1.union(q_1, strict=False)

    # Q and Q
    assert q_1.intersect(q_2, strict=False) == q_2.intersect(q_1, strict=False).block
    assert q_1.intersect(q_2, strict=False) == Rectangle(1, 1, 2, 2.5)
    assert q_1.union(q_2, strict=False) == q_2.union(q_1, strict=False).block
    assert q_1.union(q_2, strict=False) == Rectangle(0.5, 0.5, 2.5, 3)
def test_textblock():
    """Check TextBlock coordinate round-trips and shape conversions."""
    i = Interval(4, 5, axis="y")
    q = Quadrilateral(np.array([[2, 2], [6, 2], [6, 7], [2, 5]]))
    r = Rectangle(3, 3, 5, 6)

    # Round-trip each kind of block through relative_to/condition_on.
    # The bare `t.area` statements only check that the attribute can be read.
    t = TextBlock(i, id=1, type=2, text="12")
    assert (t.relative_to(q).condition_on(q).block == i.put_on_canvas(
        q).to_quadrilateral())
    t.area
    t = TextBlock(r, id=1, type=2, parent="a")
    assert t.relative_to(i).condition_on(i).block == r
    t.area
    t = TextBlock(q, id=1, type=2, parent="a")
    assert t.relative_to(r).condition_on(r).block == q
    t.area

    # Ensure the operations did not change the object itself
    assert t == TextBlock(q, id=1, type=2, parent="a")
    t1 = TextBlock(q, id=1, type=2, parent="a")
    t2 = TextBlock(i, id=1, type=2, text="12")
    t1.relative_to(t2)
    assert t2.is_in(t1)

    # Constructing a block with only a `score` keyword must not fail.
    t = TextBlock(q, score=0.2)

    # Additional test for shape conversion
    assert TextBlock(i, id=1, type=2, text="12").to_interval() == TextBlock(
        i, id=1, type=2, text="12")
    assert TextBlock(i, id=1, type=2, text="12").to_rectangle() == TextBlock(
        i.to_rectangle(), id=1, type=2, text="12")
    assert TextBlock(i, id=1, type=2, text="12").to_quadrilateral() == TextBlock(
        i.to_quadrilateral(), id=1, type=2, text="12")

    assert TextBlock(r, id=1, type=2, parent="a").to_interval(axis="x") == TextBlock(
        r.to_interval(axis="x"), id=1, type=2, parent="a")
    assert TextBlock(r, id=1, type=2, parent="a").to_interval(axis="y") == TextBlock(
        r.to_interval(axis="y"), id=1, type=2, parent="a")
    assert TextBlock(r, id=1, type=2, parent="a").to_rectangle() == TextBlock(
        r, id=1, type=2, parent="a")
    assert TextBlock(r, id=1, type=2, parent="a").to_quadrilateral() == TextBlock(
        r.to_quadrilateral(), id=1, type=2, parent="a")

    assert TextBlock(q, id=1, type=2, parent="a").to_interval(axis="x") == TextBlock(
        q.to_interval(axis="x"), id=1, type=2, parent="a")
    assert TextBlock(q, id=1, type=2, parent="a").to_interval(axis="y") == TextBlock(
        q.to_interval(axis="y"), id=1, type=2, parent="a")
    assert TextBlock(q, id=1, type=2, parent="a").to_rectangle() == TextBlock(
        q.to_rectangle(), id=1, type=2, parent="a")
    assert TextBlock(q, id=1, type=2, parent="a").to_quadrilateral() == TextBlock(
        q, id=1, type=2, parent="a")

    # Converting a quadrilateral- or rectangle-backed block to an interval
    # without an axis is expected to raise. Each call has its own context,
    # because a shared one would stop at the first raise and never run the
    # second call.
    with pytest.raises(ValueError):
        TextBlock(q, id=1, type=2, parent="a").to_interval()
    with pytest.raises(ValueError):
        TextBlock(r, id=1, type=2, parent="a").to_interval()
def test_df():
    """Build layouts from DataFrames with various column sets and check the blocks."""
    rect_cols = ["x_1", "y_1", "x_2", "y_2"]
    canvas_cols = ["height", "width"]
    point_cols = ["p11", "p12", "p21", "p22", "p31", "p32", "p41", "p42"]
    extra_cols = ['next', 'parent']

    no_points = [None] * 8                  # empty p11..p42 cells
    quad_flat = [1, 2, 3, 2, 3, 6, 1, 4]    # p11..p42 of the test quadrilateral
    quad_points = [[1, 2], [3, 2], [3, 6], [1, 4]]

    # 1) Explicit `_identifier` column, no extra features.
    frame = pd.DataFrame(
        columns=["_identifier"] + rect_cols + canvas_cols + point_cols,
        data=[
            ['_interval', None, 10, None, 12, 240, None] + no_points,
            ['_interval', 12, None, 24, None, 120, 50] + no_points,
            ['_interval', 0, 10, 0, 12, 120, 50] + no_points,  # for fillna with 0
            ['_rectangle', 12, 32, 24, 55, None, None] + no_points,
            ['_rectangle', 12, 32, 24, 55, 0, 0] + no_points,
            ['_quadrilateral', None, None, None, None, None, None] + quad_flat,
            ['_quadrilateral', None, None, None, None, 0, 0] + quad_flat,
            ['_quadrilateral', 0, 0, 0, 0, 0, 0] + quad_flat,
        ],
    )

    loaded = Layout.from_dataframe(frame)
    assert loaded[0] == Interval(10, 12, 'y', canvas_height=240)
    assert loaded[2] == Interval(10, 12, 'y', canvas_height=120, canvas_width=50)
    assert loaded[3] == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)
    assert loaded[3] == loaded[4]
    assert not loaded[5] == Quadrilateral(np.arange(8).reshape(4, -1))
    assert loaded[6] == Quadrilateral(np.array(quad_points))

    # 2) Explicit `_identifier` column plus the `next`/`parent` features.
    frame = pd.DataFrame(
        columns=["_identifier"] + rect_cols + canvas_cols + point_cols + extra_cols,
        data=[
            ['_interval', None, 10, None, 12, 240, None] + no_points + [None, None],
            ['_interval', 12, None, 24, None, 120, 50] + no_points + [None, None],
            ['_interval', 0, 10, 0, 12, 120, 50] + no_points + [None, 24],  # for fillna with 0
            ['_rectangle', 12, 32, 24, 55, None, None] + no_points + [None, None],
            ['_rectangle', 12, 32, 24, 55, 0, 0] + no_points + [12, None],
            ['_quadrilateral', None, None, None, None, None, None] + quad_flat + [None, None],
            ['_quadrilateral', None, None, None, None, 0, 0] + quad_flat + [None, None],
            ['_textblock', None, None, None, None, 0, 0] + quad_flat + [None, 28],
        ],
    )

    loaded = Layout.from_dataframe(frame)
    assert loaded[0] == Interval(10, 12, 'y', canvas_height=240)
    assert loaded[2] == Interval(10, 12, 'y', canvas_height=120, canvas_width=50)
    assert loaded[3] == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)
    assert loaded[3] == loaded[4]
    assert loaded[6] == Quadrilateral(np.array(quad_points))
    assert loaded[-1].block == Quadrilateral(np.array(quad_points))
    assert loaded[-1].parent == 28

    # 3) No `_identifier` column; the assertions below go through `.block`.
    frame = pd.DataFrame(
        columns=rect_cols + canvas_cols + point_cols + extra_cols,
        data=[
            [None, 10, None, 12, 240, None] + no_points + [None, None],
            [12, None, 24, None, 120, 50] + no_points + [None, None],
            [0, 10, 0, 12, 120, 50] + no_points + [None, 24],  # for fillna with 0
            [12, 32, 24, 55, None, None] + no_points + [None, None],
            [12, 32, 24, 55, None, None] + no_points + [12, None],
            [None, None, None, None, None, None] + quad_flat + [None, None],
            [None, None, None, None, 0, 0] + quad_flat + [None, None],
            [None, None, None, None, 0, 0] + quad_flat + [None, 28],
        ],
    )

    loaded = Layout.from_dataframe(frame)
    assert loaded[0].block == Interval(10, 12, 'y', canvas_height=240)
    assert loaded[2].block == Interval(10, 12, 'y', canvas_height=120, canvas_width=50)
    assert loaded[3].block == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)
    assert not loaded[3] == loaded[4]
    assert loaded[6].block == Quadrilateral(np.array(quad_points))
    assert loaded[-1].block == Quadrilateral(np.array(quad_points))
    assert loaded[-1].parent == 28

    # 4) Rectangle coordinate columns only.
    frame = pd.DataFrame(
        columns=list(rect_cols),
        data=[
            [0, 10, 0, 12],
            [12, 32, 24, 55],
        ],
    )

    loaded = Layout.from_dataframe(frame)
    assert loaded[1] == Rectangle(x_1=12, y_1=32, x_2=24, y_2=55)

    # 5) Rectangle coordinate columns plus canvas size.
    frame = pd.DataFrame(
        columns=rect_cols + canvas_cols,
        data=[
            [0, 10, 0, 12, 240, 520],
            [12, None, 24, None, 240, None],
        ],
    )

    loaded = Layout.from_dataframe(frame)
    assert loaded[1] == Interval(12, 24, 'x', canvas_height=240)

    # 6) Point columns plus canvas size (note the width/height column order).
    frame = pd.DataFrame(
        columns=point_cols + ['width', 'height'],
        data=[
            quad_flat + [None, None],
            quad_flat + [None, None],
            quad_flat + [None, 28],
        ],
    )

    loaded = Layout.from_dataframe(frame)
    assert loaded[1] == Quadrilateral(np.array(quad_points))
    assert loaded[2] == Quadrilateral(np.array(quad_points), height=28)