def get_neighbors_below(obj, objs, page):
    """Return the members of *objs* that count as neighbors below *obj*.

    A candidate is accepted only when every check below passes; the checks
    run in this order and stop at the first failure:

    1. the candidate is not *obj* itself (identity comparison);
    2. ``utils.is_center_aligned(candidate, page)`` is false;
    3. ``utils.is_in_direction(obj, candidate, BELOW_ANGLE)`` is true;
    4. ``utils.is_center_aligned(obj, candidate)`` or
       ``utils.is_hor_aligned(obj, candidate)`` is true;
    5. ``utils.has_elements_between(obj, candidate, objs)`` is false.

    Args:
        obj: The reference object.
        objs: Iterable of candidate objects (may include *obj*).
        page: Object against which each candidate's center alignment is
            tested.

    Returns:
        A new list of accepted candidates, in the iteration order of *objs*.
    """
    # NOTE(review): BELOW_ANGLE is not defined inside this function; confirm
    # that it is available at module scope wherever this definition lives.

    def _is_neighbor_below(candidate):
        # Guard clauses mirror the short-circuit order described above.
        if candidate is obj:
            return False
        if utils.is_center_aligned(candidate, page):
            return False
        if not utils.is_in_direction(obj, candidate, BELOW_ANGLE):
            return False
        aligned = (utils.is_center_aligned(obj, candidate)
                   or utils.is_hor_aligned(obj, candidate))
        if not aligned:
            return False
        return not utils.has_elements_between(obj, candidate, objs)

    return [candidate for candidate in objs if _is_neighbor_below(candidate)]
def merge_layouts(layout):
    """Repeatedly group layout objects with the neighbors found below them.

    Starting from ``layout._objs``, each pass wraps every not-yet-grouped
    object in a fresh ``LTTextGroupLRTB`` and adds to that group the objects
    found below it. The resulting distinct groups become the input of the
    next pass. Passes repeat until one completes without adding any
    neighbor to any group.

    Every object in ``layout._objs`` is considered; no filtering by object
    type is applied.

    Args:
        layout: Object exposing ``_objs``; it is also the reference against
            which center alignment is tested.

    Returns:
        A list of groups sorted by descending ``y1`` (an empty list when
        ``layout._objs`` is empty).
    """
    below_angle = math.radians(270)

    def _neighbors_below(ref, candidates):
        """Return the candidates that qualify as neighbors below *ref*."""
        found = []
        for candidate in candidates:
            # Skip the reference itself.
            if candidate is ref:
                continue
            # Objects center-aligned with the layout are never neighbors.
            if utils.is_center_aligned(candidate, layout):
                continue
            if not utils.is_in_direction(ref, candidate, below_angle):
                continue
            if not (utils.is_center_aligned(ref, candidate)
                    or utils.is_hor_aligned(ref, candidate)):
                continue
            if utils.has_elements_between(ref, candidate, candidates):
                continue
            found.append(candidate)
        return found

    current = layout._objs
    changed = True
    while changed:
        changed = False
        group_of = {}  # object -> group it currently belongs to
        for item in current:
            if item in group_of:
                # Already absorbed as a neighbor earlier in this pass.
                group = group_of[item]
            else:
                group = LTTextGroupLRTB([item])
                group_of[item] = group
                # A newly wrapped object that is center-aligned with the
                # layout stays alone in its group.
                if utils.is_center_aligned(item, layout):
                    continue

            below = _neighbors_below(item, current)
            # Visit neighbors in order of increasing x0.
            for neighbor in sorted(below, key=lambda n: n.x0):
                if neighbor in group._objs:
                    continue
                changed = True
                group.add(neighbor)
                group_of[neighbor] = group

        # Deduplicate the groups and order them by descending y1 before the
        # next pass (or before returning).
        current = sorted(set(group_of.values()), key=lambda grp: -grp.y1)
    return current