def __init__(self, name, tree, encoder=None):
    """Build the instance from a parsed tree.

    Args:
        name: Identifier stored unchanged on ``self.name``.
        tree: Object handed to ``get_textblocks`` to produce the text
            blocks (its exact type is defined by that helper).
        encoder: Optional second argument forwarded to
            ``get_textblocks``; defaults to ``None``.

    Attributes set:
        name: The given name.
        blocks: Whatever ``get_textblocks(tree, encoder)`` returns.
        weight: Sum of the ``weight`` attribute over all blocks.
        anchor_strs: A fresh, initially empty list.
    """
    self.name = name
    textblocks = get_textblocks(tree, encoder)
    self.blocks = textblocks
    self.weight = sum(block.weight for block in textblocks)
    # NOTE: a parallel ``weight_noanchor`` total (sum of each block's
    # ``weight_noanchor``) used to be computed here; it is disabled.
    self.anchor_strs = []
def __init__(self, name, tree, encoder=None):
    """Initialise the object with its name, text blocks and total weight.

    Args:
        name: Value recorded as ``self.name``.
        tree: Input passed to ``get_textblocks`` (type determined by
            that helper, which is defined outside this block).
        encoder: Optional value passed through to ``get_textblocks`` as
            its second positional argument.

    After construction ``self.blocks`` holds the result of
    ``get_textblocks``, ``self.weight`` is the sum of every block's
    ``weight``, and ``self.anchor_strs`` is an empty list.
    """
    self.name = name
    extracted = get_textblocks(tree, encoder)
    self.blocks = extracted
    total_weight = sum(blk.weight for blk in extracted)
    self.weight = total_weight
    # NOTE: the matching ``weight_noanchor`` sum over the blocks is
    # deliberately left uncomputed (it was commented out in the original).
    self.anchor_strs = []
def identify_layout(self, tree, pat_threshold, strict=True):
    """Pick the pattern in ``self.pats`` that best matches ``tree``.

    The tree is turned into text blocks with ``get_textblocks`` (using
    ``self.encoder``).  Each pattern is asked to match those blocks; a
    match is scored by the summed ``weight`` of the sections it returns.
    The initial bar to beat is the total block weight multiplied by
    ``pat_threshold``; after that, a later pattern must score strictly
    higher than the current best, so ties keep the earlier pattern.

    Args:
        tree: Parsed document passed to ``get_textblocks``; it must also
            provide ``dump()``, which is called when ``self.debug`` is 2
            or more.
        pat_threshold: Multiplier applied to the total block weight to
            obtain the minimum score a match has to exceed.
        strict: Forwarded to each pattern's ``match_blocks`` call.

    Returns:
        A ``(pattern, layout)`` tuple for the best-scoring match, or
        ``(None, None)`` when no pattern exceeds the threshold.
    """
    textblocks = get_textblocks(tree, encoder=self.encoder)
    if self.debug >= 2:
        tree.dump()

    best = (None, None)
    best_weight = sum(blk.weight for blk in textblocks) * pat_threshold

    for pattern in self.pats:
        sections = pattern.match_blocks(textblocks, strict=strict)
        if not sections:
            # No (or empty) match for this pattern; try the next one.
            continue
        score = sum(section.weight for section in sections)
        if score > best_weight:
            best = (pattern, sections)
            best_weight = score

    return best