Ejemplo n.º 1
0
 def __init__(self, name, tree, url):
     """Set up the instance from a name, a parsed tree and a source URL.

     Stores ``name`` and ``url`` as given, extracts the text blocks of
     ``tree`` via ``get_textblocks`` and caches the sum of their
     ``weight`` attributes as ``self.weight``.
     """
     self.name = name
     self.url = url
     # Text blocks extracted from the tree, and their combined weight.
     self.blocks = get_textblocks(tree)
     self.weight = sum(block.weight for block in self.blocks)
     # Disabled: total over the blocks' weight_noanchor attribute.
     #self.weight_noanchor = sum( b.weight_noanchor for b in self.blocks )
     # Start with an empty list before add_anchor_strs() runs
     # (presumably it fills this list -- confirm against its definition).
     self.anchor_strs = []
     self.add_anchor_strs()
Ejemplo n.º 2
0
 def identify_layout(self, tree, pat_threshold=0.9, strict=True):
     """Pick the pattern in ``self.pats`` whose layout best covers ``tree``.

     The text blocks of ``tree`` are matched against every pattern with
     ``match_blocks(blocks, strict=strict)``.  A candidate is accepted
     only if the summed ``weight`` of its layout sections is strictly
     greater than the best score so far, which starts at
     ``pat_threshold`` times the total weight of all blocks.  On ties
     the earlier pattern is kept.

     When ``self.debug`` is 2 or higher, ``tree.dump()`` is called first.

     Returns a ``(pattern, layout)`` tuple, or ``(None, None)`` when no
     pattern exceeds the threshold.
     """
     best_pat = None
     best_layout = None
     textblocks = get_textblocks(tree)
     if self.debug >= 2:
         tree.dump()
     # The bar a layout must beat: initially a fraction of the total
     # block weight, afterwards the weight of the best layout found.
     best_weight = sum(blk.weight for blk in textblocks) * pat_threshold
     for candidate in self.pats:
         matched = candidate.match_blocks(textblocks, strict=strict)
         if not matched:
             continue
         matched_weight = sum(section.weight for section in matched)
         if matched_weight > best_weight:
             best_pat = candidate
             best_layout = matched
             best_weight = matched_weight
     return (best_pat, best_layout)