class Page:
    """Collection of word boxes parsed from a file, plus a joined bounding box.

    Attributes:
        bb: A ``Box`` created as ``Box((0, 0), (0, 0))`` and then joined with
            every word added to the page.
            NOTE(review): ``Box.join_with`` is defined elsewhere; presumably
            it grows ``bb`` to enclose the given box -- confirm.
        words: List of word boxes in the order they were added.
    """

    def __init__(self, filename):
        """Create an empty page and immediately populate it from *filename*."""
        self.bb = Box((0, 0), (0, 0))
        self.words = []
        self.read_from(filename)

    def add_word(self, wordbox):
        """Append *wordbox* to ``words`` and join it into the page box ``bb``."""
        self.words.append(wordbox)
        self.bb.join_with(wordbox)

    def read_from(self, filename):
        """Read *filename* and add one word box per match of the pattern ``p``.

        ``p`` is a module-level name not defined in this class; it is used
        here through ``finditer`` and must yield matches exposing the named
        groups ``vpos``, ``hpos``, ``height`` and ``width``, each of which is
        converted with ``float``.
        """
        # Conversion order matches the tuple layout unpacked below.
        group_names = ('vpos', 'hpos', 'height', 'width')

        with open(filename) as source:
            # Convert every match up front so that a malformed value raises
            # before any word has been added to the page.
            geometry = []
            for match in p.finditer(source.read()):
                geometry.append(
                    tuple(float(match.group(name)) for name in group_names)
                )

        for top, left, height, width in geometry:
            # First corner is (vpos, hpos); the second is offset by the
            # height and width respectively.
            self.add_word(Box([top, left], [top + height, left + width]))