def get_average_line_height(top_bottoms):
    """Return the mean height of the taller lines.

    Each item of ``top_bottoms`` is a ``(top, bottom)`` pair and a line's
    height is ``bottom - top``.  Lines shorter than half the height of the
    tallest line get a weight of zero, so the result is the plain average
    of the remaining heights (this is not a median, despite appearances).

    Raises ``ValueError`` if ``top_bottoms`` is empty.
    """
    lheights = numpy.array([b - t for t, b in top_bottoms])
    if lheights.size == 0:
        # numpy.max() would raise ValueError here anyway; fail with a
        # message that names the actual problem instead.
        raise ValueError("top_bottoms must contain at least one line")
    lhm = numpy.max(lheights)
    # Weight 0 for anything under half the tallest line, 1 otherwise.
    # A direct array comparison replaces the per-element Python callback
    # that numpy.vectorize would run.
    weights = numpy.where(lheights < (lhm / 2), 0, 1)
    return numpy.average(lheights, weights=weights)
def calc_bounding_boxes(self):
    """Get bounding boxes of connected components.

    Labels the components of a copy of ``self.inverted``, collects their
    bounding boxes into ``self.boxes`` (dropping any box whose area is
    more than 95% of the image area), stores the trimmed mean of the box
    heights in ``self.avgheight`` and finally passes the boxes through
    ``strip_non_chars``.
    """
    labelled = iulib.intarray()
    labelled.copy(self.inverted)
    iulib.label_components(labelled, False)

    rects = iulib.rectarray()
    iulib.bounding_boxes(rects, labelled)

    # Anything bigger than this is treated as (nearly) the whole image.
    area_limit = self.inverted.dim(0) * self.inverted.dim(1) * 0.95

    self.boxes = []
    for idx in range(rects.length()):
        if rects.at(idx).area() <= area_limit:
            self.boxes.append(i2r(rects.at(idx)))

    # Average text height from the sorted box heights.  The two 5s are
    # presumably the percentages trimmed from each end -- TODO confirm
    # against trimmed_mean.
    heights = numpy.array([box.height() for box in self.boxes])
    sorted_heights = numpy.sort(heights)
    self.avgheight = trimmed_mean(sorted_heights, 5, 5)

    # Remove large or weird boxes, judged against the average height.
    self.boxes = strip_non_chars(self.inverted, self.boxes, self.avgheight)
def calc_bounding_boxes(self):
    """Get bounding boxes of connected components.

    Works on a labelled copy of ``self.inverted``.  Sets ``self.boxes``
    to the component bounding boxes (excluding any whose area exceeds 95%
    of the image area, then filtered by ``strip_non_chars``) and
    ``self.avgheight`` to the trimmed mean of the box heights.
    """
    # NOTE(review): an identical definition of this function appears just
    # before this one in the file; if both share a scope, this later one
    # is the one that takes effect.
    components = iulib.intarray()
    components.copy(self.inverted)
    iulib.label_components(components, False)

    component_rects = iulib.rectarray()
    iulib.bounding_boxes(component_rects, components)

    # Upper bound on box area: 95% of the full image.
    max_area = self.inverted.dim(0) * self.inverted.dim(1) * 0.95

    self.boxes = []
    for n in range(component_rects.length()):
        if component_rects.at(n).area() > max_area:
            continue
        self.boxes.append(i2r(component_rects.at(n)))

    # Average text height via a trimmed mean of the sorted heights (the
    # 5, 5 arguments look like per-end trim percentages -- TODO confirm).
    box_heights = numpy.sort(
        numpy.array([rect.height() for rect in self.boxes]))
    self.avgheight = trimmed_mean(box_heights, 5, 5)

    # Remove large or weird boxes from the inverted image's box list.
    self.boxes = strip_non_chars(self.inverted, self.boxes, self.avgheight)