Exemple #1
0
    def find_columns(self):
        """Detect columns in the part of the image still to be segmented.

        The region of ``self.inverted`` from (0, 0) to
        (``self.inverted.dim(0)``, ``self.topptr``) is extracted, summed
        along axis 1 and passed through ``high_pass_median`` with a
        threshold of 0.20.  Candidate columns found in that profile are
        narrowed by ``filter_columns`` using the "columns" parameter
        (default 1) and appended to ``self.columns``.
        """
        region = iulib.bytearray()
        iulib.extract_subimage(
            region, self.inverted,
            0, 0, self.inverted.dim(0), self.topptr)

        # presumably one summed value per x position - confirm against
        # the axis order produced by iulib.numpy
        profile = iulib.numpy(region).sum(axis=1)
        candidates = self.get_possible_columns(
            high_pass_median(profile, 0.20))

        wanted = int(self._params.get("columns", 1))
        chosen = self.filter_columns(candidates, wanted)
        self.columns.extend(chosen)
    def find_columns(self):
        """Detect columns in the part of the image still to be segmented.

        The region of ``self.inverted`` from (0, 0) to
        (``self.inverted.dim(0)``, ``self.topptr``) is extracted, summed
        along axis 1 and passed through ``high_pass_median`` with a
        threshold of 0.20.  Candidate columns found in that profile are
        narrowed by ``filter_columns`` using the "columns" parameter
        (default 1) and appended to ``self.columns``.
        """
        region = iulib.bytearray()
        iulib.extract_subimage(
            region, self.inverted,
            0, 0, self.inverted.dim(0), self.topptr)

        # presumably one summed value per x position - confirm against
        # the axis order produced by iulib.numpy
        profile = iulib.numpy(region).sum(axis=1)
        candidates = self.get_possible_columns(
            high_pass_median(profile, 0.20))

        wanted = int(self._params.get("columns", 1))
        chosen = self.filter_columns(candidates, wanted)
        self.columns.extend(chosen)
Exemple #3
0
    def init(self):
        """Set up segmentation state once the input image is available.

        Reads ``self.inarray`` and populates ``self.topptr``,
        ``self.inverted``, ``self.textlines`` and ``self.columns``;
        ``calc_bounding_boxes`` is run after the inverted copy exists.
        """
        source = self.inarray

        # Marks the region that still has to be segmented; it starts
        # at the top of the image.
        self.topptr = source.dim(1)

        # Work on an inverted copy so the input array is left untouched.
        self.inverted = iulib.bytearray()
        self.inverted.copy(source)
        iulib.binary_invert(self.inverted)
        self.calc_bounding_boxes()

        # Result accumulators: extracted line rectangles and columns.
        self.textlines, self.columns = [], []
    def init(self):
        """Set up segmentation state once the input image is available.

        Reads ``self.inarray`` and populates ``self.topptr``,
        ``self.inverted``, ``self.textlines`` and ``self.columns``;
        ``calc_bounding_boxes`` is run after the inverted copy exists.
        """
        source = self.inarray

        # Marks the region that still has to be segmented; it starts
        # at the top of the image.
        self.topptr = source.dim(1)

        # Work on an inverted copy so the input array is left untouched.
        self.inverted = iulib.bytearray()
        self.inverted.copy(source)
        iulib.binary_invert(self.inverted)
        self.calc_bounding_boxes()

        # Result accumulators: extracted line rectangles and columns.
        self.textlines, self.columns = [], []
Exemple #5
0
def remove_border(narray, average_char_height):
    """Try to crop away anything lying in a likely border region.

    The image is projected along both axes, each projection is passed
    through ``high_pass_median`` with a threshold of
    ``5.0 / average_char_height``, and the image is cropped to the span
    between the first and last non-zero entry of each filtered
    projection.

    Args:
        narray: iulib image array to crop (it is not modified).
        average_char_height: average character height used to scale the
            high-pass threshold; must be non-zero.

    Returns:
        A new ``iulib.bytearray`` holding the cropped image.  If either
        filtered projection has no non-zero entries there is nothing to
        anchor the crop on, so an unmodified copy of ``narray`` is
        returned instead.
    """
    na = iulib.numpy(narray)
    hpr = na.sum(axis=0)
    vpr = na.sum(axis=1)
    threshold = 5.0 / average_char_height
    hhp = high_pass_median(hpr, threshold)
    vhp = high_pass_median(vpr, threshold)

    vidx = vhp.nonzero()[0]
    hidx = hhp.nonzero()[0]

    b = iulib.bytearray()
    if len(vidx) == 0 or len(hidx) == 0:
        # Nothing survived the high-pass filter: indexing [0] / [-1]
        # would raise IndexError, so hand back the image as it came in.
        b.copy(narray)
        return b

    iulib.extract_subimage(b, narray, int(vidx[0]), int(hidx[0]),
                           int(vidx[-1]), int(hidx[-1]))
    return b
def remove_border(narray, average_char_height):
    """Try to crop away anything lying in a likely border region.

    The image is projected along both axes, each projection is passed
    through ``high_pass_median`` with a threshold of
    ``5.0 / average_char_height``, and the image is cropped to the span
    between the first and last non-zero entry of each filtered
    projection.

    Args:
        narray: iulib image array to crop (it is not modified).
        average_char_height: average character height used to scale the
            high-pass threshold; must be non-zero.

    Returns:
        A new ``iulib.bytearray`` holding the cropped image.  If either
        filtered projection has no non-zero entries there is nothing to
        anchor the crop on, so an unmodified copy of ``narray`` is
        returned instead.
    """
    na = iulib.numpy(narray)
    hpr = na.sum(axis=0)
    vpr = na.sum(axis=1)
    threshold = 5.0 / average_char_height
    hhp = high_pass_median(hpr, threshold)
    vhp = high_pass_median(vpr, threshold)

    vidx = vhp.nonzero()[0]
    hidx = hhp.nonzero()[0]

    b = iulib.bytearray()
    if len(vidx) == 0 or len(hidx) == 0:
        # Nothing survived the high-pass filter: indexing [0] / [-1]
        # would raise IndexError, so hand back the image as it came in.
        b.copy(narray)
        return b

    iulib.extract_subimage(b, narray, int(vidx[0]), int(hidx[0]),
                           int(vidx[-1]), int(hidx[-1]))
    return b
Exemple #7
0
    def find_lines(self):
        """Find text-line rectangles inside every detected column.

        For each rectangle in ``self.columns`` the part of
        ``self.inverted`` between y=0 and ``self.topptr`` is handed to
        ``get_lines_by_projection`` together with the "highpass"
        parameter.  A returned region is skipped when
        ``height - self.avgheight < self.avgheight / 3``.  Every region
        that is kept gets its own ``Rectangle(0, 0, 0, 0)`` into which
        all character boxes overlapping the region are merged with
        ``include``.  The resulting rectangles are appended to
        ``self.textlines``; a region that no character box overlaps
        still contributes its untouched ``Rectangle(0, 0, 0, 0)``.
        """
        for colrect in self.columns:
            newrect = Rectangle(colrect.x0, 0, colrect.x1, self.topptr)
            if newrect.area() < 1:
                continue
            portion = iulib.bytearray()
            iulib.extract_subimage(portion, self.inverted, *newrect.points())
            regions = get_lines_by_projection(
                portion, float(self._params.get("highpass")))
            plines = []
            for bottom, top in regions:
                height = top - bottom
                if height - self.avgheight < self.avgheight / 3:
                    continue
                plines.append(Rectangle(colrect.x0, bottom, colrect.x1, top))

            charboxes = self.get_char_boxes(self.boxes)
            colboxes = [b for b in charboxes
                        if b.overlaps(colrect.grow(10, 10))]
            # Order boxes by descending y1.  A key function is used
            # instead of a cmp-style comparator because ``cmp`` and the
            # positional comparator argument no longer exist in Python 3;
            # sort-then-reverse keeps the original ordering of ties.
            colboxes.sort(key=lambda box: box.y1)
            colboxes.reverse()

            # One accumulator rectangle per candidate line, same order.
            clines = [Rectangle(0, 0, 0, 0) for _ in plines]

            for char in colboxes:
                for pline, cline in zip(plines, clines):
                    if char.overlaps(pline):
                        cline.include(char)
            self.textlines.extend(clines)
Exemple #8
0
    def find_lines(self):
        """Find text-line rectangles inside every detected column.

        For each rectangle in ``self.columns`` the part of
        ``self.inverted`` between y=0 and ``self.topptr`` is handed to
        ``get_lines_by_projection`` together with the "highpass"
        parameter.  A returned region is skipped when
        ``height - self.avgheight < self.avgheight / 3``.  Every region
        that is kept gets its own ``Rectangle(0, 0, 0, 0)`` into which
        all character boxes overlapping the region are merged with
        ``include``.  The resulting rectangles are appended to
        ``self.textlines``; a region that no character box overlaps
        still contributes its untouched ``Rectangle(0, 0, 0, 0)``.
        """
        for colrect in self.columns:
            newrect = Rectangle(colrect.x0, 0, colrect.x1, self.topptr)
            if newrect.area() < 1:
                continue
            portion = iulib.bytearray()
            iulib.extract_subimage(portion, self.inverted, *newrect.points())
            regions = get_lines_by_projection(
                portion, float(self._params.get("highpass")))
            plines = []
            for bottom, top in regions:
                height = top - bottom
                if height - self.avgheight < self.avgheight / 3:
                    continue
                plines.append(Rectangle(colrect.x0, bottom, colrect.x1, top))

            charboxes = self.get_char_boxes(self.boxes)
            colboxes = [b for b in charboxes
                        if b.overlaps(colrect.grow(10, 10))]
            # Order boxes by descending y1.  A key function is used
            # instead of a cmp-style comparator because ``cmp`` and the
            # positional comparator argument no longer exist in Python 3;
            # sort-then-reverse keeps the original ordering of ties.
            colboxes.sort(key=lambda box: box.y1)
            colboxes.reverse()

            # One accumulator rectangle per candidate line, same order.
            clines = [Rectangle(0, 0, 0, 0) for _ in plines]

            for char in colboxes:
                for pline, cline in zip(plines, clines):
                    if char.overlaps(pline):
                        cline.include(char)
            self.textlines.extend(clines)