Esempio n. 1
0
 def process(self, input):
     """
     Crop an image, using IULIB.

     The crop rectangle is read from the "x0", "y0", "x1" and "y1"
     parameters.  Any of them that is negative, missing, or for which
     int() raises TypeError falls back to the corresponding outer
     dimension of the image.

     input: 2-D image array; its shape is read as (height, width).
     return: ocrolib.narray2numpy() of the extracted subimage.
     """
     height, width = input.shape

     def bound(name, fallback):
         # Read one crop parameter; only TypeError is treated as
         # "not set", any other conversion error still propagates.
         try:
             value = int(self._params.get(name, -1))
         except TypeError:
             return fallback
         return fallback if value < 0 else value

     x0 = bound("x0", 0)
     y0 = bound("y0", 0)
     x1 = bound("x1", width)
     y1 = bound("y1", height)
     # flip the coord system from HOCR to internal: the y axis is
     # mirrored, so the offsets are taken from the image height
     # (shape[0]), not the width.
     iy0 = height - y1
     iy1 = height - y0
     iulibbin = ocrolib.numpy2narray(input)
     out = ocrolib.iulib.bytearray()
     ocrolib.iulib.extract_subimage(out, iulibbin, x0, iy0, x1, iy1)
     return ocrolib.narray2numpy(out)
Esempio n. 2
0
    def process(self, binary, boxes):
        """Transcribe every line box of a page.

        binary: 2-D page image; its shape is read as (height, width).
        boxes: mapping whose "lines" entry is a sequence of
            (x0, y0, x1, y1) boxes; a missing entry means no lines.
        return: the value of utils.hocr_from_data() for the page data.
        """
        self.prepare()
        pageheight, pagewidth = binary.shape
        page = ocrolib.numpy2narray(binary)
        recognised = []
        line_boxes = boxes.get("lines", [])
        total = len(line_boxes)
        for number, box in enumerate(line_boxes):
            set_progress(self.logger, self.progress_func, number, total)
            left, top, right, bottom = box[0], box[1], box[2], box[3]
            # The subimage is requested with the y axis mirrored
            # against the page height.
            strip = ocrolib.iulib.bytearray()
            ocrolib.iulib.extract_subimage(
                strip, page, left, pageheight - bottom, right,
                pageheight - top)
            transcript = self.get_transcript(ocrolib.narray2numpy(strip))
            recognised.append({
                "index": number + 1,
                "bbox": [left, top, right, bottom],
                "text": transcript,
            })
        set_progress(self.logger, self.progress_func, total, total)
        self.cleanup()
        return utils.hocr_from_data({
            "bbox": [0, 0, pagewidth, pageheight],
            "lines": recognised,
        })
Esempio n. 3
0
 def process(self, input):
     """
     Crop an image, using IULIB.

     The crop rectangle comes from the "x0", "y0", "x1" and "y1"
     parameters.  A parameter that is negative, missing, or for which
     int() raises TypeError is replaced by the matching outer
     dimension of the image.

     input: 2-D image array; its shape is read as (height, width).
     return: ocrolib.narray2numpy() of the extracted subimage.
     """
     height, width = input.shape
     # Defaults are the outer dimensions of the image.
     crop = {"x0": 0, "y0": 0, "x1": width, "y1": height}
     for key in ("x0", "y0", "x1", "y1"):
         try:
             value = int(self._params.get(key, -1))
         except TypeError:
             # Only TypeError means "not set"; other conversion
             # errors still propagate to the caller.
             continue
         if value >= 0:
             crop[key] = value
     # flip the coord system from HOCR to internal: the y axis is
     # mirrored, so subtract from the image height (shape[0]).
     iy0 = height - crop["y1"]
     iy1 = height - crop["y0"]
     iulibbin = ocrolib.numpy2narray(input)
     out = ocrolib.iulib.bytearray()
     ocrolib.iulib.extract_subimage(
         out, iulibbin, crop["x0"], iy0, crop["x1"], iy1)
     return ocrolib.narray2numpy(out)
Esempio n. 4
0
    def process(self, binary, boxes):
        """
        Recognise the text of each line box on a page.

        binary: 2-D page image; its shape is read as (height, width).
        boxes: mapping whose "lines" entry is a sequence of
            (x0, y0, x1, y1) boxes; a missing entry means no lines.
        return: the value of utils.hocr_from_data() for the page data.
        """
        self.prepare()
        pageheight, pagewidth = binary.shape
        source = ocrolib.numpy2narray(binary)
        page_data = {"bbox": [0, 0, pagewidth, pageheight], "lines": []}
        line_boxes = boxes.get("lines", [])
        count = len(line_boxes)
        for position, rect in enumerate(line_boxes):
            set_progress(self.logger, self.progress_func, position, count)
            # Mirror the vertical extent against the page height
            # before asking for the subimage.
            flipped_low = pageheight - rect[3]
            flipped_high = pageheight - rect[1]
            cutout = ocrolib.iulib.bytearray()
            ocrolib.iulib.extract_subimage(
                cutout, source, rect[0], flipped_low, rect[2],
                flipped_high)
            entry = {
                "index": position + 1,
                "bbox": [rect[0], rect[1], rect[2], rect[3]],
                "text": self.get_transcript(ocrolib.narray2numpy(cutout)),
            }
            page_data["lines"].append(entry)
        set_progress(self.logger, self.progress_func, count, count)
        self.cleanup()
        return utils.hocr_from_data(page_data)
Esempio n. 5
0
 def process(self, input):
     """Fill the rectangles given by the "boxes" parameter with 255.

     The parameter string is parsed by get_coords(); when that
     yields no rectangles the input is returned unchanged.  Otherwise
     the rectangles go through sanitise_coords() (with the image
     width and height) and flip_coord() (with the image height), and
     each one is filled on an IULIB copy of the image.
     """
     height = input.shape[0]
     rects = get_coords(self._params.get("boxes", ""))
     if len(rects) == 0:
         return input
     width = input.shape[1]
     flipped = [
         flip_coord(box, height)
         for box in sanitise_coords(rects, width, input.shape[0])
     ]
     canvas = ocrolib.numpy2narray(input)
     for box in flipped:
         ocrolib.iulib.fill_rect(
             canvas, box.x0, box.y0, box.x1, box.y1, 255)
     return ocrolib.narray2numpy(canvas)
Esempio n. 6
0
 def process(self, input):
     """Blank out regions of an image by filling them with 255.

     Regions are read from the "boxes" parameter via get_coords().
     With no regions the input is handed back as-is; otherwise each
     region is sanitised against the image width and height, flipped
     against the image height, and filled using IULIB.
     """
     height = input.shape[0]
     regions = get_coords(self._params.get("boxes", ""))
     if len(regions) == 0:
         return input
     cleaned = sanitise_coords(regions, input.shape[1], input.shape[0])
     targets = []
     for region in cleaned:
         targets.append(flip_coord(region, height))
     image = ocrolib.numpy2narray(input)
     fill = ocrolib.iulib.fill_rect
     for target in targets:
         fill(image, target.x0, target.y0, target.x1, target.y1, 255)
     return ocrolib.narray2numpy(image)
Esempio n. 7
0
    def process(self, binary, boxes):
        """Transcribe every column box of a page and merge the results.

        binary: 2-D page image; its shape is read as (height, width).
        boxes: mapping whose "columns" entry is a sequence of
            (x0, y0, x1, y1) boxes; a missing entry means no columns.
        return: the value of utils.merge_hocr() for the per-column
            transcripts.
        """
        self.prepare()
        pageheight, pagewidth = binary.shape
        page = ocrolib.numpy2narray(binary)
        transcripts = []  # one get_transcript() result per column
        column_boxes = boxes.get("columns", [])
        total = len(column_boxes)
        for number, box in enumerate(column_boxes):
            set_progress(self.logger, self.progress_func, number, total)
            # Mirror the vertical extent against the page height
            # before asking for the subimage.
            column = ocrolib.iulib.bytearray()
            ocrolib.iulib.extract_subimage(
                column, page, box[0], pageheight - box[3], box[2],
                pageheight - box[1])
            transcripts.append(
                self.get_transcript(ocrolib.narray2numpy(column)))
        set_progress(self.logger, self.progress_func, total, total)
        self.cleanup()
        return utils.merge_hocr(transcripts)
Esempio n. 8
0
    def process(self, binary, boxes):
        """Recognise the text of each column box and merge the output.

        binary: 2-D page image; its shape is read as (height, width).
        boxes: mapping whose "columns" entry is a sequence of
            (x0, y0, x1, y1) boxes; a missing entry means no columns.
        return: the value of utils.merge_hocr() for the per-column
            transcripts.
        """
        self.prepare()
        pageheight, pagewidth = binary.shape
        source = ocrolib.numpy2narray(binary)
        pieces = []  # get_transcript() output, in column order
        column_boxes = boxes.get("columns", [])
        count = len(column_boxes)
        for position, rect in enumerate(column_boxes):
            set_progress(self.logger, self.progress_func, position, count)
            # The y axis is mirrored against the page height for
            # the subimage request.
            flipped = (
                rect[0],
                pageheight - rect[3],
                rect[2],
                pageheight - rect[1],
            )
            cutout = ocrolib.iulib.bytearray()
            ocrolib.iulib.extract_subimage(cutout, source, *flipped)
            as_numpy = ocrolib.narray2numpy(cutout)
            pieces.append(self.get_transcript(as_numpy))
        set_progress(self.logger, self.progress_func, count, count)
        self.cleanup()
        return utils.merge_hocr(pieces)
Esempio n. 9
0
 def segment_portion(self, portion, dx, dy, pheight):
     """Segment one chunk of a page and return its extracted boxes.

     The chunk is converted with ocrolib.narray2numpy(), segmented by
     self._segmenter, and the segmentation is passed to
     self.extract_boxes() along with self._regions and the dx, dy
     and pheight arguments, which are forwarded unchanged.
     """
     chunk = ocrolib.narray2numpy(portion)
     segmentation = self._segmenter.segment(chunk)
     return self.extract_boxes(
         self._regions, segmentation, dx, dy, pheight)
Esempio n. 10
0
 def segment_portion(self, portion, dx, dy, pheight):
     """Run the segmenter over one chunk and extract its boxes.

     portion is converted to a numpy array before being given to
     self._segmenter.segment(); dx, dy and pheight are handed on to
     self.extract_boxes() untouched, together with self._regions.
     """
     as_numpy = ocrolib.narray2numpy(portion)
     result = self._segmenter.segment(as_numpy)
     boxes = self.extract_boxes(self._regions, result, dx, dy, pheight)
     return boxes