Python pil2array 예제들, ocrolib.pil2array Python 예제들

예제 #1

0

파일 보기

    def process(self):
        """Classify the layout of every input page and record the result.

        Loads the model named by the ``model_path`` parameter and the pickled
        class mapping named by ``class_mapping_path``. For each input file the
        binarized page image is resized, scaled to the 0..1 range, passed to
        ``self.start_test`` and the result is handed to ``self.write_to_mets``.

        Terminates the process with exit status 1 when no GPU is available or
        when the model file does not exist.
        """
        if not tf.test.is_gpu_available():
            LOG.error("Your system has no CUDA installed. No GPU detected.")
            sys.exit(1)
        model_path = Path(self.parameter['model_path'])
        class_mapper_path = Path(self.parameter['class_mapping_path'])
        if not model_path.is_file():
            LOG.error("""\
                Layout Classfication model was not found at '%s'. Make sure the `model_path` parameter
                points to the local model path.
                model can be downloaded from http://url
                """ % model_path)
            sys.exit(1)

        LOG.info('Loading model from file %s', model_path)
        model = self.create_model(str(model_path))
        # Load the pickled mapping and invert it (value -> key).
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # `class_mapping_path` at trusted files.
        with open(str(class_mapper_path), "rb") as pickle_in:
            class_indices = pickle.load(pickle_in)
        label_mapping = {v: k for k, v in class_indices.items()}

        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page = pcgts.get_Page()
            fname = page.imageFilename
            page_id = input_file.pageId or input_file.ID

            # Record this processing step and its parameters in the metadata.
            metadata = pcgts.get_Metadata()
            metadata.add_MetadataItem(
                MetadataItemType(
                    type_="processingStep",
                    name=self.ocrd_tool['steps'][0],
                    value=TOOL,
                    Labels=[
                        LabelsType(
                            Label=[
                                LabelType(type_=name,
                                          value=self.parameter[name])
                                for name in self.parameter
                            ])
                    ]))

            LOG.info("INPUT FILE %s", page_id)

            page_image, _, _ = self.workspace.image_from_page(
                page, page_id, feature_selector='binarized')

            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS
            # (the old alias was removed in Pillow 10).
            img_array = ocrolib.pil2array(
                page_image.resize((500, 600), Image.LANCZOS))
            img_array = img_array * 1. / 255.
            # Add a leading and a trailing axis of length 1 for the model.
            img_array = img_array[np.newaxis, :, :, np.newaxis]
            results = self.start_test(model, img_array, fname, label_mapping)
            LOG.info(results)

            physical_id = "PHYS_000" + str(n)
            self.workspace.mets.set_physical_page_for_file(
                physical_id, input_file)
            self.create_logmap_smlink(pcgts)
            self.write_to_mets(results, physical_id)

예제 #2

0

파일 보기

파일: ocrd_anybaseocr_cropping.py 프로젝트: profintegra/ocrd_anybaseocr

    def _process_segment(self, page_image, page, page_xywh, page_id, input_file, n):
        """Detect the page border, store it on ``page`` and save the cropped image.

        The border is the first detected text area when one is usable,
        otherwise the result of ``self.select_borderLine``. The cropped image
        is saved to the output file group and referenced from ``page`` as an
        AlternativeImage.

        ``n`` is accepted for interface compatibility and is not used here.
        """
        img_array = ocrolib.pil2array(page_image)

        # Check if image is RGB or not #FIXME: check not needed anymore?
        if len(img_array.shape) == 2:
            img_array = np.stack((img_array,)*3, axis=-1)

        img_array_bin = np.array(
            img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)
        colSeparator = int(
            width * self.parameter['colSeparator'])

        # Decide whether the first text area is usable as the border.
        use_textarea = False
        if len(textarea) > 1:
            textarea = self.crop_area(
                textarea, img_array_bin, img_array_rr_ta, colSeparator)
            use_textarea = len(textarea) != 0
        elif len(textarea) == 1:
            box = textarea[0]
            # A single text area only counts when it covers more than half
            # of the page.
            use_textarea = (height*width*0.5 <
                            abs(box[2]-box[0]) * abs(box[3]-box[1]))

        # NOTE(review): the original computed a padded box (x1, y1, x2, y2)
        # for the single-area case but never used it; that dead code was
        # removed. Confirm the padding was not meant to be applied.
        if use_textarea:
            min_x, min_y, max_x, max_y = textarea[0]
        else:
            min_x, min_y, max_x, max_y = self.select_borderLine(
                img_array_rr, lineDetectH, lineDetectV)

        border_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]
        border_polygon = coordinates_for_segment(border_polygon, page_image, page_xywh)
        border_points = points_from_polygon(border_polygon)
        brd = BorderType(Coords=CoordsType(border_points))
        page.set_Border(brd)

        page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
        page_xywh['features'] += ',cropped'

        file_id = make_file_id(input_file, self.output_file_grp)

        file_path = self.workspace.save_image_file(page_image,
                                                   file_id + '-IMG',
                                                   page_id=page_id,
                                                   file_grp=self.output_file_grp)
        page.add_AlternativeImage(AlternativeImageType(
            filename=file_path, comments=page_xywh['features']))

예제 #3

0

파일 보기

파일: ocrd_anybaseocr_block_segmentation.py 프로젝트: sepastian/ocrd_anybaseocr

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n, mrcnn_model, class_names):
        """Run ``mrcnn_model`` on the page and add one TextRegion per detection.

        Each detected box is cut out of the page image, saved to
        ``self.image_grp`` and attached to a new TextRegion whose type is
        looked up in ``class_names`` by the detected class id.
        """
        img_array = ocrolib.pil2array(page_image)
        results = mrcnn_model.detect([img_array], verbose=1)
        r = results[0]

        page_xywh['features'] += ',blksegmented'

        # The file ID stem is the same for every region of this page.
        file_id = input_file.ID.replace(self.input_file_grp,
                                        self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        for i, roi in enumerate(r['rois']):
            class_id = r['class_ids'][i]

            # NOTE(review): `width` is the length of the FIRST array axis and
            # min_x/max_x index that same axis below; confirm this naming
            # against the roi layout returned by the model.
            width, height, _ = img_array.shape
            min_x, min_y, max_x, max_y = roi[0], roi[1], roi[2], roi[3]

            # Small post-processing for paragraphs (class id 2): widen the
            # box a little so the outermost characters are not cut off.
            # Fixes: the lower bound was compared against `width` (never
            # true for a box inside the image) and the upper adjustment was
            # applied to min_x instead of max_x.
            if class_id == 2:
                if (min_x - 5) > 0:
                    min_x -= 5
                if (max_x + 10) < width:
                    max_x += 10

            # Extract the region from the page array.
            region_img = img_array[min_x:max_x, min_y:max_y]
            region_img = ocrolib.array2pil(region_img)

            file_path = self.workspace.save_image_file(region_img,
                                                       file_id + "_" + str(i),
                                                       page_id=page_id,
                                                       file_grp=self.image_grp)

            ai = AlternativeImageType(filename=file_path,
                                      comments=page_xywh['features'])
            coords = CoordsType(
                "%i,%i %i,%i %i,%i %i,%i" %
                (min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y))
            textregion = TextRegionType(Coords=coords,
                                        type_=class_names[class_id])
            textregion.add_AlternativeImage(ai)
            page.add_TextRegion(textregion)

예제 #4

0

파일 보기

    def process(self):
        """Classify the layout of every input page and record the result.

        Requires exactly one input and one output file group. Loads the model
        and the pickled class mapping resolved from the ``model_path`` and
        ``class_mapping_path`` parameters, then for each input file resizes
        the binarized page image, scales it to the 0..1 range, passes it to
        ``self.start_test`` and hands the result to ``self.write_to_mets``.

        A missing GPU is only logged. A missing model file terminates the
        process with exit status 1.
        """
        LOG = getLogger('OcrdAnybaseocrLayoutAnalyser')
        if not tf.test.is_gpu_available():
            # Logged only; processing continues without a GPU.
            LOG.error("Your system has no CUDA installed. No GPU detected.")
        assert_file_grp_cardinality(self.input_file_grp, 1)
        assert_file_grp_cardinality(self.output_file_grp, 1)
        model_path = Path(self.resolve_resource(self.parameter['model_path']))
        class_mapper_path = Path(self.resolve_resource(self.parameter['class_mapping_path']))
        if not model_path.is_file():
            LOG.error("""\
                Layout Classfication model was not found at '%s'. Make sure the `model_path` parameter
                points to the local model path.
                model can be downloaded from http://url
                """ % model_path)
            sys.exit(1)

        LOG.info('Loading model from file %s', model_path)
        model = self.create_model(str(model_path))
        # Load the pickled mapping and invert it (value -> key).
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # `class_mapping_path` at trusted files.
        with open(str(class_mapper_path), "rb") as pickle_in:
            class_indices = pickle.load(pickle_in)
        label_mapping = {v: k for k, v in class_indices.items()}

        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page = pcgts.get_Page()
            fname = page.imageFilename
            page_id = input_file.pageId or input_file.ID

            self.add_metadata(pcgts)
            LOG.info("INPUT FILE %s", page_id)

            page_image, _, _ = self.workspace.image_from_page(
                page, page_id, feature_selector='binarized')

            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS
            # (the old alias was removed in Pillow 10).
            img_array = ocrolib.pil2array(
                page_image.resize((500, 600), Image.LANCZOS))
            img_array = img_array * 1./255.
            # Add a leading and a trailing axis of length 1 for the model.
            img_array = img_array[np.newaxis, :, :, np.newaxis]
            results = self.start_test(model, img_array, fname, label_mapping)
            LOG.info(results)

            physical_id = "PHYS_000" + str(n)
            self.workspace.mets.set_physical_page_for_file(physical_id, input_file)
            self.create_logmap_smlink(pcgts)
            self.write_to_mets(results, physical_id)

예제 #5

0

파일 보기

파일: ocrd_anybaseocr_layout_analysis.py 프로젝트: wrznr/OCR-D-LAYoutERkennung

    def process(self):
        """Classify the layout of every input page and record the result.

        Loads the model named by the ``model_path`` parameter and the pickled
        class mapping named by ``class_mapping_path``. For each input file the
        page image is opened from its file name, resized, passed to
        ``self.start_test`` and the result is handed to ``self.write_to_mets``.

        Terminates the process with exit status 1 when no GPU is available or
        when the model file does not exist.
        """
        if not tf.test.is_gpu_available():
            LOG.error("Your system has no CUDA installed. No GPU detected.")
            sys.exit(1)

        model_path = Path(self.parameter['model_path'])
        class_mapper_path = Path(self.parameter['class_mapping_path'])

        # Fix: verify the model file BEFORE trying to load it. The original
        # loaded the model and mapping once ahead of this check and then a
        # second time after it.
        if not model_path.is_file():
            LOG.error("""\
                Layout Classfication model was not found at '%s'. Make sure the `model_path` parameter
                points to the local model path.

                model can be downloaded from http://url
                """ % model_path)
            sys.exit(1)

        # Fix: the message had no %s placeholder for its argument, which
        # makes the logging module report a formatting error.
        LOG.info('Loading model from file %s', model_path)
        model = self.create_model(str(model_path))
        # Load the pickled class mapping.
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # `class_mapping_path` at trusted files.
        with open(str(class_mapper_path), "rb") as pickle_in:
            class_indices = pickle.load(pickle_in)

        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            LOG.info("INPUT FILE %s", fname)
            # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS
            # (the old alias was removed in Pillow 10). The context manager
            # closes the image file once the resized copy has been made.
            with Image.open(fname) as img:
                img_array = ocrolib.pil2array(
                    img.resize((500, 600), Image.LANCZOS))
            # Add a leading and a trailing axis of length 1 for the model.
            img_array = img_array[np.newaxis, :, :, np.newaxis]
            results = self.start_test(model, img_array, fname, class_indices)
            LOG.info(results)

            physical_id = "PHYS_000" + str(n)
            self.workspace.mets.set_physical_page_for_file(physical_id, input_file)
            self.create_logmap_smlink(pcgts)
            self.write_to_mets(results, physical_id)

예제 #6

0

파일 보기

파일: ocrd_anybaseocr_tiseg.py 프로젝트: sepastian/ocrd_anybaseocr

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Separate text from non-text content and save the text image.

        The page is converted to an inverted, max-normalised array, a
        seed-filled mask is built with the helper methods of this class, and
        the text part is thresholded at its midrange. The resulting image is
        saved to ``self.image_grp`` and referenced from ``page`` as an
        AlternativeImage.
        """
        pixels = ocrolib.pil2array(page_image)
        if pixels.ndim > 2:
            # Collapse the third axis by averaging.
            pixels = np.mean(pixels, 2)
        inverted = 1 - pixels / pixels.max()
        n_rows, n_cols = inverted.shape

        # Mask and seed images, then their seed fill.
        mask_img, seed_img = self.pixMorphSequence_mask_seed_fill_holes(
            inverted)
        seedfill = self.pixSeedfillBinary(mask_img, seed_img)

        # Dilate with a 3x3 structuring element, then expand the result to
        # the size of the page array.
        seedfill = ndimage.binary_dilation(seedfill, ones((3, 3)))
        seedfill = self.expansion(seedfill, (n_rows, n_cols))

        # Non-text and text parts; the corner pixel is zeroed for
        # visualisation purposes only.
        non_text = array((1 - inverted * seedfill), dtype=int)
        non_text[0, 0] = 0
        text_only = array((1 - inverted * (1 - seedfill)), dtype=int)
        text_only[0, 0] = 0

        page_xywh['features'] += ',tiseged'

        threshold = ocrolib.midrange(text_only)
        bin_image = ocrolib.array2pil(array(255 * (text_only > threshold), 'B'))

        new_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if new_id == input_file.ID:
            new_id = concat_padded(self.image_grp, n)
        saved_path = self.workspace.save_image_file(bin_image,
                                                    new_id,
                                                    page_id=page_id,
                                                    file_grp=self.image_grp)
        alternative = AlternativeImageType(filename=saved_path,
                                           comments=page_xywh['features'])
        page.add_AlternativeImage(alternative)

예제 #7

0

파일 보기

def readFeatures(filename):
    """Read column-wise features from an image file.

    The image is opened as 8-bit grayscale, normalised with the module-level
    ``lnorm`` normaliser, and every pixel column is appended to the
    module-level ``inputs`` list. The number of columns is appended to
    ``seqLengths`` and, wrapped in a list, to ``seqDims``.
    """
    image = Image.open(filename).convert('L')
    # Normalize using OCRopus normalizer
    imagea = ocrolib.pil2array(image)
    lnorm.measure(amax(imagea) - imagea)
    try:
        imagea = lnorm.normalize(imagea, cval=amax(imagea))
    except (ZeroDivisionError, ValueError):
        # Fix: call form of print, valid on Python 3 and prints the same
        # text on Python 2.
        # NOTE(review): despite the message nothing is removed here; the
        # un-normalised image is still processed below.
        print('Bad image, removing')

    # Read image frames (1-column-wide window as tall as the image)
    for w in range(imagea.shape[1]):
        v = imagea[:, w]
        inputs.append(v)

    # seqLengths gets the width of the image; seqDims gets it as a list
    seqLengths.append(imagea.shape[1])
    seqDims.append([seqLengths[-1]])

예제 #8

0

파일 보기

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Binarize a page image and attach the result as an AlternativeImage.

        Steps: normalise the image to the 0..1 range, optionally flatten it by
        subtracting an estimated local white level, estimate low/high
        percentile levels, rescale between them, threshold, and save the
        binary image to ``self.image_grp``.

        Parameters read from ``self.parameter``: ``gray``, ``zoom``, ``perc``,
        ``range``, ``debug``, ``bignore``, ``escale``, ``lo``, ``hi``,
        ``threshold`` and ``show``.

        Returns None. Returns early, without saving anything, when the image
        has a single constant value.
        """
        raw = ocrolib.pil2array(page_image)
        if len(raw.shape) > 2:
            # collapse the third axis by averaging
            raw = np.mean(raw, 2)
        raw = raw.astype("float64")
        # perform image normalization
        image = raw - amin(raw)
        if amax(image) == amin(image):
            LOG.info("# image is empty: %s" % (page_id))
            return
        image /= amax(image)

        # check whether the image is already effectively binarized
        if self.parameter['gray']:
            # the `gray` parameter forces the flattening branch below
            extreme = 0
        else:
            # fraction of pixels that are nearly black or nearly white
            extreme = (np.sum(image < 0.05) +
                       np.sum(image > 0.95)) * 1.0 / np.prod(image.shape)
        if extreme > 0.95:
            comment = "no-normalization"
            # NOTE: `flat` aliases `image` here, so the in-place rescaling
            # further down also modifies `image`.
            flat = image
        else:
            comment = ""
            # if not, we need to flatten it by estimating the local whitelevel
            LOG.info("Flattening")
            m = interpolation.zoom(image, self.parameter['zoom'])
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(self.parameter['range'], 2))
            m = filters.percentile_filter(m,
                                          self.parameter['perc'],
                                          size=(2, self.parameter['range']))
            m = interpolation.zoom(m, 1.0 / self.parameter['zoom'])
            if self.parameter['debug'] > 0:
                clf()
                imshow(m, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])
            # crop both arrays to their common extent before subtracting
            # (presumably the zoom round trip can change the size slightly)
            w, h = minimum(array(image.shape), array(m.shape))
            flat = clip(image[:w, :h] - m[:w, :h] + 1, 0, 1)
            if self.parameter['debug'] > 0:
                clf()
                imshow(flat, vmin=0, vmax=1)
                ginput(1, self.parameter['debug'])

        # estimate low and high thresholds
        LOG.info("Estimating Thresholds")
        d0, d1 = flat.shape
        # ignore a `bignore` fraction of each dimension at every border
        o0, o1 = int(self.parameter['bignore'] * d0), int(
            self.parameter['bignore'] * d1)
        est = flat[o0:d0 - o0, o1:d1 - o1]
        if self.parameter['escale'] > 0:
            # by default, we use only regions that contain
            # significant variance; this makes the percentile
            # based low and high estimates more reliable
            e = self.parameter['escale']
            v = est - filters.gaussian_filter(est, e * 20.0)
            v = filters.gaussian_filter(v**2, e * 20.0)**0.5
            v = (v > 0.3 * amax(v))
            v = morphology.binary_dilation(v, structure=ones((int(e * 50), 1)))
            v = morphology.binary_dilation(v, structure=ones((1, int(e * 50))))
            if self.parameter['debug'] > 0:
                imshow(v)
                ginput(1, self.parameter['debug'])
            # boolean-mask selection: `est` becomes one-dimensional here
            est = est[v]
        lo = stats.scoreatpercentile(est.ravel(), self.parameter['lo'])
        hi = stats.scoreatpercentile(est.ravel(), self.parameter['hi'])
        # rescale the image to get the gray scale image
        LOG.info("Rescaling")
        flat -= lo
        # NOTE(review): there is no guard for hi == lo before this division
        flat /= (hi - lo)
        flat = clip(flat, 0, 1)
        if self.parameter['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, self.parameter['debug'])
        binarized = 1 * (flat > self.parameter['threshold'])

        # output the normalized grayscale and the thresholded images
        # print_info("%s lo-hi (%.2f %.2f) angle %4.1f %s" % (fname, lo, hi, angle, comment))
        LOG.info("%s lo-hi (%.2f %.2f) %s" % (page_id, lo, hi, comment))
        LOG.info("writing")
        if self.parameter['debug'] > 0 or self.parameter['show']:
            clf()
            gray()
            imshow(binarized)
            ginput(1, max(0.1, self.parameter['debug']))

        page_xywh['features'] += ',binarized'

        # convert the 0/1 result into a 0/255 byte image
        bin_array = array(255 * (binarized > ocrolib.midrange(binarized)), 'B')
        bin_image = ocrolib.array2pil(bin_array)

        # derive the output file ID from the input ID, falling back to a
        # padded ID when the input ID does not contain the input group name
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)
        file_path = self.workspace.save_image_file(bin_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))

예제 #9

0

파일 보기

파일: ocrd_anybaseocr_cropping.py 프로젝트: syedsaqibbukhari/docanalysis

    def process(self):
        """Detect the border of every input page and write it to PAGE-XML.

        For each input file the page image is analysed, the border is taken
        from the first detected text area when one is usable and from
        ``self.select_borderLine`` otherwise, and the updated PAGE document is
        added to the output file group.
        """
        # Fix: remember the configured ratio. The original overwrote
        # self.parameter['colSeparator'] with a pixel value on every page, so
        # from the second page on the "ratio" was the previous page's pixel
        # value and the result compounded.
        col_separator_ratio = self.parameter['colSeparator']

        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            fname = pcgts.get_Page().imageFilename
            img = self.workspace.resolve_image_as_pil(fname)
            print("Process file: ", fname)

            img_array = ocrolib.pil2array(img)
            img_array_bin = np.array(img_array > ocrolib.midrange(img_array),
                                     'i')

            lineDetectH = []
            lineDetectV = []
            img_array_rr = self.remove_rular(img_array)

            textarea, img_array_rr_ta, height, width = self.detect_textarea(
                img_array_rr)

            # Presumably the helpers called below read the pixel value from
            # self.parameter (verify against crop_area); it is set for the
            # duration of this page only and restored afterwards.
            self.parameter['colSeparator'] = int(width * col_separator_ratio)
            try:
                use_textarea = False
                if len(textarea) > 1:
                    textarea = self.crop_area(textarea, img_array_bin,
                                              img_array_rr_ta)
                    use_textarea = len(textarea) != 0
                elif len(textarea) == 1:
                    box = textarea[0]
                    # A single text area only counts when it covers more
                    # than half of the page.
                    use_textarea = (height * width * 0.5 <
                                    abs(box[2] - box[0]) * abs(box[3] - box[1]))

                # NOTE(review): the original computed a padded box for the
                # single-area case but never used it; that dead code was
                # removed. Confirm the padding was not meant to be applied.
                if use_textarea:
                    min_x, min_y, max_x, max_y = textarea[0]
                else:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)
            finally:
                self.parameter['colSeparator'] = col_separator_ratio

            brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                               (min_x, min_y, max_x, min_y,
                                                max_x, max_y, min_x, max_y)))
            pcgts.get_Page().set_Border(brd)

            # Use input_file's basename for the new file -
            # this way the files retain the same basenames:
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts).encode('utf-8'))

예제 #10

0

파일 보기

파일: ocrd_anybaseocr_tiseg.py 프로젝트: OCR-D/ocrd_anybaseocr

    def _process_segment(self, page, page_image, page_coords, page_id,
                         input_file):
        """Split a page into text and non-text images and save both.

        With ``self.model`` set, the resized page is passed to
        ``self.model.predict`` and the per-pixel argmax decides the class
        (1 is blanked in the text image, 2 in the non-text image). Without a
        model, a seed-fill mask built by the helper methods of this class is
        used instead. Both images are saved to the output file group and
        referenced from ``page`` as AlternativeImages.
        """
        LOG = getLogger('OcrdAnybaseocrTiseg')

        def blank_percent(part, shape):
            # Share of zero-valued pixels in `part`, relative to `shape`.
            return 100 * (1 - np.count_nonzero(part) / np.prod(shape))

        if self.model:
            resized = page_image.resize((800, 1024), Image.ANTIALIAS)
            I = np.array(ocrolib.pil2array(resized))[np.newaxis, :, :, :]
            LOG.info('I shape %s', I.shape)
            # NOTE(review): this check runs after the 4-axis index above, so
            # it cannot trigger for an array that passed that indexing.
            if len(I.shape) < 3:
                print('Wrong input shape. Image should have 3 channel')

            # get prediction
            prediction = self.model.predict(I)
            out = prediction.reshape((2048, 1600, 3)).argmax(axis=2)

            text_part = np.full(out.shape, 255, 'B')
            text_part[out == 1] = 0
            LOG.info('text: %d percent', blank_percent(text_part, out.shape))

            image_part = np.full(out.shape, 255, 'B')
            image_part[out == 2] = 0
            LOG.info('image: %d percent', blank_percent(image_part, out.shape))

            # Convert to PIL and scale back to the size of the page image.
            image_part = ocrolib.array2pil(image_part).resize(
                page_image.size, Image.BICUBIC)
            text_part = ocrolib.array2pil(text_part).resize(
                page_image.size, Image.BICUBIC)

        else:
            I = ocrolib.pil2array(page_image)

            if I.ndim > 2:
                I = np.mean(I, 2)
            I = 1 - I / I.max()
            full_shape = I.shape
            rows, cols = full_shape

            # Mask and seed images, then their seed fill.
            Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)
            Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

            # Dilate with a 3x3 structuring element, then expand the result
            # to the size of I.
            Iseedfill = ndimage.binary_dilation(Iseedfill, np.ones((3, 3)))
            Iseedfill = self.expansion(Iseedfill, (rows, cols))

            # Text and non-text images
            image_part = np.array(255 * (1 - I * Iseedfill), dtype='B')
            text_part = np.array(255 * (1 - I * (1 - Iseedfill)), dtype='B')
            LOG.info('text: %d percent', blank_percent(text_part, full_shape))
            LOG.info('image: %d percent', blank_percent(image_part, full_shape))

            image_part = ocrolib.array2pil(image_part)
            text_part = ocrolib.array2pil(text_part)

        file_id = make_file_id(input_file, self.output_file_grp)
        # Save the non-text image first, then the text image.
        outputs = (
            (image_part, "_img", ',non_text'),
            (text_part, "_txt", ',clipped'),
        )
        for part, suffix, feature in outputs:
            file_path = self.workspace.save_image_file(
                part,
                file_id + suffix,
                page_id=input_file.pageId,
                file_grp=self.output_file_grp,
            )
            page.add_AlternativeImage(
                AlternativeImageType(filename=file_path,
                                     comments=page_coords['features'] +
                                     feature))

예제 #11

0

파일 보기

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n, model):
        """Split a page into text and non-text images and save both.

        With ``model`` given, ``model.predict_segmentation`` decides the class
        of each pixel (1 is blanked in the text image, 2 in the non-text
        image). Without a model, a seed-fill mask built by the helper methods
        of this class is used and each part is thresholded at its own
        midrange. Both images are saved to ``self.image_grp`` and referenced
        from ``page`` as AlternativeImages.
        """
        I = ocrolib.pil2array(page_image)
        LOG.info('image size: %s', page_image.size)

        if model:

            if len(I.shape) < 3:
                print('Wrong input shape. Image should have 3 channel')

            # get prediction
            # NOTE(review): both calls below write fixed-name files
            # ("/tmp/out.png" and 'out_image.png' in the working directory);
            # this looks like leftover debugging output.
            out = model.predict_segmentation(inp=I, out_fname="/tmp/out.png")
            cv2.imwrite('out_image.png', out * (255 / 2))
            text_part = np.ones(out.shape)
            text_part[np.where(out == 1)] = 0

            image_part = np.ones(out.shape)
            image_part[np.where(out == 2)] = 0

            image_part = array(255 * (image_part), 'B')
            image_part = ocrolib.array2pil(image_part)

            text_part = array(255 * (text_part), 'B')
            text_part = ocrolib.array2pil(text_part)

            # Scale both masks back to the size of the page image.
            text_part = text_part.resize(page_image.size, Image.BICUBIC)
            image_part = image_part.resize(page_image.size, Image.BICUBIC)

        else:

            if len(I.shape) > 2:
                I = np.mean(I, 2)
            I = 1 - I / I.max()
            rows, cols = I.shape

            # Generate Mask and Seed Images
            Imask, Iseed = self.pixMorphSequence_mask_seed_fill_holes(I)

            # Iseedfill: Union of Mask and Seed Images
            Iseedfill = self.pixSeedfillBinary(Imask, Iseed)

            # Dilation of Iseedfill
            mask = ones((3, 3))
            Iseedfill = ndimage.binary_dilation(Iseedfill, mask)

            # Expansion of Iseedfill to become equal in size of I
            Iseedfill = self.expansion(Iseedfill, (rows, cols))

            # Write Text and Non-Text images
            image_part = array((1 - I * Iseedfill), dtype=int)
            text_part = array((1 - I * (1 - Iseedfill)), dtype=int)

            # Fix: threshold each part against ITS OWN midrange. The original
            # referenced an undefined name (`img_part`), then compared the
            # already-converted PIL text image and stored that result as the
            # non-text image.
            text_part = ocrolib.array2pil(
                array(255 * (text_part > ocrolib.midrange(text_part)), 'B'))
            image_part = ocrolib.array2pil(
                array(255 * (image_part > ocrolib.midrange(image_part)), 'B'))

        # Both outputs share the same file ID stem.
        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(
            image_part,
            file_id + "_img",
            page_id=page_id,
            file_grp=self.image_grp,
            force=self.parameter['force'])
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features'] + ',non_text'))

        # ',clipped' is appended only after the non-text image was recorded,
        # so it applies to the text image alone.
        page_xywh['features'] += ',clipped'
        file_path = self.workspace.save_image_file(
            text_part,
            file_id + "_txt",
            page_id=page_id,
            file_grp=self.image_grp,
            force=self.parameter['force'])
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))

예제 #12

0

파일 보기

파일: ocrd_anybaseocr_cropping.py 프로젝트: sepastian/ocrd_anybaseocr

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Detect the page border, store it on ``page`` and save the cropped image.

        The border is the first detected text area when one is usable,
        otherwise the result of ``self.select_borderLine``. The cropped image
        is saved to ``self.image_grp`` and referenced from ``page`` as an
        AlternativeImage.
        """
        img_array = ocrolib.pil2array(page_image)

        # Check if image is RGB or not #FIXME: check not needed anymore?
        if len(img_array.shape) == 2:
            img_array = np.stack((img_array, ) * 3, axis=-1)

        img_array_bin = np.array(img_array > ocrolib.midrange(img_array), 'i')

        lineDetectH = []
        lineDetectV = []
        img_array_rr = self.remove_rular(img_array)

        textarea, img_array_rr_ta, height, width = self.detect_textarea(
            img_array_rr)

        # Fix: the original overwrote self.parameter['colSeparator'] with a
        # pixel value permanently, so on the next call the "ratio" was the
        # previous page's pixel value and the result compounded. The pixel
        # value is now set for the duration of this call only.
        # Presumably the helpers called below read it from self.parameter
        # (verify against crop_area).
        col_separator_ratio = self.parameter['colSeparator']
        self.parameter['colSeparator'] = int(width * col_separator_ratio)
        try:
            use_textarea = False
            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin,
                                          img_array_rr_ta)
                use_textarea = len(textarea) != 0
            elif len(textarea) == 1:
                box = textarea[0]
                # A single text area only counts when it covers more than
                # half of the page.
                use_textarea = (height * width * 0.5 <
                                abs(box[2] - box[0]) * abs(box[3] - box[1]))

            # NOTE(review): the original computed a padded box for the
            # single-area case but never used it; that dead code was
            # removed. Confirm the padding was not meant to be applied.
            if use_textarea:
                min_x, min_y, max_x, max_y = textarea[0]
            else:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)
        finally:
            self.parameter['colSeparator'] = col_separator_ratio

        brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                           (min_x, min_y, max_x, min_y, max_x,
                                            max_y, min_x, max_y)))
        page.set_Border(brd)

        page_image = crop_image(page_image, box=(min_x, min_y, max_x, max_y))
        page_xywh['features'] += ',cropped'

        file_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if file_id == input_file.ID:
            file_id = concat_padded(self.image_grp, n)

        file_path = self.workspace.save_image_file(page_image,
                                                   file_id,
                                                   page_id=page_id,
                                                   file_grp=self.image_grp)
        page.add_AlternativeImage(
            AlternativeImageType(filename=file_path,
                                 comments=page_xywh['features']))

예제 #13

0

파일 보기

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n):
        """Deskew, contrast-normalize and binarize one page image.

        The image is turned into a float array, optionally rotated by an
        estimated skew angle, stretched between two percentile levels and
        thresholded. The angle is stored on ``page`` via ``set_orientation``;
        the binarized image is saved through the workspace and attached to
        ``page`` as an AlternativeImage.

        Args:
            page_image: image converted with ``ocrolib.pil2array``.
            page: PAGE object receiving the orientation and the new image.
            page_xywh: dict whose ``'features'`` string gets ``',deskewed'``
                appended.
            page_id: page identifier passed on to ``save_image_file``.
            input_file: input file entry; its ``ID`` seeds the output file ID.
            n: index used for the fallback output file ID.
        """
        params = self.parameter

        def _announce(message):
            # progress messages are emitted only while 'parallel' is below 2
            if params['parallel'] < 2:
                LOG.info(message)

        def _inner_window(shape):
            # index of the area left after dropping a 'bignore' share of the
            # rows and columns on every side
            rows, cols = shape
            skip_rows = int(params['bignore'] * rows)
            skip_cols = int(params['bignore'] * cols)
            return (slice(skip_rows, rows - skip_rows),
                    slice(skip_cols, cols - skip_cols))

        flat = ocrolib.pil2array(page_image).astype("float64")

        # estimate skew angle and rotate
        if params['maxskew'] > 0:
            _announce("Estimating Skew Angle")
            window = _inner_window(flat.shape)
            # the estimate runs on the inverted image, shifted to a zero minimum
            flat = amax(flat) - flat
            flat -= amin(flat)
            sample = flat[window]
            max_skew = params['maxskew']
            step_count = int(2 * params['maxskew'] * params['skewsteps'])
            candidates = linspace(-max_skew, max_skew, step_count + 1)
            angle = self.estimate_skew_angle(sample, candidates)
            flat = interpolation.rotate(flat,
                                        angle,
                                        mode='constant',
                                        reshape=0)
            # undo the inversion
            flat = amax(flat) - flat
        else:
            angle = 0

        # estimate low and high thresholds
        _announce("Estimating Thresholds")
        window = _inner_window(flat.shape)
        est = flat[window]
        if params['escale'] > 0:
            # by default only regions with significant local variance are
            # used; this makes the percentile based low and high estimates
            # more reliable
            e = params['escale']
            sigma = e * 20.0
            local_dev = est - filters.gaussian_filter(est, sigma)
            local_dev = filters.gaussian_filter(local_dev**2, sigma)**0.5
            keep = (local_dev > 0.3 * amax(local_dev))
            keep = morphology.binary_dilation(
                keep, structure=ones((int(e * 50), 1)))
            keep = morphology.binary_dilation(
                keep, structure=ones((1, int(e * 50))))
            if params['debug'] > 0:
                imshow(keep)
                ginput(1, params['debug'])
            est = est[keep]
        lo = stats.scoreatpercentile(est.ravel(), params['lo'])
        hi = stats.scoreatpercentile(est.ravel(), params['hi'])

        # rescale the image to get the gray scale image
        _announce("Rescaling")
        flat = clip((flat - lo) / (hi - lo), 0, 1)
        if params['debug'] > 0:
            imshow(flat, vmin=0, vmax=1)
            ginput(1, params['debug'])
        deskewed = 1 * (flat > params['threshold'])

        # TODO: it is still unclear whether the angle has to be negated or
        # normalized here; the result affects the following pre-processing
        # steps.

        if angle is None:  # FIXME: quick fix to prevent angle of "none"
            angle = 0

        page.set_orientation(angle)

        page_xywh['features'] += ',deskewed'
        page_image = ocrolib.array2pil(
            array(255 * (deskewed > ocrolib.midrange(deskewed)), 'B'))

        # derive the output file ID from the input one, falling back to a
        # padded ID when the input ID does not contain the input group name
        out_id = input_file.ID.replace(self.input_file_grp, self.image_grp)
        if out_id == input_file.ID:
            out_id = concat_padded(self.image_grp, n)
        saved_path = self.workspace.save_image_file(page_image,
                                                    out_id,
                                                    page_id=page_id,
                                                    file_grp=self.image_grp)
        alternative = AlternativeImageType(filename=saved_path,
                                           comments=page_xywh['features'])
        page.add_AlternativeImage(alternative)

예제 #14

0

파일 보기

파일: ocrd_anybaseocr_textline.py 프로젝트: profintegra/ocrd_anybaseocr

    def _process_segment(self, page_image, page, textregion, region_xywh,
                         page_id, input_file, n):
        """Split one text region image into lines and add them to ``textregion``.

        If the region already has lines they are either removed (parameter
        ``overwrite`` set) or kept, in which case nothing else is done.
        Otherwise the image is binarized and segmented; for every detected
        line, in reading order, a cropped line image is saved to the
        workspace and a ``TextLineType`` with a rectangular outline is
        appended to ``textregion``.

        Args:
            page_image: image of the region, converted with
                ``ocrolib.pil2array``.
            page: not used by this method.
            textregion: region object that receives the new text lines.
            region_xywh: dict passed to ``coordinates_for_segment``; its
                ``'features'`` entry becomes the image comment.
            page_id: identifier used in log messages, line IDs and when
                saving images.
            input_file: input file entry used to build the output file ID.
            n: index that becomes part of the saved line image file IDs.

        Returns:
            None. The method returns early, without adding lines, when
            existing lines are kept, when the scale is implausible or when
            more than ``maxlines`` lines are found.
        """
        LOG = getLogger('OcrdAnybaseocrTextline')
        # check for existing text lines and whether to overwrite them
        if textregion.get_TextLine():
            if self.parameter['overwrite']:
                LOG.info('removing existing TextLines in region "%s"', page_id)
                textregion.set_TextLine([])
            else:
                LOG.warning('keeping existing TextLines in region "%s"',
                            page_id)
                return

        binary = ocrolib.pil2array(page_image)

        # average the channels of colour images, then invert and normalize
        # to the range 0..1 before the cast to unsigned bytes
        if len(binary.shape) > 2:
            binary = np.mean(binary, 2)
        binary = np.array(1 - binary / np.amax(binary), 'B')

        # a 'scale' parameter of 0 requests automatic estimation
        if self.parameter['scale'] == 0:
            scale = psegutils.estimate_scale(binary)
        else:
            scale = self.parameter['scale']

        if np.isnan(
                scale) or scale > 1000.0 or scale < self.parameter['minscale']:
            LOG.warning("%s: bad scale; skipping!\n", scale)
            return

        segmentation = self.compute_segmentation(binary, scale)
        if np.amax(segmentation) > self.parameter['maxlines']:
            LOG.warning("too many lines %i; skipping!\n",
                        (np.amax(segmentation)))
            return
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # process the lines in reading order
        lines = [lines[i] for i in lsort]
        cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])

        for i, l in enumerate(lines):
            # l.bounds is a (row slice, column slice) pair - it indexes the
            # image array below - so the first slice gives the y range and
            # the second one the x range
            min_y, max_y = (l.bounds[0].start, l.bounds[0].stop)
            min_x, max_x = (l.bounds[1].start, l.bounds[1].stop)

            # rectangular outline as [x, y] points
            line_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y],
                            [min_x, max_y]]

            line_polygon = coordinates_for_segment(line_polygon, page_image,
                                                   region_xywh)
            line_points = points_from_polygon(line_polygon)

            # cut the line out of the denoised image, binarize it to 0/255
            # and invert it again for saving
            img = cleaned[l.bounds[0], l.bounds[1]]
            img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
            img = 255 - img
            img = ocrolib.array2pil(img)

            file_id = make_file_id(input_file, self.output_file_grp)
            file_path = self.workspace.save_image_file(
                img,
                file_id + "_" + str(n) + "_" + str(i),
                page_id=page_id,
                file_grp=self.output_file_grp)
            ai = AlternativeImageType(filename=file_path,
                                      comments=region_xywh['features'])
            line_id = '%s_line%04d' % (page_id, i)
            line = TextLineType(custom='readingOrder {index:' + str(i) + ';}',
                                id=line_id,
                                Coords=CoordsType(line_points))
            line.add_AlternativeImage(ai)
            textregion.add_TextLine(line)

예제 #15

0

파일 보기

파일: ocropus.py 프로젝트: amitdo/nidaba

def ocr(image_path, segmentation_path, output_path, model_path):
    """
    Scan a single image with ocropus.

    Reads a single image file from ``image_path``, recognizes every line
    listed in the TEI segmentation at ``segmentation_path`` and writes the
    recognized text as a TEI document into ``output_path``.

    Args:
        image_path (unicode): Path of the input file
        segmentation_path (unicode): Path of the segmentation XML file.
        output_path (unicode): Path of the output file
        model_path (unicode): Path of the recognition model. Must be a pyrnn.gz
                             pickle dump interoperable with ocropus-rpred.

    Returns:
        (unicode): The ``output_path`` argument, unchanged.

    Raises:
        NidabaOcropusException: The recognition model could not be loaded or
                                has no ``lnorm`` attribute. The text of the
                                underlying error is contained in the message.
    """

    try:
        logger.debug('Loading pyrnn from {}'.format(model_path))
        network = ocrolib.load_object(model_path, verbose=0)
        lnorm = getattr(network, "lnorm")
    except Exception as e:
        # generic exceptions carry no ``msg`` attribute; str() works for all
        raise NidabaOcropusException('Something somewhere broke: ' + str(e))
    im = Image.open(image_path)

    logger.debug('Loading TEI segmentation {}'.format(segmentation_path))
    tei = TEIFacsimile()
    with open(segmentation_path, 'r') as seg_fp:
        tei.read(seg_fp)

    logger.debug('Clearing out word/grapheme boxes')
    # ocropus is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('ocropus', 'character recognition')
    for box in tei.lines:
        # box[4] identifies the line; all but the last two entries of box
        # form the crop rectangle
        logger.debug('Recognizing line {}'.format(box[4]))
        ib = tuple(int(x) for x in box[:-2])
        line = ocrolib.pil2array(im.crop(ib))
        # the line normalizer is measured on the inverted image scaled to a
        # maximum of 1
        temp = np.amax(line) - line
        temp = temp * 1.0 / np.amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=np.amax(line))
        if line.ndim == 3:
            # average the colour channels; the result has to be kept
            line = np.mean(line, 2)
        line = ocrolib.lstm.prepare_line(line, 16)
        pred = network.predictString(line)
        pred = ocrolib.normalize_text(pred)
        logger.debug('Scoping line {}'.format(box[4]))
        tei.scope_line(box[4])
        logger.debug('Adding graphemes: {}'.format(pred))
        tei.add_graphemes(pred)
    with open(output_path, 'wb') as fp:
        # file objects have no ``abs_path`` attribute; log the path instead
        logger.debug('Writing TEI to {}'.format(output_path))
        tei.write(fp)
    return output_path

예제 #16

0

파일 보기

파일: ocropus.py 프로젝트: mirskiy/nidaba

def ocr(image_path, segmentation_path, output_path, model_path):
    """
    Scan a single image with ocropus.

    Reads a single image file from ``image_path``, recognizes every line
    listed in the TEI segmentation at ``segmentation_path`` and writes the
    recognized text as a TEI document into ``output_path``.

    Args:
        image_path (unicode): Path of the input file
        segmentation_path (unicode): Path of the segmentation XML file.
        output_path (unicode): Path of the output file
        model_path (unicode): Path of the recognition model. Must be a pyrnn.gz
                             pickle dump interoperable with ocropus-rpred.

    Returns:
        (unicode): The ``output_path`` argument, unchanged.

    Raises:
        NidabaOcropusException: The recognition model could not be loaded or
                                has no ``lnorm`` attribute. The text of the
                                underlying error is contained in the message.
    """

    try:
        logger.debug('Loading pyrnn from {}'.format(model_path))
        network = ocrolib.load_object(model_path, verbose=0)
        lnorm = getattr(network, "lnorm")
    except Exception as e:
        # generic exceptions carry no ``msg`` attribute; str() works for all
        raise NidabaOcropusException('Something somewhere broke: ' + str(e))
    im = Image.open(image_path)

    logger.debug('Loading TEI segmentation {}'.format(segmentation_path))
    tei = TEIFacsimile()
    with open(segmentation_path, 'r') as seg_fp:
        tei.read(seg_fp)

    logger.debug('Clearing out word/grapheme boxes')
    # ocropus is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('ocropus', 'character recognition')
    for box in tei.lines:
        # box[4] identifies the line; all but the last two entries of box
        # form the crop rectangle
        logger.debug('Recognizing line {}'.format(box[4]))
        ib = tuple(int(x) for x in box[:-2])
        line = ocrolib.pil2array(im.crop(ib))
        # the line normalizer is measured on the inverted image scaled to a
        # maximum of 1
        temp = np.amax(line) - line
        temp = temp * 1.0 / np.amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=np.amax(line))
        if line.ndim == 3:
            # average the colour channels; the result has to be kept
            line = np.mean(line, 2)
        line = ocrolib.lstm.prepare_line(line, 16)
        pred = network.predictString(line)
        pred = ocrolib.normalize_text(pred)
        logger.debug('Scoping line {}'.format(box[4]))
        tei.scope_line(box[4])
        logger.debug('Adding graphemes: {}'.format(pred))
        tei.add_graphemes(pred)
    with open(output_path, 'wb') as fp:
        # file objects have no ``abs_path`` attribute; log the path instead
        logger.debug('Writing TEI to {}'.format(output_path))
        tei.write(fp)
    return output_path

예제 #17

0

파일 보기

    def _process_segment(self, page_image, page, region_xywh, page_id,
                         input_file, n):
        """Segment a page image into text lines and add them to a text region.

        The lines go into the last text region of ``page``; if the page has
        none, a region spanning the whole image is created first. For every
        detected line, in reading order, a cropped line image is saved to
        the workspace and a ``TextLineType`` with a rectangular outline is
        appended to the region.

        Args:
            page_image: image converted with ``ocrolib.pil2array``.
            page: PAGE object whose text regions are read and extended.
            region_xywh: dict whose ``'features'`` string gets ``",textline"``
                appended and is used as the image comment.
            page_id: identifier used in warnings and when saving images.
            input_file: input file entry; its ``ID`` seeds the output file ID.
            n: index used for the fallback output file ID.

        Returns:
            None. The method returns early, without adding lines, when the
            scale is implausible or more than ``maxlines`` lines are found.
        """
        binary = ocrolib.pil2array(page_image)
        binary = np.array(1 - binary / np.amax(binary), 'B')
        if page.get_TextRegion() is None or len(page.get_TextRegion()) < 1:
            # the array is indexed [row, column], so axis 0 is the y extent
            # and axis 1 the x extent
            min_y, max_y = (0, binary.shape[0])
            min_x, max_x = (0, binary.shape[1])
            textregion = TextRegionType(
                Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                  (min_x, min_y, max_x, min_y, max_x, max_y,
                                   min_x, max_y)))
            page.add_TextRegion(textregion)
        else:
            textregion = page.get_TextRegion()[-1]
        # NOTE(review): looks like a debugging leftover - writes a fixed file
        # name into the current working directory on every call
        ocrolib.write_image_binary("test.bin.png", binary)
        # a 'scale' parameter of 0 requests automatic estimation
        if self.parameter['scale'] == 0:
            scale = psegutils.estimate_scale(binary)
        else:
            scale = self.parameter['scale']
        if np.isnan(
                scale) or scale > 1000.0 or scale < self.parameter['minscale']:
            # the page ID identifies the input here; no file name is
            # available in this method
            LOG.warning("%s: bad scale (%g); skipping\n", page_id, scale)
            return

        segmentation = self.compute_segmentation(binary, scale)
        if np.amax(segmentation) > self.parameter['maxlines']:
            LOG.warning("%s: too many lines %i", page_id,
                        np.amax(segmentation))
            return
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # process the lines in reading order
        lines = [lines[i] for i in lsort]
        # NOTE(review): ``cleaned`` is never used; the line images below are
        # cut from ``binary`` - confirm which one is intended
        cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
        region_xywh['features'] += ",textline"
        for i, l in enumerate(lines):
            # NOTE(review): debugging leftover, overwritten for every line
            ocrolib.write_image_binary("test.bin.png", binary[l.bounds[0],
                                                              l.bounds[1]])
            # l.bounds is a (row slice, column slice) pair - it indexes the
            # image array - so the first slice gives the y range and the
            # second one the x range
            min_y, max_y = (l.bounds[0].start, l.bounds[0].stop)
            min_x, max_x = (l.bounds[1].start, l.bounds[1].stop)

            # cut out the line and binarize it to 0/255
            img = binary[l.bounds[0], l.bounds[1]]
            img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
            img = ocrolib.array2pil(img)

            # derive the output file ID from the input one, falling back to
            # a padded ID when the input ID lacks the input group name
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.image_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.image_grp, n)

            file_path = self.workspace.save_image_file(img,
                                                       file_id + "_" + str(i),
                                                       page_id=page_id,
                                                       file_grp=self.image_grp)
            ai = AlternativeImageType(filename=file_path,
                                      comments=region_xywh['features'])
            line = TextLineType(
                Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                  (min_x, min_y, max_x, min_y, max_x, max_y,
                                   min_x, max_y)))
            line.add_AlternativeImage(ai)
            textregion.add_TextLine(line)

예제 #18

0

파일 보기

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, mask, dpi):
        """Detect layout regions on a page image and add them to ``page``.

        Steps, in order:

        1. run ``self.mrcnn_model.detect`` on the image;
        2. if ``mask`` is given, grow the detected boxes over left-over
           foreground pixels of that mask;
        3. with parameter ``use_masks``, derive one hull polygon per
           detection from its instance mask;
        4. with parameter ``post_process``, suppress or merge overlapping
           detections (suppressed indices are collected in ``worse``);
        5. derive a reading order from the box coordinates;
        6. add one region element per remaining detection, plus a
           ReadingOrder, to ``page``.

        Args:
            page_image: page image; converted with ``ocrolib.pil2array`` and
                passed to ``coordinates_for_segment``.
            page: PAGE object that is read (existing regions, border) and
                extended with regions and reading order.
            page_xywh: passed to ``coordinates_for_segment``.
            page_id: used in log messages and as the OrderedGroup id.
            input_file: not used by this method.
            mask: image of an earlier segmentation, or a falsy value to skip
                step 2.
            dpi: number; ``dpi / 6`` is the kernel size for the mask
                dilation/closing in step 3.

        Raises:
            Exception: if a detection carries a class id that is not an
                index into ``CLASS_NAMES``.
        """
        LOG = getLogger('processor.AnybaseocrBlockSegmenter')
        # check for existing text regions and whether to overwrite them
        if page.get_TextRegion() or page.get_TableRegion():
            if self.parameter['overwrite']:
                LOG.info('removing existing text/table regions in page "%s"',
                         page_id)
                # NOTE(review): only text regions are cleared here although
                # the message also mentions table regions
                page.set_TextRegion([])
            else:
                LOG.warning('keeping existing text/table regions in page "%s"',
                            page_id)
        # check if border exists
        border_polygon = None
        if page.get_Border():
            border_coords = page.get_Border().get_Coords()
            border_points = polygon_from_points(border_coords.get_points())
            border_polygon = Polygon(border_points)

        LOG.info('detecting regions on page "%s"', page_id)
        img_array = ocrolib.pil2array(page_image)
        # replicate a single-channel image into three channels
        if len(img_array.shape) <= 2:
            img_array = np.stack((img_array, ) * 3, axis=-1)
        # the array is indexed [row, column, channel], so axis 1 is the width
        width = img_array.shape[1]
        # convert to incidence matrix
        class_ids = np.array([[
            1 if category in self.parameter['active_classes'] else 0
            for category in CLASS_NAMES
        ]],
                             dtype=np.int32)
        results = self.mrcnn_model.detect([img_array],
                                          verbose=0,
                                          active_class_ids=class_ids)
        r = results[0]
        LOG.info('found %d candidates on page "%s"', len(r['rois']), page_id)

        th = self.parameter['th']
        # check for existing segmentation mask
        # this code executes only when the workflow had tiseg run before with use_deeplr=true
        if mask:
            mask = ocrolib.pil2array(mask)
            mask = mask // 255
            mask = 1 - mask
            # label the part of the mask under bounding box i with i + 2, so
            # that the value 1 is left for pixels outside of all boxes
            for i in range(len(r['rois'])):

                min_y, min_x, max_y, max_x = r['rois'][i]
                mask[min_y:max_y, min_x:max_x] *= i + 2

            # check for left over pixels and add them to the bounding boxes
            pixel_added = True

            while pixel_added:

                pixel_added = False
                left_over = np.where(mask == 1)
                for y, x in zip(left_over[0], left_over[1]):
                    # NOTE(review): y - th / x - th become negative near the
                    # top/left edge, which makes the slice wrap around
                    local_mask = mask[y - th:y + th, x - th:x + th]
                    candidates = np.where(local_mask > 1)
                    candidates = [k for k in zip(candidates[0], candidates[1])]
                    if len(candidates) > 0:
                        pixel_added = True
                        # find closest pixel with x>1
                        candidates.sort(key=lambda j: np.sqrt((j[0] - th)**2 +
                                                              (j[1] - th)**2))
                        index = local_mask[candidates[0]] - 2

                        # add pixel to mask/bbox
                        # y,x to bbox with index
                        if y < r['rois'][index][0]:
                            r['rois'][index][0] = y

                        elif y > r['rois'][index][2]:
                            r['rois'][index][2] = y

                        if x < r['rois'][index][1]:
                            r['rois'][index][1] = x

                        elif x > r['rois'][index][3]:
                            r['rois'][index][3] = x

                        # update the mask
                        mask[y, x] = index + 2

        # every class id must be an index into CLASS_NAMES
        for i in range(len(r['rois'])):
            class_id = r['class_ids'][i]
            if class_id >= len(CLASS_NAMES):
                raise Exception(
                    'Unexpected class id %d - model does not match' % class_id)

        # find hull contours on masks
        if self.parameter['use_masks']:
            r.setdefault('polygons', list())
            # estimate glyph scale (roughly)
            scale = int(dpi / 6)
            scale = scale + (scale + 1) % 2  # odd
            for i in range(len(r['rois'])):
                mask = r['masks'][:, :, i]
                mask = cv2.dilate(mask.astype(np.uint8),
                                  np.ones((scale, scale), np.uint8)) > 0
                # close mask until we have a single outer contour
                # (at most 10 rounds)
                contours = None
                for _ in range(10):
                    mask = cv2.morphologyEx(
                        mask.astype(np.uint8), cv2.MORPH_CLOSE,
                        np.ones((scale, scale), np.uint8)) > 0
                    contours, _ = cv2.findContours(mask.astype(np.uint8),
                                                   cv2.RETR_EXTERNAL,
                                                   cv2.CHAIN_APPROX_SIMPLE)
                    if len(contours) == 1:
                        break
                r['polygons'].append(Polygon(
                    contours[0][:, 0, :]))  # already in x,y order

        # to reduce overlaps, apply IoU-based non-maximum suppression
        # (and other post-processing against overlaps) across classes,
        # but not on the raw pixels, but the smoothed hull polygons
        LOG.info('post-processing detections on page "%s"', page_id)
        # indices of detections that lost against (or were merged into)
        # another one; they are skipped when regions are created below
        worse = []
        if self.parameter['post_process']:
            active = True

            def _merge_rois(i, j):
                """merges i into j"""
                nonlocal r, active
                r['rois'][j][0] = min(r['rois'][i][0], r['rois'][j][0])
                r['rois'][j][1] = min(r['rois'][i][1], r['rois'][j][1])
                r['rois'][j][2] = max(r['rois'][i][2], r['rois'][j][2])
                r['rois'][j][3] = max(r['rois'][i][3], r['rois'][j][3])
                r['polygons'][j] = r['polygons'][i].union(r['polygons'][j])
                #r['scores'][j] = max(r['scores'][i], r['scores'][i])
                # a merge changes the geometry, so another pass is needed
                active = True

            # find overlapping pairs
            while active:
                active = False
                for i in range(len(r["class_ids"])):
                    if i in worse:
                        continue
                    for j in range(i + 1, len(r['class_ids'])):
                        if j in worse:
                            continue
                        iclass = r['class_ids'][i]
                        jclass = r['class_ids'][j]
                        iname = CLASS_NAMES[iclass]
                        jname = CLASS_NAMES[jclass]
                        if (iname == 'drop-capital') != (jname
                                                         == 'drop-capital'):
                            # ignore drop-capital overlapping with others
                            continue
                        # rs todo: lower priority for footnote?
                        if (r['rois'][i][1] > r['rois'][j][3]
                                or r['rois'][i][3] < r['rois'][j][1]
                                or r['rois'][i][0] > r['rois'][j][2]
                                or r['rois'][i][2] < r['rois'][j][0]):
                            # no overlap (cut)
                            continue
                        iscore = r['scores'][i]
                        jscore = r['scores'][j]
                        if not self.parameter['use_masks']:
                            # without polygons the lower score simply loses
                            LOG.debug(
                                "roi %d[%s] overlaps roi %d[%s] and %s (replacing)",
                                i, iname, j, jname,
                                "looses" if iscore < jscore else "wins")
                            if iscore < jscore:
                                worse.append(i)
                                break
                            else:
                                worse.append(j)
                                continue
                        # compare masks
                        ipoly = r['polygons'][i]
                        jpoly = r['polygons'][j]
                        isize = ipoly.area
                        jsize = jpoly.area
                        inter = ipoly.intersection(jpoly).area
                        union = ipoly.union(jpoly).area
                        # LOG.debug("%d/%d %dpx/%dpx shared %dpx overall %dpx",
                        #           i, j, isize, jsize, inter, union)
                        if inter / isize > self.parameter['min_share_drop']:
                            LOG.debug(
                                "roi %d[%s] contains roi %d[%s] (replacing)",
                                j, jname, i, iname)
                            worse.append(i)
                            break
                        elif inter / jsize > self.parameter['min_share_drop']:
                            LOG.debug(
                                "roi %d[%s] contains roi %d[%s] (replacing)",
                                i, iname, j, jname)
                            worse.append(j)
                        elif inter / union > self.parameter['min_iou_drop']:
                            LOG.debug(
                                "roi %d[%s] heavily overlaps roi %d[%s] and %s (replacing)",
                                i, iname, j, jname,
                                "looses" if iscore < jscore else "wins")
                            if iscore < jscore:
                                worse.append(i)
                                break
                            else:
                                worse.append(j)
                        elif inter / isize > self.parameter['min_share_merge']:
                            LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
                                      j, jname, i, iname)
                            worse.append(i)
                            _merge_rois(i, j)
                            break
                        elif inter / jsize > self.parameter['min_share_merge']:
                            LOG.debug("roi %d[%s] covers roi %d[%s] (merging)",
                                      i, iname, j, jname)
                            worse.append(j)
                            _merge_rois(j, i)
                        elif inter / union > self.parameter['min_iou_merge']:
                            LOG.debug(
                                "roi %d[%s] slightly overlaps roi %d[%s] and %s (merging)",
                                i, iname, j, jname,
                                "looses" if iscore < jscore else "wins")
                            if iscore < jscore:
                                worse.append(i)
                                _merge_rois(i, j)
                                break
                            else:
                                worse.append(j)
                                _merge_rois(j, i)

        # define reading order on basis of coordinates
        # partial_order[a, b] == 1 means that region a precedes region b
        partial_order = np.zeros((len(r['rois']), len(r['rois'])), np.uint8)
        for i, (min_y_i, min_x_i, max_y_i, max_x_i) in enumerate(r['rois']):
            for j, (min_y_j, min_x_j, max_y_j,
                    max_x_j) in enumerate(r['rois']):
                if min_x_i < max_x_j and max_x_i > min_x_j:
                    # xoverlaps
                    if min_y_i < min_y_j:
                        partial_order[i, j] = 1
                else:
                    min_y = min(min_y_i, min_y_j)
                    max_y = max(max_y_i, max_y_j)
                    min_x = min(min_x_i, min_x_j)
                    max_x = max(max_x_i, max_x_j)
                    if next(
                        (False
                         for (min_y_k, min_x_k, max_y_k, max_x_k) in r['rois']
                         if (min_y_k < max_y and max_y_k > min_y
                             and min_x_k < max_x and max_x_k > min_x)), True):
                        # no k in between
                        if ((min_y_j + max_y_j) / 2 < min_y_i
                                and (min_y_i + max_y_i) / 2 > max_y_j):
                            # vertically unrelated
                            partial_order[j, i] = 1
                        elif max_x_i < min_x_j:
                            partial_order[i, j] = 1

        def _topsort(po):
            """Return all indices, each after the indices that precede it in po."""
            # the builtin bool is used as dtype; the np.bool alias no longer
            # exists in current NumPy releases
            visited = np.zeros(po.shape[0], bool)
            result = list()

            def _visit(k):
                if visited[k]:
                    return
                visited[k] = True
                # visit everything that has to come before k first
                for l in np.nonzero(po[:, k])[0]:
                    _visit(l)
                result.append(k)

            for k in range(po.shape[0]):
                _visit(k)
            return result

        reading_order = _topsort(partial_order)

        # Creating Reading Order object in PageXML
        order_group = OrderedGroupType(caption="Regions reading order",
                                       id=page_id)
        reading_order_object = ReadingOrderType()
        reading_order_object.set_OrderedGroup(order_group)
        page.set_ReadingOrder(reading_order_object)

        for i in range(len(r['rois'])):
            min_y, min_x, max_y, max_x = r['rois'][i]
            score = r['scores'][i]
            class_id = r['class_ids'][i]
            class_name = CLASS_NAMES[class_id]
            if i in worse:
                LOG.debug(
                    "Ignoring instance %d[%s] overlapping better/larger neighbour",
                    i, class_name)
                continue

            if self.parameter['use_masks']:
                region_polygon = r['polygons'][i].exterior.coords[:-1]
            else:
                # paragraphs get a small horizontal margin (5 px to the left,
                # 10 px to the right), clamped to the image
                region_polygon = polygon_from_bbox(
                    max(min_x - 5, 0) if class_name == 'paragraph' else min_x,
                    min_y,
                    min(max_x +
                        10, width) if class_name == 'paragraph' else max_x,
                    max_y)

            # convert to absolute coordinates
            region_polygon = coordinates_for_segment(region_polygon,
                                                     page_image, page_xywh)
            # intersect with parent and plausibilize
            cut_region_polygon = Polygon(region_polygon)
            if border_polygon:
                cut_region_polygon = border_polygon.intersection(
                    cut_region_polygon)
            if cut_region_polygon.is_empty:
                LOG.warning('region %d does not intersect page frame', i)
                continue
            if not cut_region_polygon.is_valid:
                LOG.warning('region %d has invalid polygon', i)
                continue
            region_polygon = cut_region_polygon.exterior.coords[:-1]
            region_coords = CoordsType(points_from_polygon(region_polygon),
                                       conf=score)
            # position of this detection in the reading order; positions of
            # skipped detections stay unused
            read_order = reading_order.index(i)
            region_args = {
                'custom': 'readingOrder {index:' + str(read_order) + ';}',
                'id': 'region%04d' % i,
                'Coords': region_coords
            }
            if class_name == 'image':
                image_region = ImageRegionType(**region_args)
                page.add_ImageRegion(image_region)
            elif class_name == 'table':
                table_region = TableRegionType(**region_args)
                page.add_TableRegion(table_region)
            elif class_name == 'graphics':
                graphic_region = GraphicRegionType(**region_args)
                page.add_GraphicRegion(graphic_region)
            else:
                # every other class becomes a text region of that type
                region_args['type_'] = class_name
                textregion = TextRegionType(**region_args)
                page.add_TextRegion(textregion)
            order_index = reading_order.index(i)
            regionRefIndex = RegionRefIndexedType(index=order_index,
                                                  regionRef=region_args['id'])
            order_group.add_RegionRefIndexed(regionRefIndex)
            LOG.info('added %s region on page "%s"', class_name, page_id)

예제 #19

0

파일 보기

    def _process_segment(self, page_image, page, page_xywh, page_id,
                         input_file, n, mrcnn_model, class_names, mask):
        """Detect block regions on a page image and add them to ``page``.

        Steps, in order:

        1. Return early if ``page`` already has TextRegions and
           ``self.parameter['overwrite']`` is false; otherwise clear them.
        2. Run ``mrcnn_model.detect`` on the page image.
        3. If ``mask`` is given, grow the detected boxes with left-over mask
           pixels lying within ``self.parameter['th']`` of a box.
        4. Trim boxes that overlap boxes seen earlier in the detection list
           (class id 5 is never trimmed).
        5. Derive a reading order from box coordinates and store it on
           ``page`` as a ReadingOrder.
        6. For each box, save the cropped image through
           ``self.workspace.save_image_file`` and add an Image/Table/Graphic/
           Text region (class ids 15/16/17/anything else) to ``page``.

        Args:
            page_image: PIL image of the page (converted with
                ``ocrolib.pil2array``).
            page: PAGE-XML page object that is modified in place.
            page_xywh: coordinate context forwarded to
                ``coordinates_for_segment``.
            page_id: identifier used in log messages and region ids.
            input_file: input file object forwarded to ``make_file_id``.
            n: unused in this method.
            mrcnn_model: object with a ``detect(images, verbose=...)`` method
                returning a list of dicts with ``'rois'`` and ``'class_ids'``.
            class_names: sequence mapping class id to region type name.
            mask: optional segmentation mask image; falsy to skip step 3.
                # assumes an 8-bit 0/255 image -- TODO confirm

        Returns:
            None. ``page`` is modified in place.
        """
        LOG = getLogger('OcrdAnybaseocrBlockSegmenter')
        # check for existing text regions and whether to overwrite them
        border = None
        if page.get_TextRegion():
            if self.parameter['overwrite']:
                LOG.info('removing existing TextRegions in page "%s"', page_id)
                page.set_TextRegion([])
            else:
                LOG.warning('keeping existing TextRegions in page "%s"',
                            page_id)
                return
        # check if border exists; `border` stays None when the page has none
        if page.get_Border():
            border_coords = page.get_Border().get_Coords()
            border_points = polygon_from_points(border_coords.get_points())
            border = Polygon(border_points)

        # page_image, page_xy = self.workspace.image_from_segment(page.get_Border(), page_image, page_xywh)

        img_array = ocrolib.pil2array(page_image)
        # NOTE(review): looks like a left-over debug dump into the current
        # working directory -- confirm whether it can be removed.
        page_image.save('./checkthis.png')
        # replicate a single-channel image into three channels for the model
        if len(img_array.shape) <= 2:
            img_array = np.stack((img_array, ) * 3, axis=-1)
        results = mrcnn_model.detect([img_array], verbose=1)
        r = results[0]

        th = self.parameter['th']
        # check for existing segmentation mask
        # this code executes only when use_deeplr is set to True in ocrd-tool.json file
        if mask:
            mask = ocrolib.pil2array(mask)
            mask = mask // 255
            mask = 1 - mask
            # label the pixels of box i with a multiple of (i + 2); pixels
            # that were 0 stay 0, pixels that were 1 and lie in exactly one
            # box become i + 2
            for i in range(len(r['rois'])):

                min_x = r['rois'][i][0]
                min_y = r['rois'][i][1]
                max_x = r['rois'][i][2]
                max_y = r['rois'][i][3]
                mask[min_x:max_x, min_y:max_y] *= i + 2
            # NOTE(review): looks like another debug dump -- confirm.
            cv2.imwrite('mask_check.png', mask * (255 / (len(r['rois']) + 2)))

            # check for left over pixels and add them to the bounding boxes
            pixel_added = True

            # repeat until a full pass assigns no further pixel
            while pixel_added:

                pixel_added = False
                left_over = np.where(mask == 1)
                for x, y in zip(left_over[0], left_over[1]):
                    # NOTE(review): for x < th or y < th the slice start is
                    # negative, which numpy interprets relative to the end
                    # of the axis -- confirm this is acceptable.
                    local_mask = mask[x - th:x + th, y - th:y + th]
                    candidates = np.where(local_mask > 1)
                    candidates = [k for k in zip(candidates[0], candidates[1])]
                    if len(candidates) > 0:
                        pixel_added = True
                        # find closest labelled pixel, measured from (th, th),
                        # i.e. the window position of (x, y) when the window
                        # is not clipped
                        candidates.sort(key=lambda j: np.sqrt((j[0] - th)**2 +
                                                              (j[1] - th)**2))
                        index = local_mask[candidates[0]] - 2

                        # add pixel to mask/bbox
                        # x,y to bbox with index
                        if x < r['rois'][index][0]:
                            r['rois'][index][0] = x

                        elif x > r['rois'][index][2]:
                            r['rois'][index][2] = x

                        if y < r['rois'][index][1]:
                            r['rois'][index][1] = y

                        elif y > r['rois'][index][3]:
                            r['rois'][index][3] = y

                        # update the mask
                        mask[x, y] = index + 2

        # resolving overlapping problem
        bbox_dict = {}  # boxes seen so far, grouped by class id
        class_id_check = []

        for i in range(len(r['rois'])):
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]

            # snapshot taken before trimming; this untrimmed box is what gets
            # stored in bbox_dict below
            region_bbox = [min_y, min_x, max_y, max_x]

            # compare against every earlier box, regardless of class
            for key in bbox_dict:
                for bbox in bbox_dict[key]:

                    # far edge (rois[i][2]) falls inside the earlier box and
                    # the two boxes overlap along the other axis:
                    # pull the far edge back to just before the earlier box
                    if (region_bbox[3] <= bbox[3] and region_bbox[3] >= bbox[1]
                            and ((region_bbox[0] >= bbox[0]
                                  and region_bbox[0] <= bbox[2]) or
                                 (region_bbox[2] >= bbox[0]
                                  and region_bbox[2] <= bbox[2]) or
                                 (region_bbox[0] <= bbox[0]
                                  and region_bbox[2] >= bbox[2]))
                            and r['class_ids'][i] != 5):

                        r['rois'][i][2] = bbox[1] - 1

                    # near edge (rois[i][0]) falls inside the earlier box:
                    # push the near edge to just after the earlier box
                    if (region_bbox[1] <= bbox[3] and region_bbox[1] >= bbox[1]
                            and ((region_bbox[0] >= bbox[0]
                                  and region_bbox[0] <= bbox[2]) or
                                 (region_bbox[2] >= bbox[0]
                                  and region_bbox[2] <= bbox[2]) or
                                 (region_bbox[0] <= bbox[0]
                                  and region_bbox[2] >= bbox[2]))
                            and r['class_ids'][i] != 5):

                        r['rois'][i][0] = bbox[3] + 1

            if r['class_ids'][i] not in class_id_check:
                bbox_dict[r['class_ids'][i]] = []
                class_id_check.append(r['class_ids'][i])

            bbox_dict[r['class_ids'][i]].append(region_bbox)

        # resolving overlapping problem code

        # define reading order on basis of coordinates
        reading_order = []

        for i in range(len(r['rois'])):
            # NOTE(review): numpy image arrays are usually (rows, cols, ch),
            # so `width` presumably holds the row count -- confirm.
            width, height, _ = img_array.shape
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]

            # NOTE(review): the second branch adjusts min_y although it tests
            # max_y; possibly max_y was intended. Left as is because the same
            # adjusted tuple is used as lookup key in the loops below.
            if (min_y - 5) > width and r['class_ids'][i] == 2:
                min_y -= 5
            if (max_y + 10) < width and r['class_ids'][i] == 2:
                min_y += 10
            reading_order.append((min_y, min_x, max_y, max_x))

        # sort by the second, then the first tuple component
        reading_order = sorted(reading_order,
                               key=lambda reading_order:
                               (reading_order[1], reading_order[0]))
        # move box i behind any later box j that intersects the strip running
        # from coordinate 0 to the far edge of box i
        # NOTE(review): the list is re-ordered while being indexed by i and j.
        for i in range(len(reading_order)):
            min_y, min_x, max_y, max_x = reading_order[i]
            min_y = 0
            i_poly = Polygon([[min_x, min_y], [max_x, min_y], [max_x, max_y],
                              [min_x, max_y]])
            for j in range(i + 1, len(reading_order)):
                min_y, min_x, max_y, max_x = reading_order[j]
                j_poly = Polygon([[min_x, min_y], [max_x, min_y],
                                  [max_x, max_y], [min_x, max_y]])
                inter = i_poly.intersection(j_poly)
                if inter:
                    reading_order.insert(j + 1, reading_order[i])
                    del reading_order[i]

        # Creating Reading Order object in PageXML
        order_group = OrderedGroupType(caption="Regions reading order",
                                       id=page_id)

        for i in range(len(r['rois'])):
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]
            # same adjustment as above so the tuple matches reading_order
            if (min_y - 5) > width and r['class_ids'][i] == 2:
                min_y -= 5
            if (max_y + 10) < width and r['class_ids'][i] == 2:
                min_y += 10

            region_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y],
                              [min_x, max_y]]

            # skip boxes that do not intersect the page border
            if border:
                cut_region_polygon = border.intersection(
                    Polygon(region_polygon))
                if cut_region_polygon.is_empty:
                    continue
            else:
                cut_region_polygon = Polygon(region_polygon)

            order_index = reading_order.index((min_y, min_x, max_y, max_x))
            region_id = '%s_region%04d' % (page_id, i)
            regionRefIndex = RegionRefIndexedType(index=order_index,
                                                  regionRef=region_id)
            order_group.add_RegionRefIndexed(regionRefIndex)

        reading_order_object = ReadingOrderType()
        reading_order_object.set_OrderedGroup(order_group)
        page.set_ReadingOrder(reading_order_object)

        for i in range(len(r['rois'])):
            width, height, _ = img_array.shape
            min_x = r['rois'][i][0]
            min_y = r['rois'][i][1]
            max_x = r['rois'][i][2]
            max_y = r['rois'][i][3]

            if (min_y - 5) > width and r['class_ids'][i] == 2:
                min_y -= 5
            if (max_y + 10) < width and r['class_ids'][i] == 2:
                min_y += 10

            # one change here to resolve flipped coordinates
            region_polygon = [[min_y, min_x], [max_y, min_x], [max_y, max_x],
                              [min_y, max_x]]

            # Clip to the page border only when there is one: `border` is
            # None for pages without a Border element, and calling
            # `.intersection` on it would raise AttributeError.
            if border:
                cut_region_polygon = border.intersection(
                    Polygon(region_polygon))
                if cut_region_polygon.is_empty:
                    continue
            else:
                cut_region_polygon = Polygon(region_polygon)
            # outline as (x, y) pairs without the closing point
            cut_region_polygon = [
                j for j in zip(list(cut_region_polygon.exterior.coords.xy[0]),
                               list(cut_region_polygon.exterior.coords.xy[1]))
            ][:-1]

            # checking whether coordinates are flipped

            region_polygon = coordinates_for_segment(cut_region_polygon,
                                                     page_image, page_xywh)
            region_points = points_from_polygon(region_polygon)

            read_order = reading_order.index((min_y, min_x, max_y, max_x))

            # this can be tested, provided whether we need previous comments or not?
            # resolving overlapping problem

            region_img = img_array[min_x:max_x, min_y:
                                   max_y]  # extract from points and img_array

            region_img = ocrolib.array2pil(region_img)

            file_id = make_file_id(input_file, self.output_file_grp)
            file_path = self.workspace.save_image_file(
                region_img,
                file_id + "_" + str(i),
                page_id=page_id,
                file_grp=self.output_file_grp)

            # ai = AlternativeImageType(filename=file_path, comments=page_xywh['features'])
            region_id = '%s_region%04d' % (page_id, i)
            coords = CoordsType(region_points)

            # in case of imageRegion
            if r['class_ids'][i] == 15:
                image_region = ImageRegionType(
                    custom='readingOrder {index:' + str(read_order) + ';}',
                    id=region_id,
                    Coords=coords,
                    type_=class_names[r['class_ids'][i]])
                # image_region.add_AlternativeImage(ai)
                page.add_ImageRegion(image_region)
                continue
            # in case of tableRegion
            if r['class_ids'][i] == 16:
                table_region = TableRegionType(
                    custom='readingOrder {index:' + str(read_order) + ';}',
                    id=region_id,
                    Coords=coords,
                    type_=class_names[r['class_ids'][i]])
                # table_region.add_AlternativeImage(ai)
                page.add_TableRegion(table_region)
                continue
            # in case of graphicRegion
            if r['class_ids'][i] == 17:
                graphic_region = GraphicRegionType(
                    custom='readingOrder {index:' + str(read_order) + ';}',
                    id=region_id,
                    Coords=coords,
                    type_=class_names[r['class_ids'][i]])
                # graphic_region.add_AlternativeImage(ai)
                page.add_GraphicRegion(graphic_region)
                continue

            # every other class id becomes a TextRegion typed by its class name
            textregion = TextRegionType(custom='readingOrder {index:' +
                                        str(read_order) + ';}',
                                        id=region_id,
                                        Coords=coords,
                                        type_=class_names[r['class_ids'][i]])
            # textregion.add_AlternativeImage(ai)

            #border = page.get_Border()
            # if border:
            #    border.add_TextRegion(textregion)
            # else:
            page.add_TextRegion(textregion)

예제 #20

0

파일 보기

    def process(self):
        """Estimate a crop box for every input page and store it as Border.

        For each file in ``self.input_files`` the page image is loaded,
        rotated according to the page's orientation, and analysed with
        ``self.remove_rular`` and ``self.detect_textarea``. The resulting
        box (from the detected text area or, as fallback, from
        ``self.select_borderLine``) is written to the page as a ``Border``
        element, and the updated PAGE-XML is added to the workspace under
        ``self.output_file_grp``.

        ``self.parameter['colSeparator']`` is set per page to the configured
        ratio multiplied by that page's width (truncated to int).
        """
        # Configured ratio, captured once on the first page. The original
        # code re-read the already scaled pixel value on every iteration, so
        # from the second page on it was multiplied by the width again.
        col_separator_ratio = None
        for (n, input_file) in enumerate(self.input_files):
            pcgts = page_from_file(self.workspace.download_file(input_file))
            page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
            page = pcgts.get_Page()
            page_image, page_xywh, _ = self.workspace.image_from_page(
                page, page_id)
            # Debug information goes to the logger instead of stdout; getattr
            # avoids an AttributeError for images without a `filename`.
            LOG.debug("page image %s %s", type(page_image),
                      getattr(page_image, 'filename', None))

            # Get image orientation
            orientation = pcgts.get_Page().get_orientation()
            rotated_image = self.rotate_image(orientation, page_image)

            LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID)

            img_array = ocrolib.pil2array(rotated_image)

            # Check if image is RGB or not; replicate a 2-D array to 3 channels
            if len(img_array.shape) == 2:
                img_array = np.stack((img_array, ) * 3, axis=-1)

            # binarize at the value returned by ocrolib.midrange
            img_array_bin = np.array(img_array > ocrolib.midrange(img_array),
                                     'i')

            lineDetectH = []
            lineDetectV = []
            img_array_rr = self.remove_rular(img_array)

            textarea, img_array_rr_ta, height, width = self.detect_textarea(
                img_array_rr)
            if col_separator_ratio is None:
                col_separator_ratio = self.parameter['colSeparator']
            # stored back on self.parameter, as in the original code
            # (presumably the helpers called below read it -- verify)
            self.parameter['colSeparator'] = int(width * col_separator_ratio)

            if len(textarea) > 1:
                textarea = self.crop_area(textarea, img_array_bin,
                                          img_array_rr_ta)

                if len(textarea) == 0:
                    min_x, min_y, max_x, max_y = self.select_borderLine(
                        img_array_rr, lineDetectH, lineDetectV)
                else:
                    min_x, min_y, max_x, max_y = textarea[0]
            elif len(textarea) == 1 and (
                    height * width * 0.5 <
                (abs(textarea[0][2] - textarea[0][0]) *
                 abs(textarea[0][3] - textarea[0][1]))):
                # single text area covering more than half of the page
                # NOTE(review): the original also computed a padded box
                # (20 px horizontally, 40 px vertically, clamped to the page)
                # but never used it; the unpadded area is kept here to
                # preserve the output -- confirm which one is intended.
                min_x, min_y, max_x, max_y = textarea[0]
            else:
                min_x, min_y, max_x, max_y = self.select_borderLine(
                    img_array_rr, lineDetectH, lineDetectV)

            # rectangle corners in the order: (min,min) (max,min) (max,max) (min,max)
            brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                               (min_x, min_y, max_x, min_y,
                                                max_x, max_y, min_x, max_y)))
            pcgts.get_Page().set_Border(brd)

            # Use input_file's basename for the new file -
            # this way the files retain the same basenames:
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.output_file_grp)
            # fall back to a generated ID when the replacement changed nothing
            if file_id == input_file.ID:
                file_id = concat_padded(self.output_file_grp, n)
            self.workspace.add_file(ID=file_id,
                                    file_grp=self.output_file_grp,
                                    pageId=input_file.pageId,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=os.path.join(
                                        self.output_file_grp,
                                        file_id + '.xml'),
                                    content=to_xml(pcgts).encode('utf-8'))