コード例 #1
0
ファイル: process_images.py プロジェクト: KayneWest/basicocr
def extract(image):
    """Segment a page image into text lines and write each line as a PNG.

    The page is read with ``ocrolib.read_image_binary``, inverted
    (``1 - binary``), segmented with ``compute_segmentation``, put into
    reading order, relabelled, and every line is then cut out of the
    noise-cleaned page and written below ``outputdir``.

    Relies on names from the enclosing module: ``ocrolib``, ``psegutils``,
    ``compute_segmentation``, ``amax``, ``zeros`` and ``args`` (``noise``,
    ``pad`` and ``expand`` attributes are read from it).

    :param image: passed straight to ``ocrolib.read_image_binary``
        (presumably a file name -- TODO confirm against callers).
    :return: None.  Any failure is reported by printing ``error``.
    """
    try:
        binary = ocrolib.read_image_binary(image)
        binary = 1-binary

        scale = psegutils.estimate_scale(binary)
        segmentation = compute_segmentation(binary,scale)

        # compute the reading order
        lines = psegutils.compute_lines(segmentation,scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # renumber the labels so that they conform to the specs
        # The lookup table is sized from the label image; the previous code
        # passed the compute_segmentation *function* to amax, which always
        # raised and was silently turned into 'error' by the handler below.
        nlabels = amax(segmentation)+1
        renumber = zeros(nlabels,'i')
        for i,v in enumerate(lsort): renumber[lines[v].label] = 0x010000+(i+1)
        segmentation = renumber[segmentation]

        # NOTE(review): this is a URL, yet it is used as a file-system prefix
        # by the write calls below -- confirm the intended output location.
        outputdir = "http://127.0.0.1:5000/uploads/"

        lines = [lines[i] for i in lsort]
        ocrolib.write_page_segmentation("%s.pseg.png"%outputdir,segmentation)

        cleaned = ocrolib.remove_noise(binary,args.noise)
        for i,l in enumerate(lines):
            binline = psegutils.extract_masked(1-cleaned,l,pad=args.pad,expand=args.expand)
            ocrolib.write_image_binary("%s/01%04x.bin.png"%(outputdir,i+1),binline)
    except Exception:
        # Best-effort: report the failure but let KeyboardInterrupt and
        # SystemExit propagate instead of swallowing them.
        print ('error')
コード例 #2
0
def text_line_segmentation(binary, scale=None, gray=None, num_col = 1):
    """Split a binary page image into text lines and cut out each line.

    Column separators are only searched for when ``num_col`` is greater
    than one; otherwise an all-zero separator map is used.

    :param binary: page image, converted to an unsigned-byte array on entry.
    :param scale: segmentation scale; estimated with
        ``psegutils.estimate_scale`` when ``None``.
    :param gray: optional image to cut the line images from instead of the
        inverted binary page.
    :param num_col: expected number of columns.
    :return: ``(line_ims, lines)`` -- the padded line images and the line
        records produced by ``psegutils.compute_lines``.
    """
    binary = array(binary, 'B')
    if scale is None:
        scale = psegutils.estimate_scale(binary)

    # Column finding (skipped for single-column pages).
    if num_col > 1:
        colseps, binary = compute_colseps(binary, scale)
    else:
        colseps = np.zeros(binary.shape)

    # Text line seeds.
    grad_bottom, grad_top, boxes = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, grad_bottom, grad_top, colseps, scale)

    # Spread the seeds over all remaining components.
    propagated = morph.propagate_labels(boxes, seeds, conflict=0)
    nearest = morph.spread_labels(seeds, maxdist=scale)
    line_labels = where(propagated > 0, propagated, nearest * binary)
    lines = psegutils.compute_lines(line_labels * binary, scale, 0.8)

    # Cut each line from the gray image when given, else from the inverted
    # binary page, then add a 10 pixel border.
    line_ims = [
        pad_by(
            psegutils.extract_masked(
                1-binary if gray is None else gray, line, pad=0),
            10,
            invert=False)
        for line in lines
    ]

    return line_ims, lines
コード例 #3
0
ファイル: canny.py プロジェクト: loveheaven/ocropy
def processOneLineImage(gray_img, iTag):
    """Split a grayscale strip into regions using its box-map projection.

    The image is thresholded (inverse binary at 110), trimmed by two columns
    on each side, and turned into a box map whose horizontal projection is
    cropped and split into regions.  The box map is always written to
    ``box<iTag>.png``; when ``iTag`` is non-empty every region is also cut
    out of ``gray_img`` and written to ``<iTag>x<start>.png``.

    :param gray_img: grayscale image (array with at least two dimensions).
    :param iTag: tag used in the output file names; an empty tag suppresses
        the per-region crops.
    :return: ``(regions, top, bottom)`` from ``splitProjection`` and
        ``cropProjection``.
    """
    _, inverted = cv2.threshold(gray_img, 110, 255, cv2.THRESH_BINARY_INV)
    inverted = inverted[:, 2 : inverted.shape[1] - 2]
    scale = psegutils.estimate_scale(inverted)

    # Edge/contour pass; its result only feeds a disabled debug dump.
    struct_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
    dilated = cv2.dilate(inverted, struct_elem, iterations=1)
    edge_map = cv2.Canny(dilated, 60, 300)
    contours, hierarchy = cv2.findContours(edge_map, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(edge_map, contours, -1, (255, 255, 255), 1)

    boxes = psegutils.compute_boxmap(inverted, scale, threshold=(0.4, 10), dtype="B")
    cv2.imwrite("box%s.png" % iTag, boxes * 255)

    profile = hprojection(boxes * 255)
    top, bottom = cropProjection(profile)
    regions = splitProjection(profile, top, bottom, 30, 2)

    if len(iTag) != 0:
        full_width = gray_img.shape[1]
        for row_start, row_end in regions:
            patch_size = (full_width - 4, row_end - row_start + 8)
            patch_center = (full_width / 2, (row_end + row_start) / 2)
            patch = cv2.getRectSubPix(gray_img, patch_size, patch_center)
            cv2.imwrite("%sx%d.png" % (iTag, row_start), patch)
    return regions, top, bottom
コード例 #4
0
def processOneLineImage(gray_img, iTag):
    """Find regions in a grayscale strip and optionally save a crop of each.

    Steps: inverse-binary threshold at 110, drop two columns on each side,
    build a box map, project it horizontally, crop the projection and split
    it into regions.  ``box<iTag>.png`` is always written; the per-region
    crops ``<iTag>x<start>.png`` are written only for a non-empty ``iTag``.

    :param gray_img: grayscale image (array with at least two dimensions).
    :param iTag: tag inserted into the output file names.
    :return: ``(regions, top, bottom)``.
    """
    thresholded = cv2.threshold(gray_img, 110, 255, cv2.THRESH_BINARY_INV)
    (_, img) = thresholded
    img = img[:, 2:img.shape[1]-2]
    scale = psegutils.estimate_scale(img)

    # Contours are drawn onto the edge image, which is only used by a
    # debug dump that is currently disabled.
    closed = cv2.dilate(
        img, cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1)), iterations = 1)
    edges = cv2.Canny(closed,60,300)
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(edges,contours,-1,(255,255,255),1)

    boxmap = psegutils.compute_boxmap(img,scale,threshold=(.4,10),dtype='B')
    cv2.imwrite('box%s.png' % iTag, boxmap*255)

    h_projection = hprojection(boxmap*255)
    top, bottom = cropProjection(h_projection)
    regions = splitProjection(h_projection, top, bottom,30,2)
    result = (regions, top, bottom)

    if len(iTag) == 0:
        return result

    crop_width = gray_img.shape[1]-4
    center_x = gray_img.shape[1]/2
    for first_row, last_row in regions:
        cropped = cv2.getRectSubPix(
            gray_img,
            (crop_width, last_row-first_row+8),
            (center_x, (last_row+first_row)/2))
        cv2.imwrite('%sx%d.png' % (iTag, first_row), cropped)
    return result
コード例 #5
0
def caption_segment(binary):
    '''
    Locate candidate text spans inside a "caption bar" image.

    :param binary: binary image of the caption bar to analyse
    :return: list of ``[start, end]`` spans taken from ``compute_index``,
        with too-small spans dropped and neighbouring spans merged.  The
        list is empty when ``compute_index`` yields two or fewer spans.
    '''
    # Exclude interference near the borders.
    bina = ocrolib.remove_noise(binary, 8)
    scale = psegutils.estimate_scale(bina)
    lines = morph.select_regions(bina, sl.dim1, min=2 * scale)
    bina = bina - lines
    bina = morph.select_regions(bina, sl.dim0, min=scale / 3)
    # Enlarge the text regions so that adjacent text gets connected.
    textlines = filters.maximum_filter(bina, (scale, scale / 2))
    # Compute the start/end positions of the candidate text regions.
    indexs_white = compute_index(textlines, th=scale / 2, n=1)
    indexs_lists = []
    # NOTE(review): inputs with one or two spans produce an empty result --
    # confirm that "> 2" (rather than "> 0") is intended.
    if len(indexs_white) > 2:
        # Most recently kept span, or None while nothing has been kept.
        # The previous code seeded this with indexs_white[0] and later called
        # indexs_lists.remove() on it, which raised ValueError whenever that
        # first span had been rejected as too small and was never appended.
        last_kept = None
        for index in indexs_white:
            if not index[1] - index[0] > scale / 2:
                continue  # exclude spans that are too small
            if last_kept is not None and index[0] - last_kept[1] < scale / 3:
                # Close to the previous kept span: merge the two in place.
                last_kept = [last_kept[0], index[1]]
                indexs_lists[-1] = last_kept
            else:
                indexs_lists.append(index)
                last_kept = index
    return indexs_lists
コード例 #6
0
    def process(self):
        """
        Run ocropy line segmentation on every input file.

        For each input file the PAGE document is loaded, the page image is
        fetched and inverted, line segmentation is computed, and one
        TextLine per extracted region is added to a single page-sized
        "dummy" TextRegion.  The result is registered in the workspace as a
        PAGE XML file in the output file group.
        """
        for (n, input_file) in enumerate(self.input_files):
            log.info("INPUT FILE %i / %s", n, input_file)
            downloaded_file = self.workspace.download_file(input_file)
            log.info("downloaded_file %s", downloaded_file)
            pcgts = page_from_file(downloaded_file)
            page = pcgts.get_Page()
            page_width = page.get_imageWidth()
            page_height = page.get_imageHeight()
            # TODO binarized variant from get_AlternativeImage()
            image_url = page.imageFilename
            log.info("pcgts %s", pcgts)

            local_image = self.workspace.download_url(image_url)
            binary = 1 - ocrolib.read_image_binary(local_image)

            # A configured scale of 0 means "estimate it from the image".
            configured_scale = self.parameter['scale']
            if configured_scale != 0:
                scale = configured_scale
            else:
                scale = psegutils.estimate_scale(binary)
            log.debug(binary)

            pseg = self.compute_segmentation(binary, scale)
            log.debug("pseg=%s", pseg)

            # TODO reading order / enumber

            regions = ocrolib.RegionExtractor()
            regions.setPageLines(pseg)

            # One region covering the whole page holds all text lines.
            page_outline = "0,0 %s,0 %s,%s 0,%s" % (
                page_width, page_width, page_height, page_height)
            dummyRegion = TextRegionType(
                id="dummy", Coords=CoordsType(points=page_outline))
            pcgts.get_Page().add_TextRegion(dummyRegion)

            # Region indices start at 1.
            for lineno in range(1, regions.length()):
                bbox = regions.bbox(lineno)
                log.debug("id=%s bbox=%s", regions.id(lineno), bbox)
                line_coords = CoordsType(
                    points=points_from_y0x0y1x1(regions.bbox(lineno)))
                dummyRegion.add_TextLine(
                    TextLineType(id=concat_padded("line", lineno),
                                 Coords=line_coords))

            ID = concat_padded(self.output_file_grp, n)
            out_name = "%s/%s.xml" % (self.output_file_grp, ID)
            self.workspace.add_file(ID=ID,
                                    file_grp=self.output_file_grp,
                                    mimetype=MIMETYPE_PAGE,
                                    local_filename=out_name,
                                    content=to_xml(pcgts))
コード例 #7
0
ファイル: canny.py プロジェクト: loveheaven/ocropy
def processOnePageImage(gray_img, iTag, rotationAngel=0):
    """Deskew a grayscale page according to ``rotationAngel`` and split it.

    ``rotationAngel`` selects the mode:

    * ``0``     -- estimate the skew with ``estimate_angle`` on the page
      rotated by 90 degrees, rotate back by ``angle - 90`` and write
      ``crop<iTag>.png``.
    * ``-10``   -- try rotations from +0.5 to -0.5 degrees in 0.1 degree
      steps and keep the one for which ``splitImage`` finds the most
      regions; returns early without further output if that is 0 degrees.
    * ``0.01``  -- use the page as is, without rotating.
    * otherwise -- rotate by ``rotationAngel`` and write ``crop<iTag>.png``.

    Finally the chosen image is dilated, passed through ``processImage`` and
    handed to ``splitImage`` together with ``iTag``.

    Progress is printed in a form that produces the same text on Python 2
    and Python 3 (the former bare ``print`` statements only parsed on 2).

    :param gray_img: grayscale page image.
    :param iTag: tag used in output file names and progress messages.
    :param rotationAngel: mode selector / rotation angle, see above.
    :return: None.
    """
    (_, img) = cv2.threshold(gray_img, 110, 255, cv2.THRESH_BINARY_INV)
    scale = psegutils.estimate_scale(img)
    # NOTE(review): the line-removal result is not used further down; the
    # calls are kept in case the helpers modify their arguments -- confirm.
    binary = remove_hlines(img, gray_img, scale)
    binary = remove_vlines(binary, gray_img, scale)
    if rotationAngel == 0:
        img_crop = interpolation.rotate(gray_img, 90)
        angle = estimate_angle(img_crop)
        print("%s %s" % (iTag, angle))
        img_crop = interpolation.rotate(img_crop, angle - 90, cval=140)
        cv2.imwrite("crop%s.png" % iTag, img_crop)
    elif rotationAngel == -10:
        print("start")
        maxSplit = 0
        minIndex = 20
        # Candidate angles in tenths of a degree: 0..5 first, then -1..-5,
        # so that on ties the earliest candidate wins.
        for step in list(range(6)) + [-1, -2, -3, -4, -5]:
            img_crop = interpolation.rotate(gray_img, step * 0.1, cval=140)
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
            closed = cv2.dilate(img_crop, kernel, iterations=1)
            edges = processImage(closed, 50, 150, tag=iTag)
            regions, left, right, top, bottom = splitImage(edges, img_crop)
            print("%s %s" % (len(regions), step))
            if len(regions) > maxSplit:
                maxSplit = len(regions)
                minIndex = step
        print("angle is: %d" % minIndex)
        if minIndex == 0:
            return
        img_crop = interpolation.rotate(gray_img, minIndex * 0.1, cval=140)
    else:
        # 0.01 is a sentinel meaning "do not rotate".
        if rotationAngel != 0.01:
            img_crop = interpolation.rotate(gray_img, rotationAngel, cval=140)
            cv2.imwrite("crop%s.png" % iTag, img_crop)
        else:
            img_crop = gray_img
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
    closed = cv2.dilate(img_crop, kernel, iterations=1)
    edges = processImage(closed, 50, 150, tag=iTag)
    splitImage(edges, img_crop, iTag)
コード例 #8
0
def processOnePageImage(gray_img, iTag, rotationAngel=0):
    """Rotate a grayscale page as requested and split it into regions.

    Modes selected by ``rotationAngel``:

    * ``0``     -- estimate the skew on the page rotated by 90 degrees,
      rotate back by ``angle-90`` and write ``crop<iTag>.png``.
    * ``-10``   -- search 0.0..+0.5 and then -0.1..-0.5 degrees for the
      rotation giving the most regions from ``splitImage``; return early
      when the best candidate is 0 degrees.
    * ``0.01``  -- sentinel: use the page unrotated.
    * otherwise -- rotate by ``rotationAngel`` and write ``crop<iTag>.png``.

    The selected image is then dilated, run through ``processImage`` and
    passed to ``splitImage`` with ``iTag``.

    The progress output uses single-argument ``print(...)`` calls so the
    block parses, and prints the same text, on both Python 2 and Python 3.

    :param gray_img: grayscale page image.
    :param iTag: tag used in output file names and progress messages.
    :param rotationAngel: mode selector / rotation angle, see above.
    :return: None.
    """
    (_, img) = cv2.threshold(gray_img, 110, 255, cv2.THRESH_BINARY_INV)
    scale = psegutils.estimate_scale(img)
    # NOTE(review): `binary` is not used after this point; the calls are
    # kept in case the helpers modify their arguments -- confirm.
    binary = remove_hlines(img, gray_img, scale)
    binary = remove_vlines(binary, gray_img, scale)
    if rotationAngel == 0:
        img_crop = interpolation.rotate(gray_img,90)
        angle = estimate_angle(img_crop)
        print("%s %s" % (iTag, angle))
        img_crop = interpolation.rotate(img_crop,angle-90, cval=140)
        cv2.imwrite('crop%s.png' % iTag, img_crop)
    elif rotationAngel == -10:
        print("start")
        maxSplit = 0
        minIndex = 20
        # Positive candidates: 0.0 .. 0.5 degrees.
        for i in range(6):
            img_crop = interpolation.rotate(gray_img,i*0.1,cval=140)
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
            closed = cv2.dilate(img_crop, kernel, iterations = 1)
            edges = processImage(closed, 50, 150, tag=iTag)
            regions,left, right,top,bottom = splitImage(edges, img_crop)
            print("%s %s" % (len(regions), i))
            if len(regions) > maxSplit:
                maxSplit = len(regions)
                minIndex = i

        # Negative candidates: -0.1 .. -0.5 degrees.
        for i in range(5):
            img_crop = interpolation.rotate(gray_img,(i+1)*-0.1,cval=140)
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
            closed = cv2.dilate(img_crop, kernel, iterations = 1)
            edges = processImage(closed, 50, 150, tag=iTag)
            regions,left, right,top,bottom = splitImage(edges, img_crop)
            print("%s %s" % (len(regions), (i+1)*-1))
            if len(regions) > maxSplit:
                maxSplit = len(regions)
                minIndex = -1*(i+1)
        print("angle is: %d" % minIndex)
        if minIndex == 0:
            return
        img_crop = interpolation.rotate(gray_img,minIndex*0.1,cval=140)
    else:
        # 0.01 is a sentinel meaning "do not rotate".
        if rotationAngel != 0.01:
            img_crop = interpolation.rotate(gray_img,rotationAngel,cval=140)
            cv2.imwrite('crop%s.png' % iTag, img_crop)
        else:
            img_crop = gray_img
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
    closed = cv2.dilate(img_crop, kernel, iterations = 1)
    edges = processImage(closed, 50, 150, tag=iTag)
    splitImage(edges, img_crop, iTag)
コード例 #9
0
    def _process_segment(self, page_image, page, textregion, region_xywh,
                         page_id, input_file, n):
        """Segment one text region image into lines and add them to the region.

        Existing TextLines are removed when the ``overwrite`` parameter is
        set; otherwise the region is left untouched.  The image is converted
        to an inverted binary array, segmented, put into reading order, and
        for every line a cropped image is saved to the workspace and a
        TextLine with a rectangular outline is appended to ``textregion``.

        Nothing is added when the scale is NaN, above 1000 or below the
        ``minscale`` parameter, or when more than ``maxlines`` labels are
        found.

        :param page_image: PIL image of the region.
        :param page: not used by this method.
        :param textregion: PAGE text region that receives the TextLines.
        :param region_xywh: coordinate record passed to
            ``coordinates_for_segment``; its ``'features'`` entry is stored
            as the comment of each line image.
        :param page_id: page identifier used for ids and log messages.
        :param input_file: input file used to derive the output file id.
        :param n: index appended to the saved image names (presumably the
            region number -- TODO confirm against the caller).
        :return: None.
        """
        LOG = getLogger('OcrdAnybaseocrTextline')
        #check for existing text lines and whether to overwrite them
        if textregion.get_TextLine():
            if self.parameter['overwrite']:
                LOG.info('removing existing TextLines in region "%s"', page_id)
                textregion.set_TextLine([])
            else:
                LOG.warning('keeping existing TextLines in region "%s"',
                            page_id)
                return

        binary = ocrolib.pil2array(page_image)

        # Collapse colour channels, then normalise and invert.
        if len(binary.shape) > 2:
            binary = np.mean(binary, 2)
        binary = np.array(1 - binary / np.amax(binary), 'B')

        if self.parameter['scale'] == 0:
            scale = psegutils.estimate_scale(binary)
        else:
            scale = self.parameter['scale']

        if np.isnan(
                scale) or scale > 1000.0 or scale < self.parameter['minscale']:
            LOG.warning(str(scale) + ": bad scale; skipping!\n")
            return

        segmentation = self.compute_segmentation(binary, scale)
        if np.amax(segmentation) > self.parameter['maxlines']:
            LOG.warning("too many lines %i; skipping!\n",
                        (np.amax(segmentation)))
            return
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # renumber the labels so that they conform to the specs

        nlabels = np.amax(segmentation) + 1
        renumber = np.zeros(nlabels, 'i')
        for i, v in enumerate(lsort):
            renumber[lines[v].label] = 0x010000 + (i + 1)
        segmentation = renumber[segmentation]

        lines = [lines[i] for i in lsort]
        cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])

        for i, l in enumerate(lines):
            # l.bounds indexes the image array as [bounds[0], bounds[1]]
            # (see the crop below), so bounds[0] is the row (y) slice and
            # bounds[1] the column (x) slice.  They used to be bound the
            # other way round, which transposed the polygon.
            min_y, max_y = (l.bounds[0].start, l.bounds[0].stop)
            min_x, max_x = (l.bounds[1].start, l.bounds[1].stop)

            # Axis-aligned rectangle as (x, y) points.
            line_polygon = [[min_x, min_y], [max_x, min_y], [max_x, max_y],
                            [min_x, max_y]]

            line_polygon = coordinates_for_segment(line_polygon, page_image,
                                                   region_xywh)
            line_points = points_from_polygon(line_polygon)

            # Crop, binarise at the mid-range value and invert for saving.
            img = cleaned[l.bounds[0], l.bounds[1]]
            img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
            img = 255 - img
            img = ocrolib.array2pil(img)

            file_id = make_file_id(input_file, self.output_file_grp)
            file_path = self.workspace.save_image_file(
                img,
                file_id + "_" + str(n) + "_" + str(i),
                page_id=page_id,
                file_grp=self.output_file_grp)
            ai = AlternativeImageType(filename=file_path,
                                      comments=region_xywh['features'])
            line_id = '%s_line%04d' % (page_id, i)
            line = TextLineType(custom='readingOrder {index:' + str(i) + ';}',
                                id=line_id,
                                Coords=CoordsType(line_points))
            line.add_AlternativeImage(ai)
            textregion.add_TextLine(line)
コード例 #10
0
def analyze_page_layout(binary, gray, rgb=None):
    """Run the ocropy page segmentation pipeline with fixed settings.

    The binary page is inverted, segmented into lines, ordered and
    relabelled.  Three optional stages (loading test images, drawing the
    line boxes, writing the results to disk) are switched off by the flags
    defined at the top of the function, so with the flags as they are the
    function only performs the computation and returns ``None``.

    :param binary: binary page image (inverted internally).
    :param gray: grayscale page image; only used by the disabled output
        stage.
    :param rgb: optional colour image; only used by the disabled
        visualisation stage.
    """
    # Debug switches -- all disabled.
    load_test_images = False
    show_bounding_boxes = False
    write_outputs = False

    # Segmentation settings.
    hscale = 1.0  # Non-standard scaling of horizontal parameters.
    vscale = 1.0  # Non-standard scaling of vertical parameters.
    threshold = 0.2  # Baseline threshold.
    usegauss = True  # Use gaussian instead of uniform.
    maxseps = 0  # Maximum black column separators.
    sepwiden = 10  # Widen black separators (to account for warping).
    blackseps = True
    maxcolseps = 3  # Maximum # whitespace column separators.
    csminheight = 10  # Minimum column height (units=scale).

    # Line extraction settings (only read by the output stage).
    noise = 8  # Noise threshold for removing small components from lines.
    gray_output = True  # Also output lines cut from the grayscale page.
    pad = 3  # Padding for extracted lines.
    expand = 3  # Expand mask for grayscale extraction.

    if load_test_images:
        bin_image_filepath = './ocropy_test.bin.png'
        gray_image_filepath = './ocropy_test.nrm.png'

        binary = ocrolib.read_image_binary(bin_image_filepath)
        gray = ocrolib.read_image_gray(gray_image_filepath)

    binary = 1 - binary  # Invert.

    scale = psegutils.estimate_scale(binary)
    segmentation = compute_segmentation(
        binary, scale, blackseps, maxseps, maxcolseps, csminheight,
        sepwiden, usegauss, vscale, hscale, threshold, quiet=True)

    lines = psegutils.compute_lines(segmentation, scale)
    lsort = psegutils.topsort(
        psegutils.reading_order([line.bounds for line in lines]))

    # Renumber the labels so that they conform to the specs.
    renumber = np.zeros(np.amax(segmentation) + 1, 'i')
    for rank, line_idx in enumerate(lsort, 1):
        renumber[lines[line_idx].label] = 0x010000 + rank
    segmentation = renumber[segmentation]  # Image.

    lines = [lines[line_idx] for line_idx in lsort]

    # Visualize bounding boxes.
    if show_bounding_boxes and rgb is not None:
        # REF [function] >> extract_masked() in ${OCROPY_HOME}/ocrolib/psegutils.py.
        for line in lines:
            rows, cols = line.bounds[0], line.bounds[1]
            y0, x0, y1, x1 = [
                int(v) for v in [rows.start, cols.start, rows.stop, cols.stop]
            ]
            cv2.rectangle(rgb, (x0, y0), (x1, y1), (0, 0, 255), 1,
                          cv2.LINE_AA)
        cv2.imshow('Image', rgb)
        cv2.waitKey(0)

    # Output everything.
    if write_outputs:
        if not os.path.exists(outputdir):
            os.mkdir(outputdir)

        ocrolib.write_page_segmentation("%s.pseg.png" % outputdir,
                                        segmentation)
        cleaned = ocrolib.remove_noise(binary, noise)
        for i, line in enumerate(lines):
            binline = psegutils.extract_masked(
                1 - cleaned, line, pad=pad, expand=expand)  # Image.
            ocrolib.write_image_binary(
                "%s/01%04x.bin.png" % (outputdir, i + 1), binline)
            if gray_output:
                grayline = psegutils.extract_masked(
                    gray, line, pad=pad, expand=expand)  # Image.
                ocrolib.write_image_gray(
                    "%s/01%04x.nrm.png" % (outputdir, i + 1), grayline)
コード例 #11
0
def processPngFile(outRoot, origFile, fileNum):
    """Segment one page image into text lines and save an annotated copy.

    The image is copied into a per-file working directory below ``outRoot``,
    binarized, segmented into lines in reading order, and each line is
    written as a binary PNG.  The page segmentation is then read back, every
    region's bounding box is drawn onto the copied image, and the result is
    saved both next to the working copy (``*.out.png``) and into a sibling
    ``<outRoot>.out`` directory.

    Relies on names from the enclosing module: ``binarize``, ``desc``,
    ``checktype``, ``ABINARY2``, ``check_page``, ``compute_segmentation``,
    ``maxlines``, ``noise``, ``pad`` and ``expand``.

    :param outRoot: root directory for the working directories.
    :param origFile: path of the image to process.
    :param fileNum: number used in the working directory name.
    :return: ``True`` on success; ``False`` when binarization fails, the
        page check rejects the image, the scale is unusable, or too many
        lines are found.
    """
    baseName = os.path.basename(origFile)
    baseBase, _ = os.path.splitext(baseName)
    outDir = os.path.join(outRoot, "%s.%03d" % (baseBase, fileNum))
    inFile = os.path.join(outDir, baseName)

    os.makedirs(outDir, exist_ok=True)
    shutil.copy(origFile, inFile)

    inBase, _ = ocrolib.allsplitext(inFile)
    print("**  inBase=%s" % inBase)

    fname = inFile
    outputdir = inBase
    binFile = inBase + ".bin.png"
    outFile = inBase + ".out.png"
    outRoot2, outDir2 = os.path.split(outRoot)
    outFile2 = os.path.join(outRoot2, "%s.out" % outDir2, baseName)
    print("outFile2=%s" % outFile2)
    grayFile = inBase + ".nrm.png"
    psegFile = inBase + ".pseg.png"
    print("  inFile=%s" % inFile)
    print(" binFile=%s" % binFile)
    print("grayFile=%s" % grayFile)
    print(" outFile=%s" % outFile)
    assert inFile and binFile
    assert outFile != inFile
    assert outFile != binFile

    if not binarize(inFile, binFile, grayFile):
        binExists = os.path.exists(binFile)
        print("Couldn't binarize inFile=%s binFile=%s exists=%s" %
              (inFile, binFile, binExists))
        return False

    binary = ocrolib.read_image_binary(binFile)
    print("$$ %s=%s" % (binFile, desc(binary)))
    height, width = binary.shape
    checktype(binary, ABINARY2)
    check = check_page(np.amax(binary) - binary)
    if check is not None:
        print("%s SKIPPED %s (use -n to disable this check)" % (inFile, check))
        return False

    binary = 1 - binary  # invert

    scale = psegutils.estimate_scale(binary)
    print("scale %f" % scale)
    if np.isnan(scale) or scale > 1000.0:
        print("%s: bad scale (%g); skipping\n" % (fname, scale))
        return False

    # find columns and text lines
    print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if np.amax(segmentation) > maxlines:
        print("%s: too many lines %g" % (fname, np.amax(segmentation)))
        return False

    print("segmentation=%s" % desc(segmentation))
    print("number of lines %g" % np.amax(segmentation))

    # compute the reading order
    print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)
    print("$$ lsort = %d = %s...%s" % (len(lsort), lsort[:10], lsort[-10:]))

    # renumber the labels so that they conform to the specs
    nlabels = np.amax(segmentation) + 1
    renumber = np.zeros(nlabels, 'i')
    for i, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (i + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[i] for i in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, noise)
    for i, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=pad,
                                           expand=expand)
        ocrolib.write_image_binary("%s/01%04x.bin.png" % (outputdir, i + 1),
                                   binline)
    # The first field is the index of the last line.  It is derived from
    # len(lines) rather than the loop variable, which is unbound (and used
    # to raise) when the page has no lines at all.
    print("%6d  %s %4.1f %d" % (len(lines) - 1, fname, scale, len(lines)))

    # to proceed, we need a pseg file and a subdirectory containing text lines
    assert os.path.exists(psegFile), "%s: no such file" % psegFile
    assert os.path.isdir(inBase), "%s: no such directory" % inBase

    # iterate through the text lines in reading order, based on the page segmentation file
    pseg = ocrolib.read_page_segmentation(psegFile)
    print("$$ %s=%s" % (psegFile, desc(pseg)))

    regions = ocrolib.RegionExtractor()
    print("$$ regions=%s" % regions)
    regions.setPageLines(pseg)

    im = Image.open(inFile)
    print("~~%s %s" % (inFile, im.size))
    print("$$ regions=%s=%s" % (regions, sorted(regions.__dict__)))
    print("$$ regions.length=%s" % regions.length())

    # One drawing context serves all boxes; region indices start at 1.
    draw = ImageDraw.Draw(im)
    for region_idx in range(1, regions.length()):
        y0, x0, y1, x1 = regions.bbox(region_idx)
        draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0), width=3)
        draw.rectangle((x0, y0, x1, y1), outline=(0, 0, 255), width=0)
    del draw

    # write output files
    print("outFile=%s" % outFile)
    im.save(outFile, "PNG")
    print("outFile2=%s" % outFile2)
    outDir2 = os.path.dirname(outFile2)
    os.makedirs(outDir2, exist_ok=True)
    im.save(outFile2, "PNG")
    assert os.path.exists(outFile2)
    return True
コード例 #12
0
def Segment(fname, save_path):
    """Segment a form image into cells and save them under ``save_path``.

    ``save_path`` is wiped and recreated on every call.  The image is
    binarized, split into vertical blocks, and each block's main body is
    segmented into text lines whose records are collected in dictionaries,
    merged across blocks and finally written out with
    ``save_img_from_dic``.

    :param fname: path of the image to segment.
    :param save_path: output directory (deleted first if it exists).
    :return: ``1`` on success; ``0`` when ``Binarization`` returns ``None``
        for both images, in which case the input file is copied unchanged
        into ``save_path``.
    """
    # Clear the cached results of the previous run.
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    # Read the image data.
    raw = read_image_gray(fname)

    # Binarize; meant to be robust to rotation and brightness changes.
    gray_o, bina_o = Binarization(raw)

    # A type error occurred: return the "segmentation failed" flag 0.
    if gray_o is None and bina_o is None:
        new_fname = os.path.basename(fname)
        # Keep a copy of the unsegmented input in the output directory.
        # The previous cv2.imwrite(path) call had no image argument and
        # therefore always raised; copying avoids guessing the pixel format.
        shutil.copy(fname, os.path.join(save_path, new_fname))
        return 0

    # Estimate the text width.
    bina_o = ocrolib.remove_noise(bina_o, 8)
    scale = psegutils.estimate_scale(bina_o)

    # Split the page into blocks.
    block_grays, block_binas = split_columns_vertical(gray_o, bina_o, scale)

    if len(block_grays) > 2:
        # Special layout: the indicator columns are all separated by
        # vertical black lines, so go straight to line segmentation.
        mb_dics = {}  # all file records once blank-row records are removed
        mb_block = []  # row/column counts of every block
        for i, gray_i in enumerate(block_grays):
            mb_dic = {}
            bina_i = block_binas[i]

            # Separate the caption bar from the core indicator area.
            cp_gray, cp_bina, mb_gray, mb_bina = get_caption_mainbody(
                gray_i, bina_i, scale)

            # Line segmentation; results and storage names go into a dict.
            mb_dic, max_row = mainbody_textline_segment(
                mb_gray, mb_bina, scale, i, 0, mb_dic)
            max_col = 0
            max_row += 1

            # First structuring pass: adjust the blank columns.
            mb_dics = modify_fname_dictionary(mb_dic, mb_dics)
            mb_block.append([i, max_col, max_row])

        # Structuring post-processing: merge the blocks.
        res_mb_dics = modify_mainbody_display(mb_dics, mb_block)

        # Remove the block markers.
        res_mb_dics = add_flag(res_mb_dics)

    else:
        # Normal layout: a single block or two blocks.
        mb_dics = {}  # all file records once blank-row records are removed
        mb_block = []  # row/column counts of every block

        for i, gray_i in enumerate(block_grays):
            bina_i = block_binas[i]

            # Separate the caption bar from the core indicator area.
            cp_gray, cp_bina, mb_gray, mb_bina = get_caption_mainbody(
                gray_i, bina_i, scale)

            # Split the caption bar column-wise into a list of positions.
            cp_index_list = []
            if cp_bina is not None:
                cp_index_list = caption_segment(cp_bina)

            # Split the core indicator area column-wise at those positions
            # and cut out the image data of every column.
            mb_grays, mb_binas = mainbody_segment(mb_gray, mb_bina, scale,
                                                  cp_index_list)

            # ---- core indicator area: text line segmentation ----
            max_col = 0  # highest column index seen in this block
            max_row = 0  # highest row count seen in this block
            mb_dic = {}  # file records of this block
            for j, bina_j in enumerate(mb_binas):
                if j > max_col:
                    max_col = j
                gray_j = mb_grays[j]

                # Text line segmentation; results are stored in the dict.
                mb_dic, row = mainbody_textline_segment(
                    gray_j, bina_j, scale, i, j, mb_dic)
                if row > max_row:
                    max_row = row

            max_col += 1
            max_row += 1

            # Only record blocks that produced entries.  The previous test
            # `mb_dic is not {}` compared identity with a fresh dict and was
            # therefore always True.
            if mb_dic:
                # First structuring pass: adjust the blank columns.
                mb_dics = modify_fname_dictionary(mb_dic, mb_dics)
                mb_block.append([i, max_col, max_row])

        # Structuring post-processing: merge the blocks; the result holds
        # all file records after the display adjustment.
        res_mb_dics = modify_mainbody_display(mb_dics, mb_block)
        # Remove the block markers.
        res_mb_dics = add_flag(res_mb_dics)

    # Save the results from the dictionary and return the success flag 1.
    save_img_from_dic(save_path, res_mb_dics)
    return 1
コード例 #13
0
def mainbody_textline_segment(gray, bina, scale, black_id, col_id, dictionary):
    '''
    Split one attribute column of the "core indicator" area into text lines.

    The binary image and the scale are both recomputed inside this function
    (from ``gray`` and from the cleaned binary image respectively), so the
    incoming values of ``bina`` and ``scale`` are not used.

    :param gray: grayscale image of one attribute column (2-D array);
        thresholded at 0.5, so presumably normalised to [0, 1] - TODO confirm
    :param bina: binary image of the same column (overwritten, see above)
    :param scale: scale estimate (overwritten, see above)
    :param black_id: id of the block this column belongs to
    :param col_id: id of this column inside its block
    :param dictionary: record store; updated in place with one entry per
        accepted text line, keyed ``'<black_id>.<col_id>.<row_id>.png'``
    :return: ``(dictionary, max_row)`` where ``max_row`` is the index of the
        last text line that was stored (0 when no line or one line was found)
    '''

    # Suppress several kinds of interference.
    bina = 1 * (gray < 0.5)
    bina = ocrolib.remove_noise(bina, 5)  # intended to drop small noise
    scale = psegutils.estimate_scale(bina)
    height, _ = gray.shape
    # Intended to remove bright-spot interference at the edges (horizontal).
    lines = morph.select_regions(bina, sl.dim0, min=2 * scale)
    bina = bina - lines
    # Intended to remove bright-spot interference at the edges (vertical).
    lines = morph.select_regions(bina, sl.dim1, min=2 * scale)
    bina = bina - lines

    # Merge neighbouring characters into line-shaped blobs.
    textlines = filters.maximum_filter(bina, (0, scale))
    textlines = morph.rb_erosion(textlines, (3, 0))
    textlines = morph.rb_dilation(textlines, (0, scale))

    # Flag every image row whose text-pixel count exceeds ``scale``.
    textpixe_num = np.sum(textlines, axis=1)
    textpixe_num = 1 * ((1.0 * textpixe_num / scale) > 1)
    textpixe_num = list(textpixe_num)

    text_index = [i for i, a in enumerate(textpixe_num) if a == 1]
    max_row = 0
    if text_index:
        # Group consecutive flagged rows into [first_row, last_row] runs.
        # Every flagged row is visited, including the last one; stopping one
        # element early would shorten the final run by a row and silently
        # drop a run that starts on the last flagged row.
        indexs = []
        beg_index = end_index = text_index[0]
        for row in text_index[1:]:
            if row != end_index + 1:
                indexs.append([beg_index, end_index])
                beg_index = row
            end_index = row
        indexs.append([beg_index, end_index])

        # Keep only runs that are tall enough to be a real text line.
        results_indexs = [
            index for index in indexs if index[1] - index[0] >= scale / 4
        ]

        for row_id, index in enumerate(results_indexs):
            key = '%d.%d.%d.png' % (black_id, col_id, row_id)
            # Crop the line with a 5-row margin, clamped to the image.
            top = max(0, index[0] - 5)
            bottom = min(height, index[1] + 5)
            data = 255 * gray[top:bottom, :]
            value = name_dic(index, data)
            dictionary[key] = value
            max_row = row_id
    return dictionary, max_row
コード例 #14
0
def process(job):
    """Segment one page image into text lines and write the results to disk.

    ``job`` is an ``(imagepath, index)`` pair. The page is read as a binary
    image, optionally sanity-checked, inverted, segmented into lines, put
    into reading order and written out as:

    * ``<base>.pseg.png`` - the renumbered page segmentation
    * ``<base>/<imagename>_01XXXX.bin.png`` - one binary image per line

    where ``<base>`` is ``imagepath`` with its extensions stripped. Settings
    are read from the module-level ``args`` mapping. As a side effect the
    module-level name ``base`` is set.

    :param job: ``(imagepath, index)`` tuple
    :return: the output directory on success, ``None`` when the page is
        skipped (unreadable, failed check, bad scale, too many lines)
    """
    imagepath, i = job
    global base
    base, _ = ocrolib.allsplitext(imagepath)
    outputdir = base
    imagename_base = os.path.basename(os.path.normpath(base))

    try:
        binary = ocrolib.read_image_binary(imagepath)
    except IOError:
        if ocrolib.trace: traceback.print_exc()
        print_error("cannot open either %s.bin.png or %s" % (base, imagepath))
        return

    checktype(binary, ABINARY2)

    if not args['nocheck']:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print_error("%s SKIPPED %s (use -n to disable this check)" %
                        (imagepath, check))
            return

    binary = 1 - binary  # invert

    if args['scale'] == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args['scale']
    print_info("scale %f" % (scale))
    if isnan(scale) or scale > 1000.0:
        print_error("%s: bad scale (%g); skipping\n" % (imagepath, scale))
        return
    if scale < args['minscale']:
        print_error("%s: scale (%g) less than --minscale; skipping\n" %
                    (imagepath, scale))
        return

    # find columns and text lines

    if not args['quiet']: print_info("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args['maxlines']:
        print_error("%s: too many lines %g" % (imagepath, amax(segmentation)))
        return
    if not args['quiet']: print_info("number of lines %g" % amax(segmentation))

    # compute the reading order

    if not args['quiet']: print_info("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    # (loop variables are deliberately NOT called ``i`` so the job index
    # unpacked above is still intact for the summary line at the end)

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything
    if not args['quiet']: print_info("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[k] for k in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args['noise'])
    for line_no, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=args['pad'],
                                           expand=args['expand'])
        ocrolib.write_image_binary(
            "%s/%s_01%04x.bin.png" % (outputdir, imagename_base, line_no + 1),
            binline)
    # summary: job index, input path, scale, number of lines written
    print_info("%6d  %s %4.1f %d" % (i, imagepath, scale, len(lines)))
    return outputdir
コード例 #15
0
def process1(job):
    """Segment one page into text lines, optionally also cutting gray lines.

    ``job`` is a ``(fname, index)`` pair. ``<base>.bin.png`` is tried first
    and ``fname`` itself second, where ``<base>`` is ``fname`` with its
    extensions stripped. The page is optionally sanity-checked, inverted,
    segmented, put into reading order and written out as:

    * ``<base>.pseg.png`` - the renumbered page segmentation
    * ``<base>/01XXXX.bin.png`` - one binary image per line
    * ``<base>/01XXXX.nrm.png`` - one gray image per line, only when
      ``args.gray`` is set and ``<base>.nrm.png`` exists

    Settings are read from the module-level ``args`` object. As a side
    effect the module-level name ``base`` is set.

    :param job: ``(fname, index)`` tuple
    :return: ``None`` in all cases; skipped pages are reported on
        stdout/stderr
    """
    fname, i = job
    global base
    base, _ = ocrolib.allsplitext(fname)
    outputdir = base

    try:
        binary = ocrolib.read_image_binary(base + ".bin.png")
    except IOError:
        try:
            binary = ocrolib.read_image_binary(fname)
        except IOError:
            if ocrolib.trace:
                traceback.print_exc()
            print("cannot open either", base + ".bin.png", "or", fname)
            return

    checktype(binary, ABINARY2)

    if not args.nocheck:
        check = check_page(amax(binary) - binary)
        if check is not None:
            print(fname, "SKIPPED", check, "(use -n to disable this check)")
            return

    # ``gray`` stays None unless gray output was requested AND the
    # normalised image exists; a missing file must not leave the name
    # unbound (that used to crash with UnboundLocalError).
    gray = None
    if args.gray:
        nrm_path = base + ".nrm.png"
        if os.path.exists(nrm_path):
            gray = ocrolib.read_image_gray(nrm_path)
            checktype(gray, GRAYSCALE)
        else:
            sys.stderr.write(
                "%s: not found; gray line images will not be written\n" %
                nrm_path)

    binary = 1 - binary  # invert

    if args.scale == 0:
        scale = psegutils.estimate_scale(binary)
    else:
        scale = args.scale
    print("scale", scale)
    if isnan(scale) or scale > 1000.0:
        sys.stderr.write("%s: bad scale (%g); skipping\n" % (fname, scale))
        return
    if scale < args.minscale:
        sys.stderr.write("%s: scale (%g) less than --minscale; skipping\n" %
                         (fname, scale))
        return

    # find columns and text lines

    if not args.quiet:
        print("computing segmentation")
    segmentation = compute_segmentation(binary, scale)
    if amax(segmentation) > args.maxlines:
        print(fname, ": too many lines", amax(segmentation))
        return
    if not args.quiet:
        print("number of lines", amax(segmentation))

    # compute the reading order

    if not args.quiet:
        print("finding reading order")
    lines = psegutils.compute_lines(segmentation, scale)
    order = psegutils.reading_order([l.bounds for l in lines])
    lsort = psegutils.topsort(order)

    # renumber the labels so that they conform to the specs
    # (loop variables are deliberately NOT called ``i`` so the job index
    # unpacked above is still intact for the summary line at the end)

    nlabels = amax(segmentation) + 1
    renumber = zeros(nlabels, 'i')
    for rank, v in enumerate(lsort):
        renumber[lines[v].label] = 0x010000 + (rank + 1)
    segmentation = renumber[segmentation]

    # finally, output everything

    if not args.quiet:
        print("writing lines")
    if not os.path.exists(outputdir):
        os.mkdir(outputdir)
    lines = [lines[k] for k in lsort]
    ocrolib.write_page_segmentation("%s.pseg.png" % outputdir, segmentation)
    cleaned = ocrolib.remove_noise(binary, args.noise)
    for line_no, l in enumerate(lines):
        binline = psegutils.extract_masked(1 - cleaned,
                                           l,
                                           pad=args.pad,
                                           expand=args.expand)
        ocrolib.write_image_binary(
            "%s/01%04x.bin.png" % (outputdir, line_no + 1), binline)
        if gray is not None:
            grayline = psegutils.extract_masked(gray,
                                                l,
                                                pad=args.pad,
                                                expand=args.expand)
            ocrolib.write_image_gray(
                "%s/01%04x.nrm.png" % (outputdir, line_no + 1), grayline)
    # summary: job index, input name, scale, number of lines written
    print("%6d" % i, fname, "%4.1f" % scale, len(lines))
コード例 #16
0
    def _process_segment(self, page_image, page, region_xywh, page_id,
                         input_file, n):
        """Segment a page image into text lines and attach them to ``page``.

        The image is converted to an inverted, normalised byte array. If the
        page has no text region yet, one spanning the whole array is added;
        otherwise the last existing region is used. Every detected line is
        saved through ``self.workspace.save_image_file`` and added to that
        region as a ``TextLineType`` carrying an ``AlternativeImageType``.

        Side effects visible here: ``region_xywh['features']`` gets
        ``",textline"`` appended, and ``test.bin.png`` is (over)written in
        the current working directory.

        :param page_image: page image, converted with ``ocrolib.pil2array``
        :param page: page object that receives the region and text lines
        :param region_xywh: mapping whose ``'features'`` string is extended
        :param page_id: page identifier; used when saving and in warnings
        :param input_file: input file entry; its ``ID`` seeds the output id
        :param n: sequence number used for the fallback output file id
        :return: ``None``; returns early with a warning on a bad scale or
            too many lines
        """
        binary = ocrolib.pil2array(page_image)
        binary = np.array(1 - binary / np.amax(binary), 'B')
        if page.get_TextRegion() is None or len(page.get_TextRegion()) < 1:
            min_x, max_x = (0, binary.shape[0])
            min_y, max_y = (0, binary.shape[1])
            textregion = TextRegionType(
                Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                  (min_x, min_y, max_x, min_y, max_x, max_y,
                                   min_x, max_y)))
            page.add_TextRegion(textregion)
        else:
            textregion = page.get_TextRegion()[-1]
        ocrolib.write_image_binary("test.bin.png", binary)
        if self.parameter['scale'] == 0:
            scale = psegutils.estimate_scale(binary)
        else:
            scale = self.parameter['scale']
        if np.isnan(
                scale) or scale > 1000.0 or scale < self.parameter['minscale']:
            # ``page_id`` identifies the page; no ``fname`` exists in this
            # scope, so referencing it here raised NameError.
            LOG.warning("%s: bad scale (%g); skipping\n", page_id, scale)
            return

        segmentation = self.compute_segmentation(binary, scale)
        if np.amax(segmentation) > self.parameter['maxlines']:
            # Logging arguments are passed separately, one per placeholder.
            LOG.warning("%s: too many lines %i", page_id,
                        np.amax(segmentation))
            return
        lines = psegutils.compute_lines(segmentation, scale)
        order = psegutils.reading_order([l.bounds for l in lines])
        lsort = psegutils.topsort(order)

        # renumber the labels so that they conform to the specs

        nlabels = np.amax(segmentation) + 1
        renumber = np.zeros(nlabels, 'i')
        for i, v in enumerate(lsort):
            renumber[lines[v].label] = 0x010000 + (i + 1)
        segmentation = renumber[segmentation]

        lines = [lines[i] for i in lsort]
        # NOTE(review): ``cleaned`` is not used below; the line images are
        # cut from ``binary``. Kept so the call itself still happens.
        cleaned = ocrolib.remove_noise(binary, self.parameter['noise'])
        region_xywh['features'] += ",textline"
        for i, l in enumerate(lines):
            ocrolib.write_image_binary("test.bin.png", binary[l.bounds[0],
                                                              l.bounds[1]])
            min_x, max_x = (l.bounds[0].start, l.bounds[0].stop)
            min_y, max_y = (l.bounds[1].start, l.bounds[1].stop)

            # Cut the line out of the page and binarise it to 0/255.
            img = binary[l.bounds[0], l.bounds[1]]
            img = np.array(255 * (img > ocrolib.midrange(img)), 'B')
            img = ocrolib.array2pil(img)

            # Derive the output id from the input id; fall back to a padded
            # id when the input id does not contain the input group name.
            file_id = input_file.ID.replace(self.input_file_grp,
                                            self.image_grp)
            if file_id == input_file.ID:
                file_id = concat_padded(self.image_grp, n)

            file_path = self.workspace.save_image_file(img,
                                                       file_id + "_" + str(i),
                                                       page_id=page_id,
                                                       file_grp=self.image_grp)
            ai = AlternativeImageType(filename=file_path,
                                      comments=region_xywh['features'])
            line = TextLineType(
                Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" %
                                  (min_x, min_y, max_x, min_y, max_x, max_y,
                                   min_x, max_y)))
            line.add_AlternativeImage(ai)
            textregion.add_TextLine(line)