def compute_segmentation(binary, scale, blackseps, maxseps, maxcolseps,
                         csminheight, sepwiden, usegauss, vscale, hscale,
                         threshold, quiet):
    """Segment a binary page image into labelled text lines.

    The pipeline runs, in order: horizontal-line removal, column separator
    detection, gradient-map / line-seed computation, and finally label
    propagation plus label spreading.

    Args:
        binary: Input image; converted to a NumPy array with typecode 'B'.
        scale: Scale value handed to every stage; also used as ``maxdist``
            when spreading labels.
        blackseps, maxseps, maxcolseps, csminheight, sepwiden: Forwarded
            unchanged to ``compute_colseps``.
        usegauss, hscale: Forwarded unchanged to ``compute_gradmaps``.
        vscale: Forwarded to both ``compute_gradmaps`` and
            ``compute_line_seeds``.
        threshold: Forwarded unchanged to ``compute_line_seeds``.
        quiet: When truthy, no progress messages are emitted.

    Returns:
        The merged label array multiplied by the processed binary image.
    """

    def _progress(message):
        # Progress reporting goes through print_info unless silenced.
        if not quiet:
            print_info(message)

    page = np.array(binary, 'B')

    # Horizontal black lines only interfere with the rest of the page
    # segmentation, so they are removed up front.
    page = remove_hlines(page, scale)

    # Column finding; this stage also hands back an updated page image.
    _progress("computing column separators")
    colseps, page = compute_colseps(
        page, scale, blackseps, maxseps, maxcolseps, csminheight, sepwiden)

    # Text line seeds are derived from the gradient maps.
    _progress("computing lines")
    bottom, top, boxmap = compute_gradmaps(page, scale, usegauss, vscale, hscale)
    seeds = compute_line_seeds(
        page, bottom, top, colseps, scale, threshold, vscale)
    DSAVE("seeds", [bottom, top, boxmap])

    # Extend the seeds to all remaining components: propagated labels win
    # wherever they are positive, otherwise the spread labels (restricted by
    # the page image) are used.
    _progress("propagating labels")
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    _progress("spreading labels")
    spread = morph.spread_labels(seeds, maxdist=scale)
    merged = np.where(propagated > 0, propagated, spread * page)

    return merged * page
def compute_segmentation(binary, scale):
    """Segment a binary page image into labelled text lines, tracing progress.

    Runs horizontal-line removal, column separator detection, line-seed
    computation and label propagation/spreading, printing a status line
    (some with a ``desc`` summary of an intermediate array) before or after
    each stage.

    Args:
        binary: Input image; converted to a NumPy array with typecode 'B'.
        scale: Scale value handed to every stage; also used as ``maxdist``
            when spreading labels.

    Returns:
        The merged label array multiplied by the processed binary image.
    """
    # The entry trace describes the input as received, before conversion.
    print("$$ compute_segmentation: %s %g" % (desc(binary), scale))
    page = np.array(binary, 'B')

    # Horizontal black lines only interfere with the rest of the page
    # segmentation, so they are removed up front.
    page = remove_hlines(page, scale)

    # Column finding; this stage also hands back an updated page image.
    print("computing column separators")
    colseps, page = compute_colseps(page, scale)

    # Text line seeds are derived from the gradient maps.
    print("computing lines")
    bottom, top, boxmap = compute_gradmaps(page, scale)
    seeds = compute_line_seeds(page, bottom, top, colseps, scale)
    print("seeds=%s" % desc(seeds))
    DSAVE("seeds", [bottom, top, boxmap])

    # Extend the seeds to all remaining components.
    print("propagating labels")
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    print("spreading labels: llabels=%s" % desc(propagated))
    spread = morph.spread_labels(seeds, maxdist=scale)

    # Propagated labels win wherever they are positive; elsewhere fall back
    # to the spread labels restricted by the page image.
    merged = np.where(propagated > 0, propagated, spread * page)
    result = merged * page

    print("$$ llabels: %s" % desc(merged))
    print("$$ segmentation: %s" % desc(result))
    return result
def text_line_segmentation(binary, scale=None, gray=None, num_col=1):
    """Segment a binary image into text lines and cut out one image per line.

    Args:
        binary: Input image; converted to an array with typecode 'B'.
        scale: Scale value for all stages. When ``None`` it is obtained from
            ``psegutils.estimate_scale`` on the converted image.
        gray: Optional image to cut the line images from. When ``None`` the
            line images are cut from ``1 - binary`` instead.
        num_col: Column separators are only computed when this is greater
            than 1; otherwise an all-zero separator map of the image's shape
            is used.

    Returns:
        A pair ``(line_ims, lines)``: the padded per-line images and the line
        objects returned by ``psegutils.compute_lines``, in the same order.
    """
    page = array(binary, 'B')
    if scale is None:
        scale = psegutils.estimate_scale(page)

    # Column finding is skipped for single-column input.
    if num_col > 1:
        colseps, page = compute_colseps(page, scale)
    else:
        colseps = np.zeros(page.shape)

    # Text line seeds are derived from the gradient maps.
    bottom, top, boxmap = compute_gradmaps(page, scale)
    seeds = compute_line_seeds(page, bottom, top, colseps, scale)

    # Extend the seeds to all remaining components: propagated labels win
    # wherever they are positive, otherwise the spread labels are used.
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    merged = where(propagated > 0, propagated, spread * page)
    segmentation = merged * page

    lines = psegutils.compute_lines(segmentation, scale, 0.8)

    def _cut_line(line):
        # The source image is chosen (and, for the binary case, rebuilt)
        # for every line, exactly as a per-line loop would do.
        source = (1 - page) if gray is None else gray
        cropped = psegutils.extract_masked(source, line, pad=0)
        return pad_by(cropped, 10, invert=False)

    line_ims = [_cut_line(line) for line in lines]
    return line_ims, lines
def compute_segmentation(binary, scale):
    """Segment a binary page image into labelled text lines.

    Runs horizontal-line removal, column separator detection, line-seed
    computation and label propagation/spreading. Progress messages are
    printed unless the module-level ``args.quiet`` is truthy.

    Args:
        binary: Input image; converted to an array with typecode 'B'.
        scale: Scale value handed to every stage; also used as ``maxdist``
            when spreading labels.

    Returns:
        The merged label array multiplied by the processed binary image.
    """

    def _announce(message):
        # args.quiet is consulted at each message, not cached up front.
        if not args.quiet:
            print(message)

    page = array(binary, 'B')

    # Horizontal black lines only interfere with the rest of the page
    # segmentation, so they are removed up front.
    page = remove_hlines(page, scale)

    # Column finding; this stage also hands back an updated page image.
    _announce("computing column separators")
    colseps, page = compute_colseps(page, scale)

    # Text line seeds are derived from the gradient maps.
    _announce("computing lines")
    bottom, top, boxmap = compute_gradmaps(page, scale)
    seeds = compute_line_seeds(page, bottom, top, colseps, scale)
    # NOTE: a debug dump of [bottom, top, boxmap] (DSAVE / imsave) was
    # commented out at this point in the original code.

    # Extend the seeds to all remaining components.
    _announce("propagating labels")
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    _announce("spreading labels")
    spread = morph.spread_labels(seeds, maxdist=scale)

    # Propagated labels win wherever they are positive; elsewhere fall back
    # to the spread labels restricted by the page image.
    merged = where(propagated > 0, propagated, spread * page)
    return merged * page
def compute_segmentation(self, binary, scale):
    """Segment a binary page image into labelled text lines.

    Delegates the individual stages to this object's ``remove_hlines``,
    ``compute_colseps``, ``compute_gradmaps`` and ``compute_line_seeds``
    methods, then propagates and spreads the resulting seed labels.
    Progress is reported through ``log.debug``.

    Args:
        binary: Input image; converted to a NumPy array with typecode 'B'.
        scale: Scale value handed to every stage; also used as ``maxdist``
            when spreading labels.

    Returns:
        The merged label array multiplied by the processed binary image.
    """
    page = np.array(binary, 'B')

    # Horizontal black lines only interfere with the rest of the page
    # segmentation, so they are removed up front.
    page = self.remove_hlines(page, scale)

    # Column finding; this stage also hands back an updated page image.
    log.debug("computing column separators")
    colseps, page = self.compute_colseps(page, scale)

    # Text line seeds are derived from the gradient maps.
    log.debug("computing lines")
    bottom, top, boxmap = self.compute_gradmaps(page, scale)
    log.debug("bottom=%s top=%s boxmap=%s", bottom, top, boxmap)
    seeds = self.compute_line_seeds(page, bottom, top, colseps, scale)

    # Extend the seeds to all remaining components.
    log.debug("propagating labels")
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    log.debug("spreading labels")
    spread = morph.spread_labels(seeds, maxdist=scale)

    # Propagated labels win wherever they are positive; elsewhere fall back
    # to the spread labels restricted by the page image.
    merged = np.where(propagated > 0, propagated, spread * page)
    return merged * page
def compute_segmentation(binary, scale):
    """Segment a binary page image into labelled text lines.

    Runs horizontal-line removal, column separator detection, line-seed
    computation and label propagation/spreading, without any progress
    output.

    Args:
        binary: Input image; converted to an array with typecode 'B'.
        scale: Scale value handed to every stage; also used as ``maxdist``
            when spreading labels.

    Returns:
        The merged label array multiplied by the processed binary image.
    """
    # Horizontal black lines only interfere with the rest of the page
    # segmentation, so they are removed right after conversion.
    page = remove_hlines(array(binary, 'B'), scale)

    # Column finding; this stage also hands back an updated page image.
    colseps, page = compute_colseps(page, scale)

    # Text line seeds are derived from the gradient maps.
    bottom, top, boxmap = compute_gradmaps(page, scale)
    seeds = compute_line_seeds(page, bottom, top, colseps, scale)
    # NOTE: a DSAVE("seeds", [bottom, top, boxmap]) debug dump was commented
    # out at this point in the original code.

    # Extend the seeds to all remaining components: propagated labels win
    # wherever they are positive, otherwise the spread labels (restricted by
    # the page image) are used.
    propagated = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    merged = where(propagated > 0, propagated, spread * page)

    return merged * page