Ejemplos de select_regions en Python, ejemplos de ocrolib.morph.select_regions en Python

Ejemplo n.º 1

0

Mostrar archivo

def caption_segment(binary):
    '''
    Locate candidate text regions in the binarized "title bar" image.

    :param binary: binarized image of the "title bar" to analyse
    :return: list of [start, end] pairs taken from ``compute_index`` after
        dropping regions that are too small and merging regions that lie
        close to the previously accepted one; an empty list when
        ``compute_index`` reports two or fewer regions
    '''
    # Remove interference near the borders.
    bina = ocrolib.remove_noise(binary, 8)
    scale = psegutils.estimate_scale(bina)
    lines = morph.select_regions(bina, sl.dim1, min=2 * scale)
    bina = bina - lines
    bina = morph.select_regions(bina, sl.dim0, min=scale / 3)
    # Grow the text areas so that neighbouring text gets connected.
    textlines = filters.maximum_filter(bina, (scale, scale / 2))
    # Compute the start/end positions of the candidate text regions.
    indexs_white = compute_index(textlines, th=scale / 2, n=1)
    indexs_lists = []
    if len(indexs_white) > 2:
        for index in indexs_white:
            # Drop candidates that are too small.
            if index[1] - index[0] <= scale / 2:
                continue
            # Merge with the last *accepted* region when the gap is small.
            # Only accepted regions are considered: the previous code could
            # try to remove a rejected first candidate from the result list,
            # which raised ValueError.
            if indexs_lists and index[0] - indexs_lists[-1][1] < scale / 3:
                indexs_lists[-1] = [indexs_lists[-1][0], index[1]]
            else:
                indexs_lists.append(index)
    return indexs_lists

Ejemplo n.º 2

0

Mostrar archivo

Archivo: process_images.py Proyecto: KayneWest/basicocr

def compute_separators_morph(binary,scale,sepwiden=10,maxseps=2):
    """Find vertical black lines that correspond to column separators.

    The image is dilated, opened with a (10*scale, 1) element and eroded,
    then filtered twice with ``morph.select_regions``: first on ``sl.dim1``
    (min 3, keeping 2*maxseps regions), then on ``sl.dim0``
    (min 20*scale, keeping maxseps regions).
    """
    size0 = int(max(5, scale / 4))
    size1 = int(max(5, scale)) + sepwiden
    result = morph.r_dilation(binary, (size0, size1))
    result = morph.rb_opening(result, (10 * scale, 1))
    result = morph.r_erosion(result, (size0 // 2, sepwiden))
    result = morph.select_regions(result, sl.dim1, min=3, nbest=2 * maxseps)
    return morph.select_regions(result, sl.dim0, min=20 * scale, nbest=maxseps)

Ejemplo n.º 3

0

Mostrar archivo

def compute_separators_morph(binary, scale, sepwiden, maxseps):
    """Find vertical black lines that correspond to column separators.

    Pipeline: dilation -> opening with a (10*scale, 1) element -> erosion,
    followed by two ``morph.select_regions`` passes (``sl.dim1`` with min 3
    and 2*maxseps kept, then ``sl.dim0`` with min 20*scale and maxseps kept).
    """
    base = int(max(5, scale / 4))
    dilate_by = (base, int(max(5, scale)) + sepwiden)
    erode_by = (base // 2, sepwiden)

    thickened = morph.r_dilation(binary, dilate_by)
    opened = morph.rb_opening(thickened, (10 * scale, 1))
    candidates = morph.r_erosion(opened, erode_by)

    candidates = morph.select_regions(
        candidates, sl.dim1, min=3, nbest=2 * maxseps)
    candidates = morph.select_regions(
        candidates, sl.dim0, min=20 * scale, nbest=maxseps)
    return candidates

Ejemplo n.º 4

0

Mostrar archivo

def compute_colseps_conv(binary, csminheight, maxcolseps, scale=1.0):
    """Find column separators by convolution and thresholding.

    A smoothed copy of the image is thresholded at 10% of its maximum to get
    whitespace, a first-derivative (along axis 1) response is thresholded at
    50% of its maximum to get edges, and both are combined.  Only regions
    whose ``sl.dim0`` exceeds csminheight*scale are kept, at most maxcolseps
    of them.  Intermediate images are passed to ``DSAVE``.
    """
    h, w = binary.shape
    sigma = (scale, scale * 0.5)

    # vertical whitespace: low values of the smoothed image
    whitespace = gaussian_filter(1.0 * binary, sigma)
    whitespace = uniform_filter(whitespace, (5.0 * scale, 1))
    thresh = whitespace < np.amax(whitespace) * 0.1
    DSAVE("1thresh", thresh)

    # column edges: strong positive derivative response along axis 1
    # (take abs() of the response first to find both edges)
    edges = gaussian_filter(1.0 * binary, sigma, order=(0, 1))
    edges = uniform_filter(edges, (10.0 * scale, 1))
    edges = edges > 0.5 * np.amax(edges)
    DSAVE("2grad", edges)

    # whitespace that lies next to an edge
    widened = maximum_filter(edges, (int(scale), int(5 * scale)))
    seps = np.minimum(thresh, widened)
    seps = maximum_filter(seps, (int(2 * scale), 1))
    DSAVE("3seps", seps)

    # keep only the biggest column separators
    seps = morph.select_regions(
        seps, sl.dim0, min=csminheight * scale, nbest=maxcolseps)
    DSAVE("4seps", seps)
    return seps

Ejemplo n.º 5

0

Mostrar archivo

def compute_colseps_conv(binary, scale=1.0, csminheight=10, maxcolseps=3):
    """Find column separators by convolution and thresholding.

    csminheight: minimum column height (units=scale)
    maxcolseps: maximum # whitespace column separators

    Intermediate images are handed to ``DSAVE``; the selected separator
    regions are returned.
    """
    h, w = binary.shape
    assert np.array_equal(binary, 1.0 * binary)
    blur_sigma = (scale, scale * 0.5)

    # vertical whitespace = smoothed image below 10% of its maximum
    whitespace = filters.gaussian_filter(binary, sigma=blur_sigma)
    whitespace = filters.uniform_filter(whitespace, size=(5.0 * scale, 1))
    thresh = whitespace < np.amax(whitespace) * 0.1
    DSAVE("1thresh", thresh)

    # column edges = derivative response (along axis 1) above half its maximum
    # (apply abs() to the response to pick up both edges)
    response = filters.gaussian_filter(binary, blur_sigma, order=(0, 1))
    response = filters.uniform_filter(response, (10.0 * scale, 1))
    grad = response > 0.5 * np.amax(response)
    DSAVE("2grad", grad)

    # combine edges and whitespace
    near_edge = filters.maximum_filter(grad, (int(scale), int(5 * scale)))
    seps = filters.maximum_filter(
        np.minimum(thresh, near_edge), (int(2 * scale), 1))
    DSAVE("3seps", seps)

    # keep only the biggest column separators
    seps = morph.select_regions(
        seps, sl.dim0, min=csminheight * scale, nbest=maxcolseps)
    DSAVE("4seps", seps)
    return seps

Ejemplo n.º 6

0

Mostrar archivo

def compute_colseps_morph(binary, scale, maxseps=3, minheight=20, maxwidth=5):
    """Find extended vertical whitespace corresponding to column separators
    using morphological operations.

    NOTE: ``maxseps``, ``minheight`` and ``maxwidth`` are not read; the
    thresholds come from the global ``args`` (``csminaspect``,
    ``csminheight``, ``maxcolseps``).  ``bounds`` is computed but not used
    for the returned value.
    """
    boxmap = psegutils.compute_boxmap(binary, scale, (0.4, 5), dtype='B')

    square = (int(5 * scale), int(5 * scale))
    bounds = morph.rb_closing(B(boxmap), square)
    bounds = maximum(B(1 - bounds), B(boxmap))

    # whitespace = complement of the closed box map
    whitespace = 1 - morph.rb_closing(boxmap, (int(20 * scale), int(scale)))
    whitespace = morph.select_regions(
        whitespace, sl.aspect, min=args.csminaspect)
    whitespace = morph.select_regions(
        whitespace, sl.dim0,
        min=args.csminheight * scale, nbest=args.maxcolseps)

    whitespace = morph.r_erosion(whitespace, (int(0.5 + scale), 0))
    whitespace = morph.r_dilation(
        whitespace, (int(0.5 + scale), 0), origin=(int(scale / 2) - 1, 0))
    return whitespace

Ejemplo n.º 7

0

Mostrar archivo

 def compute_colseps_morph(self, binary, scale):
     """Find extended vertical whitespace corresponding to column separators
     using morphological operations.

     Thresholds are read from ``self.parameter`` (``csminaspect``,
     ``csminheight``, ``maxcolseps``).  ``bounds`` is computed but not used
     for the returned value.
     """
     boxmap = psegutils.compute_boxmap(binary, scale, dtype='B')

     square = (int(5 * scale), int(5 * scale))
     bounds = morph.rb_closing(B(boxmap), square)
     bounds = np.maximum(B(1 - bounds), B(boxmap))

     # whitespace = complement of the closed box map
     whitespace = 1 - morph.rb_closing(boxmap, (int(20 * scale), int(scale)))
     whitespace = morph.select_regions(
         whitespace, sl.aspect, min=self.parameter['csminaspect'])
     whitespace = morph.select_regions(
         whitespace, sl.dim0,
         min=self.parameter['csminheight'] * scale,
         nbest=self.parameter['maxcolseps'])

     whitespace = morph.r_erosion(whitespace, (int(0.5 + scale), 0))
     whitespace = morph.r_dilation(
         whitespace, (int(0.5 + scale), 0), origin=(int(scale / 2) - 1, 0))
     return whitespace

Ejemplo n.º 8

0

Mostrar archivo

def compute_colseps_mconv(binary, scale=1.0):
    """Find column separators using a combination of morphological
    operations and convolution.

    Whitespace (smoothed image below 10% of its maximum) is intersected with
    the morphologically closed image; regions with ``sl.dim0`` above
    10*scale are kept (at most 3) and then merged with the complement of the
    dilated blocks.
    """
    density = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
    density = uniform_filter(density, (5.0 * scale, 1))
    thresh = density < amax(density) * 0.1

    closing_size = (int(4 * scale), int(4 * scale))
    blocks = morph.r_closing(binary, closing_size)

    candidates = minimum(blocks, thresh)
    candidates = morph.select_regions(
        candidates, sl.dim0, min=10 * scale, nbest=3)

    grown = morph.r_dilation(blocks, (5, 5))
    return maximum(candidates, 1 - grown)

Ejemplo n.º 9

0

Mostrar archivo

 def compute_colseps_mconv(self, binary, scale=1.0):
     """Find column separators using a combination of morphological
     operations and convolution.

     The region filter uses ``self.parameter['csminheight']`` (times scale)
     as the minimum ``sl.dim0`` and ``self.parameter['maxcolseps']`` as the
     number of regions to keep.
     """
     density = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
     density = uniform_filter(density, (5.0 * scale, 1))
     thresh = density < np.amax(density) * 0.1

     closing_size = (int(4 * scale), int(4 * scale))
     blocks = morph.rb_closing(binary, closing_size)

     candidates = np.minimum(blocks, thresh)
     candidates = morph.select_regions(
         candidates, sl.dim0,
         min=self.parameter['csminheight'] * scale,
         nbest=self.parameter['maxcolseps'])

     grown = morph.r_dilation(blocks, (5, 5))
     return np.maximum(candidates, 1 - grown)

Ejemplo n.º 10

0

Mostrar archivo

def compute_colseps_conv(binary, scale=1.0):
    """Find column separators by convolution and thresholding.

    Whitespace (smoothed image below 10% of its maximum) is combined with
    edges (derivative response along axis 1 above 50% of its maximum); the
    regions with ``sl.dim0`` above 10*scale are kept, at most 3 of them.
    """
    sigma = (scale, scale * 0.5)

    # vertical whitespace
    density = gaussian_filter(1.0 * binary, sigma)
    density = uniform_filter(density, (5.0 * scale, 1))
    thresh = density < amax(density) * 0.1

    # column edges (apply abs() to the response to find both edges)
    response = gaussian_filter(1.0 * binary, sigma, order=(0, 1))
    response = uniform_filter(response, (10.0 * scale, 1))
    grad = response > 0.5 * amax(response)

    # whitespace that lies next to an edge
    near_edge = maximum_filter(grad, (int(scale), int(5 * scale)))
    seps = maximum_filter(minimum(thresh, near_edge), (int(2 * scale), 1))

    # keep only the biggest column separators
    return morph.select_regions(seps, sl.dim0, min=10 * scale, nbest=3)

Ejemplo n.º 11

0

Mostrar archivo

def compute_colseps_mconv(binary, scale=1.0):
    """Find column separators using a combination of morphological
    operations and convolution.

    The region filter reads ``args['csminheight']`` (times scale) and
    ``args['maxcolseps']``.  Every intermediate image is passed to ``DSAVE``.
    """
    h, w = binary.shape

    density = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
    density = uniform_filter(density, (5.0 * scale, 1))
    thresh = density < amax(density) * 0.1
    DSAVE("1thresh", thresh)

    closing_size = (int(4 * scale), int(4 * scale))
    blocks = morph.rb_closing(binary, closing_size)
    DSAVE("2blocks", blocks)

    candidates = minimum(blocks, thresh)
    candidates = morph.select_regions(
        candidates, sl.dim0,
        min=args['csminheight'] * scale, nbest=args['maxcolseps'])
    DSAVE("3seps", candidates)

    blocks = morph.r_dilation(blocks, (5, 5))
    DSAVE("4blocks", blocks)

    combined = maximum(candidates, 1 - blocks)
    DSAVE("5combo", combined)
    return combined

Ejemplo n.º 12

0

Mostrar archivo

Archivo: process_images.py Proyecto: KayneWest/basicocr

def compute_colseps_conv(binary,scale=1.0,csminheight=10,maxcolseps=2):
    """Find column separators by convolution and thresholding.

    Regions whose ``sl.dim0`` exceeds csminheight*scale are kept, at most
    maxcolseps+1 of them.
    """
    h, w = binary.shape
    sigma = (scale, scale * 0.5)

    # vertical whitespace: smoothed image below 10% of its maximum
    density = gaussian_filter(1.0 * binary, sigma)
    density = uniform_filter(density, (5.0 * scale, 1))
    thresh = density < amax(density) * 0.1

    # column edges: derivative response along axis 1 above half its maximum
    # (apply abs() to the response to find both edges)
    response = gaussian_filter(1.0 * binary, sigma, order=(0, 1))
    response = uniform_filter(response, (10.0 * scale, 1))
    grad = response > 0.5 * amax(response)

    # combine edges and whitespace
    near_edge = maximum_filter(grad, (int(scale), int(5 * scale)))
    seps = maximum_filter(minimum(thresh, near_edge), (int(2 * scale), 1))

    # keep only the biggest column separators
    return morph.select_regions(
        seps, sl.dim0, min=csminheight * scale, nbest=maxcolseps + 1)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: ocrd_anybaseocr_textline.py Proyecto: profintegra/ocrd_anybaseocr

    def compute_colseps_conv(self, binary, scale=1.0):
        """Find column separators by convolution and thresholding.

        Reads ``self.parameter['csminheight']`` and
        ``self.parameter['maxcolseps']`` for the region filters.

        Returns the separator map ``seps1``; its non-zero cells are
        normalised to 1 before the final gap-filling pass.
        """

        def fill_short_gaps(line, max_gap):
            """Fill short gaps of the 1-D view ``line`` in place.

            A gap is a run of cells that are not equal to 1.  It is set to 1
            when it is at most ``max_gap`` long and is directly followed by
            a 1.  A gap that reaches the end of ``line`` is left untouched.
            """
            gap = 0
            for pos in range(len(line) - 1):
                if line[pos] == 1:
                    continue
                gap += 1
                if line[pos + 1] == 1:
                    if gap <= max_gap:
                        # The gap covers pos-gap+1 .. pos.  The former loops
                        # started one cell earlier, which for a gap beginning
                        # at index 0 meant index -1, i.e. they wrongly set
                        # the LAST cell of the line.
                        line[pos - gap + 1:pos + 1] = 1
                    gap = 0

        # find vertical whitespace by thresholding
        smoothed = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
        smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
        thresh = (smoothed < np.amax(smoothed) * 0.1)

        # find column edges by filtering
        grad = gaussian_filter(1.0 * binary, (scale, scale * 0.5),
                               order=(0, 1))
        grad = uniform_filter(grad, (10.0 * scale, 1))
        grad = (grad > 0.25 * np.amax(grad))
        grad1 = morph.select_regions(grad,
                                     sl.dim0,
                                     min=self.parameter['csminheight'] * scale,
                                     nbest=self.parameter['maxcolseps'] + 10)

        x = (1 - thresh) * (1 - grad1)
        thresh11 = (1 - thresh) * x

        # close gaps of up to 50 cells along every row
        for r in range(len(thresh11)):
            fill_short_gaps(thresh11[r], 50)

        y = 1 - (thresh11 * (1 - thresh))

        # combine edges and whitespace
        seps = np.minimum(thresh,
                          maximum_filter(grad, (int(scale), int(5 * scale))))
        seps = maximum_filter(seps, (int(2 * scale), 1))

        smoothed = gaussian_filter(1.0 * seps, (scale, scale * 0.5))
        smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
        seps1 = (smoothed < np.amax(smoothed) * 0.1)
        seps1 = 1 - seps1

        seps1 = (grad) * seps1

        # close gaps of up to 400 cells along every column
        # (the original author noted that 300 may work better)
        for c in range(len(seps1[0])):
            fill_short_gaps(seps1[:, c], 400)

        seps1 = morph.select_regions(seps1,
                                     sl.dim0,
                                     min=self.parameter['csminheight'] * scale,
                                     nbest=self.parameter['maxcolseps'] + 10)
        seps1 = (seps1 * (1 - y)) + seps1
        # normalise every non-zero cell to 1
        seps1[seps1 != 0] = 1

        # close gaps of up to 350 cells along every column
        for c in range(len(seps1[0])):
            fill_short_gaps(seps1[:, c], 350)

        return seps1

Ejemplo n.º 14

0

Mostrar archivo

def mainbody_textline_segment(gray, bina, scale, black_id, col_id, dictionary):
    '''
    Split one attribute column of the "core indicator" area into text rows
    and record every row in ``dictionary``.

    :param gray: grayscale image of the attribute column
    :param bina: binary image of the attribute column; NOTE: not read, it is
        recomputed from ``gray`` (``gray < 0.5``)
    :param scale: NOTE: not read, it is re-estimated from the binary image
    :param black_id: id of the block the attribute column belongs to
    :param col_id: id of the column the attribute column belongs to
    :param dictionary: storage record; one entry
        ``'<black_id>.<col_id>.<row_id>.png'`` is added per detected row
    :return: ``(dictionary, max_row)`` where ``max_row`` is the id of the
        last stored row (0 when nothing was stored)
    '''

    # Suppress several kinds of interference.
    bina = 1 * (gray < 0.5)
    bina = ocrolib.remove_noise(bina, 5)  # remove some of the noise
    scale = psegutils.estimate_scale(bina)
    height, width = gray.shape
    # Remove bright blobs near the edges, in both directions.
    lines = morph.select_regions(bina, sl.dim0, min=2 * scale)
    bina = bina - lines
    lines = morph.select_regions(bina, sl.dim1, min=2 * scale)
    bina = bina - lines

    # Merge characters.
    textlines = filters.maximum_filter(bina, (0, scale))
    textlines = morph.rb_erosion(textlines, (3, 0))
    textlines = morph.rb_dilation(textlines, (0, scale))

    # A row counts as text when its pixel sum exceeds ``scale``.
    textpixe_num = np.sum(textlines, axis=1)
    textpixe_num = 1 * ((1.0 * textpixe_num / scale) > 1)
    textpixe_num = list(textpixe_num)

    text_index = [i for i, a in enumerate(textpixe_num) if a == 1]
    indexs = []
    max_row = 0
    if len(text_index) > 0:
        # Group consecutive row numbers into [first, last] runs.  Every
        # element is visited, including the last one: the former loop
        # stopped one element early and so dropped the final text row.
        beg_index = text_index[0]
        for prev_row, cur_row in zip(text_index, text_index[1:]):
            if cur_row - prev_row != 1:
                indexs.append([beg_index, prev_row])
                beg_index = cur_row
        indexs.append([beg_index, text_index[-1]])

        # Keep only the runs that are tall enough to be a text line.
        results_indexs = [index for index in indexs
                          if index[1] - index[0] >= scale / 4]

        for row_id, index in enumerate(results_indexs):
            key = '%d.%d.%d.png' % (black_id, col_id, row_id)
            # Crop the row with a margin of 5 pixels above and below.
            data = 255 * gray[max(0, index[0] - 5):min(height, index[1] +
                                                       5), :]
            value = name_dic(index, data)
            dictionary[key] = value
            max_row = row_id
    return dictionary, max_row

Ejemplo n.º 15

0

Mostrar archivo

def mainbody_segment(gray, binary, scale, index_list):
    '''
    Cut the "core indicator" area into columns along vertical white space.

    :param gray: grayscale image; it is cut at the same columns as ``binary``
    :param binary: binary image of the "core indicator" area under test
    :param scale: character width, float
    :param index_list: list of horizontal [start, end] positions of the text
        found in the "title bar"; NOTE: when it holds 4 or 5 entries it is
        consumed in place (entries are merged and removed)
    :return: ``(gray_lists, bina_lists)``, the column slices of ``gray`` and
        of ``binary``
    '''

    # Used when a "title bar" exists: find the vertical white-space cut.
    def search_sep_index1(bina, th, n=2):
        '''
        :param bina: image to inspect
        :param th: upper bound on the pixel sum of the chosen column
        :param n: number of neighbouring columns summed per window
        :return: the position midway between the first and the last window
            with the smallest pixel sum, provided that the pixel sum of that
            column is below ``th``; otherwise None
        '''
        height, width = bina.shape
        beg_index = []
        end_index = []
        min_sum = n * height
        all_sum = np.sum(bina, axis=0)
        for i in range(0, width - 1):
            num_sum = sum(all_sum[i:i + n])
            if num_sum < min_sum:
                min_sum = num_sum
                beg_index = [i, i + 1]
                end_index = [i, i + 1]
            elif num_sum == min_sum:
                end_index = [i, i + 1]
        if len(beg_index) > 0 and len(end_index) > 0:
            # Floor division keeps the position an integer so that it can be
            # used as an array index (true division yields a float on
            # Python 3).
            res_index = (beg_index[1] + end_index[0]) // 2

            # Accept the cut only when the column holds few white pixels.
            if np.sum(bina[:, res_index]) < th:
                return res_index
        else:
            return None

    # Used when no "title bar" exists: find the vertical white gaps.
    def search_sep_index2(bina, scale):
        '''
        :param bina: "core indicator" image to inspect, array
        :param scale: character width, float
        :return: list with the middle position of every gap between two
            neighbouring text spans
        '''
        indexs = np.sum(bina, axis=0)

        # Noise suppression: a column only counts as text when its pixel sum
        # exceeds ``scale``.
        indexs = list(1.0 * indexs / scale)
        text_index_temp = [i for i, index in enumerate(indexs)
                           if index > 1]  # candidate text columns

        text_index_acct = []  # accepted text spans

        if len(text_index_temp) > 0:
            beg_index = text_index_temp[0]
            end_index = text_index_temp[0]
            for i in range(1, len(text_index_temp)):
                end_index = text_index_temp[i]
                # A new span starts only when the gap is larger than 4.
                if text_index_temp[i] - text_index_temp[i - 1] > 4:
                    end_index = text_index_temp[i - 1]
                    # Keep the span only when it is wider than one character.
                    if end_index - beg_index > scale:
                        text_index_acct.append([beg_index, end_index])
                    beg_index = text_index_temp[i]
            text_index_acct.append([beg_index, end_index])
        text_index_acct.sort(key=lambda x: x[0])
        res_index = []
        for i in range(len(text_index_acct) - 1):
            # Integer midpoint: the value is later used as a slice bound.
            index = (text_index_acct[i + 1][0] + text_index_acct[i][1]) // 2
            res_index.append(index)
        return res_index

    # Remove interference near the borders.
    bina = ocrolib.remove_noise(binary, 8)
    lines = morph.select_regions(bina, sl.dim1, min=2 * scale)
    bina = bina - lines
    lines = morph.select_regions(bina, sl.dim0, min=2 * scale)
    bina = bina - lines

    # A "title bar" exists (4 or 5 title positions).
    if 6 > len(index_list) > 3:
        colsep_index = []
        # Dilation: the white areas grow.
        bina_d = filters.maximum_filter(bina, (scale, scale))
        i = 0
        while len(index_list):
            # Take the centres of two consecutive title positions as the
            # range to search.  E.g. for two neighbouring title regions
            # [x00, x01] and [x10, x11] the cut is searched inside
            # [(x00+x01)/2, (x10+x11)/2] of the "core indicator" area.
            # TODO: known bug - it is unclear how to split the indicator
            # area when the title bar holds a single attribute only.
            sep_index = None
            while sep_index is None and i < len(index_list) - 1:

                # Integer centres: they are used as slice bounds below.
                index = [(index_list[i][0] + index_list[i][1]) // 2,
                         (index_list[i + 1][0] + index_list[i + 1][1]) // 2]

                bina_i = bina_d[:, index[0]:index[1]]
                sep_index = search_sep_index1(bina_i, 10 * scale)

                if sep_index is None:  # the initial title split failed
                    # Merge the two title regions and retry.
                    index_re1 = index_list[i]
                    index_re2 = index_list[i + 1]
                    index_new = [index_re1[0], index_re2[1]]
                    index_list.remove(index_re1)
                    index_list.remove(index_re2)
                    index_list.insert(i, index_new)
                    if len(colsep_index) > 0:
                        colsep_index.pop()
                    if i > 0:
                        i = i - 1
            if sep_index is not None:
                # Convert the position inside the slice to image coordinates.
                sep_index = sep_index + index[0]
                colsep_index.append(sep_index)
                if i > 0:
                    i = i - 1
                    index_list.remove(index_list[i])
            else:
                index_list.remove(index_list[0])
            i += 1

    # No "title bar".
    else:
        # (the original author wondered whether 2 * scale would be better)
        bina_d = filters.maximum_filter(bina, (scale, scale / 2))
        colsep_index = search_sep_index2(bina_d, scale)

    colsep_index.append(0)
    colsep_index.append(bina.shape[1])
    colsep_index.sort()

    # Consecutive cut positions delimit the final columns.
    res_index = []
    for i in range(len(colsep_index) - 1):
        beg_index = colsep_index[i]
        end_index = colsep_index[i + 1]
        res_index.append([beg_index, end_index])

    bina_lists = []
    gray_lists = []
    for index in res_index:
        gray_i = gray[:, index[0]:index[1]]
        bina_i = binary[:, index[0]:index[1]]
        gray_lists.append(gray_i)
        bina_lists.append(bina_i)
    return gray_lists, bina_lists

Ejemplo n.º 16

0

Mostrar archivo

def compute_colseps_conv(binary, scale=1.0):
    """Find column separators by convolution and thresholding.

    Reads ``args.csminheight`` and ``args.maxcolseps`` for the region
    filters.

    Returns the separator map ``seps1``; its non-zero cells are normalised
    to 1 before the final gap-filling pass.
    """

    def fill_short_gaps(line, max_gap):
        """Fill short gaps of the 1-D view ``line`` in place.

        A gap is a run of cells that are not equal to 1.  It is set to 1
        when it is at most ``max_gap`` long and is directly followed by a 1.
        A gap that reaches the end of ``line`` is left untouched.
        """
        gap = 0
        for pos in range(len(line) - 1):
            if line[pos] == 1:
                continue
            gap += 1
            if line[pos + 1] == 1:
                if gap <= max_gap:
                    # The gap covers pos-gap+1 .. pos.  The former loops
                    # started one cell earlier, which for a gap beginning at
                    # index 0 meant index -1, i.e. they wrongly set the LAST
                    # cell of the line.
                    line[pos - gap + 1:pos + 1] = 1
                gap = 0

    # find vertical whitespace by thresholding
    smoothed = gaussian_filter(1.0 * binary, (scale, scale * 0.5))
    smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
    thresh = (smoothed < amax(smoothed) * 0.1)

    # find column edges by filtering
    grad = gaussian_filter(1.0 * binary, (scale, scale * 0.5), order=(0, 1))
    grad = uniform_filter(grad, (10.0 * scale, 1))
    # grad = abs(grad) would find both edges
    grad = (grad > 0.25 * amax(grad))
    grad1 = morph.select_regions(grad,
                                 sl.dim0,
                                 min=args.csminheight * scale,
                                 nbest=args.maxcolseps + 10)

    x = (1 - thresh) * (1 - grad1)
    thresh11 = (1 - thresh) * x

    # close gaps of up to 50 cells along every row
    for r in range(len(thresh11)):
        fill_short_gaps(thresh11[r], 50)

    y = 1 - (thresh11 * (1 - thresh))

    # combine edges and whitespace
    seps = minimum(thresh, maximum_filter(grad, (int(scale), int(5 * scale))))
    seps = maximum_filter(seps, (int(2 * scale), 1))

    smoothed = gaussian_filter(1.0 * seps, (scale, scale * 0.5))
    smoothed = uniform_filter(smoothed, (5.0 * scale, 1))
    seps1 = (smoothed < amax(smoothed) * 0.1)
    seps1 = 1 - seps1
    seps1 = (grad) * seps1

    # close gaps of up to 400 cells along every column
    # (the original author noted that 300 may work better)
    for c in range(len(seps1[0])):
        fill_short_gaps(seps1[:, c], 400)

    seps1 = morph.select_regions(seps1,
                                 sl.dim0,
                                 min=args.csminheight * scale,
                                 nbest=args.maxcolseps + 10)

    seps1 = (seps1 * (1 - y)) + seps1
    # normalise every non-zero cell to 1
    seps1[seps1 != 0] = 1

    # close gaps of up to 350 cells along every column
    for c in range(len(seps1[0])):
        fill_short_gaps(seps1[:, c], 350)

    return seps1