예제 #1
0
파일: canny.py 프로젝트: loveheaven/ocropy
def remove_vlines(binary, gray, scale, maxsize=10):
    """Erase tall, thin connected components (vertical rules).

    A component of ``binary`` counts as a vertical line when its bounding
    box, as measured by ``sl.width`` / ``sl.height``, is at most 20 wide and
    more than 200 tall, or at most 45 wide and more than 500 tall.

    For every such component its pixels are set to 140 in ``gray`` (which is
    modified in place) and removed from the label image.

    ``scale`` and ``maxsize`` are accepted but not used here.

    Returns ``array(labels != 0, "B")``: non-zero wherever a component
    survived the filtering.
    """
    label_img, _ = morph.label(binary)
    for index, box in enumerate(morph.find_objects(label_img)):
        box_w = sl.width(box)
        box_h = sl.height(box)
        narrow_and_tall = box_w <= 20 and box_h > 200
        wider_and_very_tall = box_w <= 45 and box_h > 500
        if not (narrow_and_tall or wider_and_very_tall):
            continue
        # Labels are 1-based while enumerate() is 0-based.
        own_pixels = label_img[box] == index + 1
        gray[box][own_pixels] = 140
        label_img[box][own_pixels] = 0
    return array(label_img != 0, "B")
예제 #2
0
def remove_vlines(binary,gray,scale,maxsize=10):
    """Remove vertical-line-like connected components from a binary image.

    Components whose bounding box (per ``sl.width`` / ``sl.height``) is
    either <= 20 wide and > 200 tall, or <= 45 wide and > 500 tall, are
    painted with the value 140 in ``gray`` (in place) and cleared from the
    label image.  ``scale`` and ``maxsize`` are unused.

    Returns ``array(labels != 0, 'B')`` for the remaining components.
    """
    label_img, _ = morph.label(binary)
    boxes = morph.find_objects(label_img)
    for label_id, box in enumerate(boxes, start=1):
        width, height = sl.width(box), sl.height(box)
        if (width <= 20 and height > 200) or (width <= 45 and height > 500):
            # Restrict the edit to this component's own pixels inside its
            # bounding box; other components may overlap the box.
            component = label_img[box] == label_id
            gray[box][component] = 140
            label_img[box][component] = 0
    return array(label_img != 0, 'B')
예제 #3
0
def remove_hlines(binary,scale,maxsize=10):
    """Drop connected components that are too wide to be text.

    Every component of ``binary`` whose bounding-box width (``sl.width``)
    exceeds ``maxsize * scale`` is cleared from the label image.

    Returns ``array(labels != 0, 'B')``: non-zero where a component remains.
    """
    label_img, _ = morph.label(binary)
    for label_id, box in enumerate(morph.find_objects(label_img), start=1):
        if sl.width(box) > maxsize*scale:
            # ``patch`` is a view, so the assignment edits label_img itself.
            patch = label_img[box]
            patch[patch == label_id] = 0
    return array(label_img != 0, 'B')
 def remove_hlines(self, binary, scale, maxsize=10):
     """Drop connected components wider than ``maxsize * scale``.

     The components of ``binary`` are labelled, and each one whose
     bounding-box width (``sl.width``) exceeds the limit is cleared.
     Instance state is not read.

     Returns ``np.array(labels != 0, 'B')`` for the surviving components.
     """
     label_img, _ = morph.label(binary)
     boxes = morph.find_objects(label_img)
     for label_id, box in enumerate(boxes, start=1):
         if sl.width(box) > maxsize * scale:
             # Only pixels carrying this component's label are cleared.
             own_pixels = label_img[box] == label_id
             label_img[box][own_pixels] = 0
     return np.array(label_img != 0, 'B')
예제 #5
0
def firstAnalyse(binary):
    """Label the components of ``binary``, estimate a scale, strip tall ones.

    Steps:

    1. ``binary`` is closed with a (1, 1) element and labelled.
    2. Bounding boxes are visited in order of increasing area; a box whose
       region of the scale map is still all zero is filled with the square
       root of its area.  ``scale`` is the median of the scale-map values
       strictly between 3 and 100.
    3. For every box narrower or shorter than ``scale / 2`` the content of
       ``binary`` inside that box is copied into ``smalldot``.
    4. For every box whose ``sl.dim0`` exceeds ``3 * scale`` the pixels
       carrying that component's label are masked out of ``binary``.

    NOTE: ``binary`` is modified in place by step 4.

    Returns ``(objects, smalldot, scale)``.
    """
    closed = morph.r_closing(binary.astype(bool), (1,1))
    labels, _ = morph.label(closed)
    objects = morph.find_objects(labels)

    scalemap = zeros(closed.shape)
    smalldot = zeros(closed.shape, dtype=binary.dtype)

    # sorted() is stable, so ordering the boxes directly gives the same
    # visiting order as sorting their indices by area.
    for box in sorted(objects, key=sl.area):
        if amax(scalemap[box]) > 0:
            # Part of this box was already claimed by a smaller component.
            continue
        scalemap[box] = sl.area(box)**0.5
    plausible = (scalemap > 3) & (scalemap < 100)
    scale = median(scalemap[plausible])

    half_scale = scale/2
    tall_limit = 3*scale
    for label_id, box in enumerate(objects, start=1):
        if (sl.width(box) < half_scale) or (sl.height(box) < half_scale):
            smalldot[box] = binary[box]
        if sl.dim0(box) > tall_limit:
            # The mask is 255 everywhere except on this component's pixels,
            # so the masked AND keeps only the other pixels of the box.
            keep = where(labels[box] != label_id, uint8(255), uint8(0))
            binary[box] = cv2.bitwise_and(binary[box], binary[box], mask=keep)
    return objects, smalldot, scale
예제 #6
0
def filter_junk_cc(binary, scale, maxsize):
    """Sort the connected components of ``binary`` into junk and text-like.

    A component is junk when any of these holds for its bounding box:

    * width greater than ``maxsize * scale``
    * area greater than ``8 * scale ** 2``
    * ``sl.aspect_normalized`` greater than 8
    * ``sl.min_dim`` smaller than ``0.35 * scale``

    A non-junk component is text-like when both its width and its height
    exceed ``0.3 * scale``.  Components that satisfy neither rule appear in
    neither mask.

    Returns ``(junk_cc, text_like)``, two ``'B'`` arrays shaped like
    ``binary`` holding 1 on the pixels of the matching components.
    """
    junk_cc = np.zeros(binary.shape, dtype='B')
    text_like = np.zeros(binary.shape, dtype='B')

    label_img, _ = morph.label(binary)
    boxes = morph.find_objects(label_img)

    for label_id, box in enumerate(boxes, start=1):
        # Pixels inside the bounding box that belong to this component.
        own_pixels = label_img[box] == label_id

        if (sl.width(box) > maxsize * scale
                or sl.area(box) > scale * scale * 8
                or sl.aspect_normalized(box) > 8
                or sl.min_dim(box) < scale * 0.35):
            junk_cc[box][own_pixels] = 1
        elif sl.width(box) > 0.3 * scale and sl.height(box) > 0.3 * scale:
            text_like[box][own_pixels] = 1

    return junk_cc, text_like
예제 #7
0
def simplefirstAnalyse(binary):
    """Label ``binary`` and strip tall components using a fixed scale.

    The scale is not estimated from the components; it is simply
    ``int(0.7 * binary.shape[0])``.

    * For every bounding box narrower or shorter than ``scale / 2`` the
      content of ``binary`` inside the box is copied into ``smalldot``.
    * For every bounding box whose ``sl.dim0`` exceeds ``3 * scale`` the
      pixels carrying that component's label are masked out of ``binary``.

    NOTE: ``binary`` is modified in place.

    Returns ``(objects, smalldot, scale)``.
    """
    closed = morph.r_closing(binary.astype(bool), (1,1))
    labels, _ = morph.label(closed)
    objects = morph.find_objects(labels)
    smalldot = zeros(closed.shape, dtype=binary.dtype)

    scale = int(binary.shape[0]*0.7)
    half_scale = scale/2
    tall_limit = 3*scale

    for label_id, box in enumerate(objects, start=1):
        if (sl.width(box) < half_scale) or (sl.height(box) < half_scale):
            smalldot[box] = binary[box]
        if sl.dim0(box) > tall_limit:
            # The mask is 255 everywhere except on this component's pixels,
            # so the masked AND keeps only the other pixels of the box.
            keep = where(labels[box] != label_id, uint8(255), uint8(0))
            binary[box] = cv2.bitwise_and(binary[box], binary[box], mask=keep)
    return objects, smalldot, scale
예제 #8
0
def cut_dash_line(im, num_cells):
    """Split an image holding a row of ``num_cells`` cells into cell images.

    The inverted image is binarised and its connected components are
    assigned to cells by horizontal position.  The nominal cell width is
    ``scale = round(width / num_cells + 0.2)``.  Components at least
    ``2 * scale`` wide are ignored.

    For each cell, the columns spanned by its components are cropped from
    ``im`` and passed through ``normalize_cell_img``.  A cell without
    components yields a normalised blank image of width ``scale``.

    Args:
        im: source image; indexed as ``im[rows, cols]``.
        num_cells: number of cells expected in the row.

    Returns:
        A list with ``num_cells`` entries, one normalised image per cell,
        ordered left to right.
    """
    binary = convert_to_binary(255-im, thres=0.5)
    labels, _ = morph.label(binary)
    objects = morph.find_objects(labels)

    scale = int(round(1.0 * binary.shape[1] / num_cells + 0.2))
    # NOTE(review): the crop below uses rows 0..h-1 and so drops the last
    # image row; kept as in the original, confirm this is intended.
    h = binary.shape[0] - 1
    # list to store objects for each cell
    cells = [[] for _ in range(num_cells)]
    cell_ims = []

    for b in objects:
        # only process object with width < 2 x scale
        if sl.width(b) < 2 * scale:
            x1, x2 = b[1].start, b[1].stop
            mid_x = (x1 + x2) // 2
            cell_index = int(np.median([x1 // scale, x2 // scale, mid_x // scale]))
            # Because ``scale`` is rounded, num_cells * scale can be smaller
            # than the image width; a component in the leftover columns
            # would index one past the last cell.  Put it in the last cell.
            cell_index = min(cell_index, num_cells - 1)
            # handle case where digit from 2 cells connected
            if x2 - (cell_index + 1) * scale > 0.3 * scale:
                temp_b = (b[0], slice(b[1].start, (cell_index + 1) * (scale + 1), None))
                print("2 char connected!!!")
            else:
                temp_b = b
            cells[cell_index].append(temp_b)

    for c in cells:
        if c:
            x1 = min(obj[1].start for obj in c)
            x2 = max(obj[1].stop for obj in c)
            cell_ims.append(normalize_cell_img(im[0:h, x1:x2]))
        else:
            blank = np.zeros((h, scale))
            cell_ims.append(normalize_cell_img(convert_binary_to_normal_im(blank)))

    return cell_ims
예제 #9
0
def detect_table(image, scale, maxsize=10, debug_path=None):
    """Find table regions in a page image.

    Pipeline: convert to grayscale if the image has 3+ channels, binarise
    with Sauvola thresholding, keep the "junk" (non-text) components, build
    a separator map with ``compute_combine_seps`` and close it so broken
    edges connect.  Each connected component of the closed map that is
    large, sparse (density below 0.4) and bounded by separators on all four
    sides is reported as a table.

    Args:
        image: page image, either single channel or with channels on axis 2.
        scale: size unit used for every threshold in this function.
        maxsize: width limit, in units of ``scale``, above which a component
            is considered a candidate.
        debug_path: when not None, two debug images are written next to it.
            The last four characters are stripped and ``_bin.png`` /
            ``_sep.png`` appended, so this presumably expects a file path
            with a three-letter extension -- TODO confirm with callers.

    Returns:
        A list of ``[left, top, right, bottom]`` boxes in image coordinates
        (columns for left/right, rows for top/bottom).
    """
    if len(image.shape) > 2 and image.shape[2] >= 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    binary = 1 - morph.thresh_sauvola(gray, k=0.05)
    junk_cc, _ = filter_junk_cc(binary, scale, maxsize)

    junk_cc = morph.r_closing(junk_cc, (5, 5))

    print('calculating combine sep...')
    combine_sep = compute_combine_seps(junk_cc, scale)
    # using closing morphology to connect disconnected edges
    close_thes = int(scale * 0.15)
    closed_sep = morph.r_closing(combine_sep, (close_thes, close_thes))

    if debug_path is not None:
        # The original code referenced an undefined name ``filename`` here;
        # the debug_path argument is the only path available in this scope.
        debug_stem = debug_path[:-4]
        cv2.imwrite(debug_stem + '_bin.png',
                    ((1 - junk_cc) * 255).astype('uint8'))
        cv2.imwrite(debug_stem + '_sep.png',
                    (closed_sep * 255).astype('uint8'))

    labels, _ = morph.label(closed_sep)
    objects = morph.find_objects(labels)

    # result table list
    boxes = []

    for i, b in enumerate(objects):
        is_candidate = (
            sl.width(b) > maxsize * scale
            or sl.area(b) > scale * scale * 10
            or (sl.aspect_normalized(b) > 6 and sl.max_dim(b) > scale * 1.5)
        )
        if not is_candidate:
            continue

        # Fraction of the bounding box covered by the (unclosed) separators.
        density = np.sum(combine_sep[b])
        density = density / sl.area(b)

        if not (sl.area(b) > scale * scale * 10
                and sl.min_dim(b) > scale * 1.0
                and sl.max_dim(b) > scale * 8 and density < 0.4):
            continue

        # calculate projection to determine table border
        box_w = sl.width(b)
        box_h = sl.height(b)

        region = (labels[b] == i + 1).astype('uint8')

        border_pad = max(box_w, box_h)

        proj_x = np.sum(region, axis=0)
        proj_y = np.sum(region, axis=1)

        # Add the value three positions earlier to each projection entry.
        proj_x[3:] += proj_x[:-3]
        proj_y[3:] += proj_y[:-3]

        sep_x = np.sort([j[0] for j in np.argwhere(proj_x > 0.75 * box_h)])
        sep_y = np.sort([j[0] for j in np.argwhere(proj_y > 0.4 * box_w)])

        # skip if either direction has no separator at all
        if len(sep_x) < 1 or len(sep_y) < 1:
            continue

        border_left, border_right, border_top, border_bottom = None, None, None, None

        if sep_x[0] < border_pad:
            border_left = sep_x[0]
        if sep_x[-1] > box_w - border_pad:
            border_right = sep_x[-1]
        if sep_y[0] < border_pad:
            border_top = sep_y[0]
        if sep_y[-1] > box_h - border_pad:
            border_bottom = sep_y[-1]

        if all(
                j is not None for j in
                (border_top, border_bottom, border_left, border_right)):
            # The right edge is always taken as the full bounding-box width,
            # replacing the separator position found above.
            border_right = b[1].stop - b[1].start
            boxes.append([
                b[1].start + border_left, b[0].start + border_top,
                b[1].start + border_right, b[0].start + border_bottom
            ])

    return boxes