def test_image(self):
        """
        Image type test.

        rlsa_fast returns None when the input is not an ndarray.
        """
        result = rlsa_fast(list(image), True, False, value)
        self.assertEqual(result, None)
 def test_rlsafast_hori_vert(self):
     """
     RLSA test with both horizontal and vertical smearing enabled.
     """
     expected = out_h_v.tolist()
     actual = rlsa_fast(image.copy(), True, True, value).tolist()
     self.assertEqual(actual, expected)
 def test_rlsafast_vert(self):
     """
     RLSA test with only vertical smearing enabled.
     """
     expected = out_v.tolist()
     actual = rlsa_fast(image.copy(), False, True, value).tolist()
     self.assertEqual(actual, expected)
 def test_rlsafast_hori(self):
     """
     RLSA test with only horizontal smearing enabled.
     """
     expected = out_h.tolist()
     actual = rlsa_fast(image.copy(), True, False, value).tolist()
     self.assertEqual(actual, expected)
    def test_value(self):
        """
        Value test.

        When the smearing value is less than or equal to 1,
        the output equals the input.
        """
        expected = image.copy().tolist()
        actual = rlsa_fast(image.copy(), True, False, -1).tolist()
        self.assertEqual(actual, expected)
    def test_bool(self):
        """
        Bool test.

        When both the "horizontal" and "vertical" flags are False,
        the output equals the input.
        """
        expected = image.copy().tolist()
        actual = rlsa_fast(image.copy(), False, False, value).tolist()
        self.assertEqual(actual, expected)
def crop_chars(image,img_idx,write_processed_images=False):
    """
    Crop individual characters from a page image using projection profiles.

    Columns are located via peaks of the vertical projection of a binarized
    copy, then characters within each column via peaks of the horizontal
    projection. Each cropped character is contrast-stretched, padded to a
    square, matched against the text annotation in ``{img_idx}.txt`` and
    written to ``char_images/{char}-{img_idx}-{col}-{row}.png``.

    Parameters:
        image: page image as an ndarray (presumably single-channel
            grayscale, since binarization and a scalar draw color are
            used -- TODO confirm).
        img_idx: page identifier; used to locate the annotation file
            ``{img_idx}.txt`` and to name output/debug files.
        write_processed_images (bool): when True, debug images
            (``binary-*.jpg``, ``hybrid-*.jpg``) are written and the
            detected grid is drawn onto ``image``.

    Raises:
        AssertionError: if the number of annotation lines does not match
            the number of detected columns.
    """
    # derotate original
    image = correct_skew(image)
    # perform binarization and projection operations on copy
    img = image.copy()
    # two different binarization methods
    # img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
    #        cv2.THRESH_BINARY,125,0)
    bin_img,_ = better_binarize(img,c=0)
    if write_processed_images:
        cv2.imwrite(f"binary-{img_idx}.jpg",bin_img)

    # black rectangle at the outside to find peak outside of outmost character
    cv2.rectangle(bin_img,(0,0),(bin_img.shape[1]-1,bin_img.shape[0]-1),(0),1)
    # column sums (one value per x) and row sums (one value per y)
    proj_ver = np.sum(bin_img, axis=0)
    proj_hor = np.sum(bin_img, axis=1)

    peaks_ver, _ = find_peaks(proj_ver, distance=22) # average hor. char dist.: 31
    peaks_hor, _ = find_peaks(proj_hor, distance=20) # average vert. char dist.: 24

    # open annotations; one line of text per image column, right-to-left
    with open(f"{img_idx}.txt") as f:
        columns = [line.strip().strip("<lb/>").replace("&gaiji;","e") for line in f.readlines()]
        assert len(columns) == len(peaks_ver)-1, f"at img_idx {img_idx} there are {len(columns)} lines in the annotation but {len(peaks_ver)-1} columns in the image"

    # maps each annotated character to a list of (char_image, col_idx, row_idx)
    img_annotation_dict = {char:[] for column in columns for char in column}
    for i in range(len(peaks_ver)-1): # iterate over columns
        # crop a column and draw its left border
        column = bin_img[0:bin_img.shape[0],peaks_ver[i]:peaks_ver[i+1]]

        # draw column border for wiki image

        # smear horizontally within the column to merge character strokes
        column = rlsa_fast(column, True, False, 10)

        # do the same as above vertically per column
        proj_hor_local = np.sum(column, axis=1)
        # normalize so the prominence threshold below is scale-independent
        proj_hor_local = proj_hor_local / np.max(proj_hor_local)
        peaks_hor_local, _ = find_peaks(proj_hor_local, prominence=0.7, distance=15) # average vert. char dist.: 24

        # snap the global row peaks to nearby per-column peaks (within thresh px)
        peaks_hor_adjusted = peaks_hor.copy()
        thresh = 6
        for peak in peaks_hor_local:
            # adjust peaks_hor_adjusted to peak wherever peak is within thresh of any element
            peaks_hor_adjusted = np.where(np.abs(peaks_hor_adjusted-peak)<thresh,peak,peaks_hor_adjusted)

        if write_processed_images:
            # NOTE(review): these lines are drawn onto `image` itself, which is
            # cropped below at gray_col -- debug lines may leak into the saved
            # character crops when write_processed_images is True; confirm intended.
            cv2.line(image,(peaks_ver[i],0),(peaks_ver[i],image.shape[0]),(50),2)
            for j in range(len(peaks_hor)-1): # iterate over characters within one column
                cv2.line(image,(peaks_ver[i],peaks_hor_adjusted[j]),(peaks_ver[i+1],peaks_hor_adjusted[j]),(50),2)

        # preprocess the image before cropping because single char imgs are hard to preprocess without img context
        # NOTE(review): gray_img is never used below (only bin_thresh is);
        # also this is loop-invariant and recomputed every column -- verify.
        gray_img,bin_thresh = better_binarize(image,c=-50,leave_partly_gray=True)
        gray_col = image[0:image.shape[0],peaks_ver[i]:peaks_ver[i+1]]

        for j in range(len(peaks_hor)-1):
            # crop actual character
            char_image = gray_col[peaks_hor_adjusted[j]:peaks_hor_adjusted[j+1],0:gray_col.shape[1]]
            # to re-scale array from a range between min and max to (0,1): (array-min)/(max-min)
            # we want to stretch the darkest pixel to 0 and the brightest (= bin_thresh) to 255
            darkest_pixel = char_image.min()
            char_image = np.where(
                char_image<bin_thresh, # all non-background pixels
                np.uint8((char_image-darkest_pixel)/(bin_thresh-darkest_pixel)*255),
                255
            )
            # add white (255) padding around char_image to make it squared
            img_size = max(char_image.shape[0],char_image.shape[1],30) # pad to at least 30 px
            vertical_padding = img_size - char_image.shape[0]
            if vertical_padding%2: # odd number
                top_padding = vertical_padding // 2 + 1
                btm_padding = vertical_padding // 2
            else: # even number
                top_padding = btm_padding = vertical_padding // 2
            horizontal_padding = img_size - char_image.shape[1]
            if horizontal_padding%2:
                left_padding = horizontal_padding // 2 + 1
                right_padding = horizontal_padding // 2
            else:
                left_padding = right_padding = horizontal_padding // 2
            char_image = cv2.copyMakeBorder(
                char_image,
                top_padding,
                btm_padding,
                left_padding,
                right_padding,
                cv2.BORDER_CONSTANT,
                value=255
            )
            # # compute centroid (mean of non-zero indices in the inverted binary)
            # _,b = cv2.threshold(char_image,bin_thresh,255,cv2.THRESH_BINARY)
            # b = cv2.bitwise_not(b)
            # centroid = tuple(np.uint8(nz.mean()) for nz in b.nonzero())
            # cv2.circle(b, centroid, 3,(100),1)
            # char_image = cv2.resize(char_image,(50,50),interpolation=cv2.INTER_CUBIC)

            # append  idx [-i-1] to read columns from right to left
            if len(columns[-i-1]) > j:
                char_annotation = columns[-i-1][j]
                img_annotation_dict[char_annotation].append((char_image,len(columns)-i,j+1))

    # drop annotation symbols that do not correspond to trainable characters
    img_annotation_dict.pop("\u3000", None) # full-space space
    img_annotation_dict.pop("T", None) # we only want to learn Chinese Characters
    img_annotation_dict.pop("e", None) # "e" has been inserted as a placeholder where a character is missing or illegible (&gaiji;)
    img_annotation_dict.pop("c", None) # "cc" was inserted for a two-slot "、" (starting from 400.txt)
    for char,img_list in img_annotation_dict.items():
        for char_image,col_idx,row_idx in img_list:
            if np.isnan(char_image).any():
                print(f"char img {char}-{img_idx}-{col_idx}-{row_idx} contains nans!")
            cv2.imwrite(os.path.join("char_images",f"{char}-{img_idx}-{col_idx}-{row_idx}.png"),char_image)

    if write_processed_images:
        cv2.imwrite(f"hybrid-{img_idx}.jpg",image)
# Example #8
            cv2.drawContours(img_without_small_contours, contours, idx, (255),
                             -1)

    line_ends = cv2.dilate(
        cv2.filter2D(cv2.bitwise_not(img_without_small_contours), -1,
                     bottom_line_end_kernel), np.ones((line_extension, 1)))
    img = cv2.bitwise_and(cv2.bitwise_not(line_ends), img)
    cv2.imwrite('3_2.png', line_ends)

    # rlsa
    print("applying RLSA ...")
    x, y = img.shape

    # for one-column boxes there is usually min. 30 px space, we need to go below that
    # so as not to close them up
    img = rlsa_fast(img, True, True, 25)
    cv2.imwrite('4.png', img)
    img = np.uint8(img)

    # print("skeletizing ...")
    # skel = skeletize(cv2.bitwise_not(img))

    # create kernel like this:

    print("connecting line ends and corners ...")
    corners = cv2.goodFeaturesToTrack(img, 1000, 0.2, 10)
    corners = np.int0(corners)
    copy = img.copy()  # to draw points without modifying img
    for i in corners:
        xi, yi = i.ravel()
        for j in corners: