    def test_image(self):
        """ Image type
        Test: when the input is not an ndarray, rlsa_fast returns None
        """
        self.assertEqual(rlsa_fast(list(image), True, False, value), None)
    def test_rlsafast_hori_vert(self):
        """ RLSA horizontal and vertical test """
        self.assertEqual(
            rlsa_fast(image.copy(), True, True, value).tolist(), out_h_v.tolist())

    def test_rlsafast_vert(self):
        """ RLSA vertical test """
        self.assertEqual(
            rlsa_fast(image.copy(), False, True, value).tolist(), out_v.tolist())

    def test_rlsafast_hori(self):
        """ RLSA horizontal test """
        self.assertEqual(
            rlsa_fast(image.copy(), True, False, value).tolist(), out_h.tolist())
    def test_value(self):
        """ Value
        Test: when the value is less than or equal to 1, output == input
        """
        self.assertEqual(
            rlsa_fast(image.copy(), True, False, -1).tolist(), image.copy().tolist())

    def test_bool(self):
        """ Bool
        Test: when both boolean arguments "horizontal" and "vertical" are False, output == input
        """
        self.assertEqual(
            rlsa_fast(image.copy(), False, False, value).tolist(), image.copy().tolist())
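
    # --- Illustrative sketch, not part of the original test suite ------------------
    # A hypothetical extra case showing what the assertions above exercise: horizontal
    # RLSA fills a run of white pixels that lies between two black pixels when the run
    # is shorter than `value`. The tiny fixture and the expected output below are
    # assumptions for illustration only, not the module's actual fixtures.
    def test_sketch_horizontal_gap_filling(self):
        """ Hypothetical example: a short white gap between two black pixels
        is filled when value is clearly larger than the gap """
        import numpy as np  # local import keeps the sketch self-contained
        tiny = np.full((1, 7), 255, dtype=np.uint8)  # one white row ...
        tiny[0, 1] = tiny[0, 5] = 0                  # ... with two black pixels, 3 white px apart
        smoothed = rlsa_fast(tiny.copy(), True, False, 10)  # value 10 is well above the 3-px gap
        self.assertEqual(smoothed.tolist(), [[255, 0, 0, 0, 0, 0, 255]])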
def crop_chars(image,img_idx,write_processed_images=False):
    """ uses horizontal and vertical projections to crop characters """
    # derotate original
    image = correct_skew(image)
    # perform binarization and projection operations on a copy
    img = image.copy()
    # two different binarization methods
    # img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
    #     cv2.THRESH_BINARY,125,0)
    bin_img,_ = better_binarize(img,c=0)
    if write_processed_images:
        cv2.imwrite(f"binary-{img_idx}.jpg",bin_img)
    # black rectangle at the outside to find a peak outside of the outermost character
    cv2.rectangle(bin_img,(0,0),(bin_img.shape[1]-1,bin_img.shape[0]-1),(0),1)
    proj_ver = np.sum(bin_img, axis=0)
    proj_hor = np.sum(bin_img, axis=1)
    peaks_ver, _ = find_peaks(proj_ver, distance=22) # average hor. char dist.: 31
    peaks_hor, _ = find_peaks(proj_hor, distance=20) # average vert. char dist.: 24
    # open annotations
    with open(f"{img_idx}.txt") as f:
        columns = [line.strip().strip("<lb/>").replace("&gaiji;","e") for line in f.readlines()]
    assert len(columns) == len(peaks_ver)-1, f"at img_idx {img_idx} there are {len(columns)} lines in the annotation but {len(peaks_ver)-1} columns in the image"
    img_annotation_dict = {char:[] for column in columns for char in column}
    for i in range(len(peaks_ver)-1): # iterate over columns
        # crop a column and draw its left border
        column = bin_img[0:bin_img.shape[0],peaks_ver[i]:peaks_ver[i+1]]
        # draw column border for wiki image
        column = rlsa_fast(column, True, False, 10)
        # do the same as above vertically per column
        proj_hor_local = np.sum(column, axis=1)
        proj_hor_local = proj_hor_local / np.max(proj_hor_local)
        peaks_hor_local, _ = find_peaks(proj_hor_local, prominence=0.7, distance=15) # average vert. char dist.: 24
        peaks_hor_adjusted = peaks_hor.copy()
        thresh = 6
        for peak in peaks_hor_local:
            # adjust peaks_hor_adjusted to peak wherever peak is within thresh of any element
            peaks_hor_adjusted = np.where(np.abs(peaks_hor_adjusted-peak)<thresh,peak,peaks_hor_adjusted)
        if write_processed_images:
            cv2.line(image,(peaks_ver[i],0),(peaks_ver[i],image.shape[0]),(50),2)
            for j in range(len(peaks_hor)-1): # iterate over characters within one column
                cv2.line(image,(peaks_ver[i],peaks_hor_adjusted[j]),(peaks_ver[i+1],peaks_hor_adjusted[j]),(50),2)
        # preprocess the image before cropping because single char imgs are hard to preprocess without img context
        gray_img,bin_thresh = better_binarize(image,c=-50,leave_partly_gray=True)
        gray_col = image[0:image.shape[0],peaks_ver[i]:peaks_ver[i+1]]
        for j in range(len(peaks_hor)-1):
            # crop actual character
            char_image = gray_col[peaks_hor_adjusted[j]:peaks_hor_adjusted[j+1],0:gray_col.shape[1]]
            # to re-scale an array from a range between min and max to (0,1): (array-min)/(max-min)
            # we want to stretch the darkest pixel to 0 and the brightest (= bin_thresh) to 255
            darkest_pixel = char_image.min()
            char_image = np.where(
                char_image<bin_thresh, # all non-background pixels
                np.uint8((char_image-darkest_pixel)/(bin_thresh-darkest_pixel)*255),
                255
            )
            # add white (255) padding around char_image to make it squared
            img_size = max(char_image.shape[0],char_image.shape[1],30) # pad to at least 30 px
            vertical_padding = img_size - char_image.shape[0]
            if vertical_padding%2: # odd number
                top_padding = vertical_padding // 2 + 1
                btm_padding = vertical_padding // 2
            else: # even number
                top_padding = btm_padding = vertical_padding // 2
            horizontal_padding = img_size - char_image.shape[1]
            if horizontal_padding%2:
                left_padding = horizontal_padding // 2 + 1
                right_padding = horizontal_padding // 2
            else:
                left_padding = right_padding = horizontal_padding // 2
            char_image = cv2.copyMakeBorder(
                char_image,
                top_padding, btm_padding, left_padding, right_padding,
                cv2.BORDER_CONSTANT, value=255
            )
            # # compute centroid (mean of non-zero indices in the inverted binary)
            # _,b = cv2.threshold(char_image,bin_thresh,255,cv2.THRESH_BINARY)
            # b = cv2.bitwise_not(b)
            # centroid = tuple(np.uint8(nz.mean()) for nz in b.nonzero())
            # cv2.circle(b, centroid, 3,(100),1)
            # char_image = cv2.resize(char_image,(50,50),interpolation=cv2.INTER_CUBIC)
            # append idx [-i-1] to read columns from right to left
            if len(columns[-i-1]) > j:
                char_annotation = columns[-i-1][j]
                img_annotation_dict[char_annotation].append((char_image,len(columns)-i,j+1))
    img_annotation_dict.pop("\u3000", None) # full-width space
    img_annotation_dict.pop("T", None) # we only want to learn Chinese characters
    img_annotation_dict.pop("e", None) # "e" has been inserted as a placeholder where a character is missing or illegible (&gaiji;)
    img_annotation_dict.pop("c", None) # "cc" was inserted for a two-slot "、" (starting from 400.txt)
    for char,img_list in img_annotation_dict.items():
        for char_image,col_idx,row_idx in img_list:
            if np.isnan(char_image).any():
                print(f"char img {char}-{img_idx}-{col_idx}-{row_idx} contains nans!")
            cv2.imwrite(os.path.join("char_images",f"{char}-{img_idx}-{col_idx}-{row_idx}.png"),char_image)
    if write_processed_images:
        cv2.imwrite(f"hybrid-{img_idx}.jpg",image)
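

# Hypothetical driver (an assumption, not part of the original code): crop_chars expects a
# grayscale page image plus a matching "<img_idx>.txt" annotation file in the working
# directory and writes the cropped characters into "char_images/". The ".jpg" page
# filenames, the helper name and the index range are assumed here for illustration.
def crop_all_pages(first_idx, last_idx, write_processed_images=False):
    os.makedirs("char_images", exist_ok=True)  # crop_chars writes its crops here
    for img_idx in range(first_idx, last_idx + 1):
        page = cv2.imread(f"{img_idx}.jpg", cv2.IMREAD_GRAYSCALE)
        if page is None:  # skip indices with no page image on disk
            continue
        crop_chars(page, img_idx, write_processed_images=write_processed_images)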
cv2.drawContours(img_without_small_contours, contours, idx, (255), -1)
line_ends = cv2.dilate(
    cv2.filter2D(cv2.bitwise_not(img_without_small_contours), -1, bottom_line_end_kernel),
    np.ones((line_extension, 1)))
img = cv2.bitwise_and(cv2.bitwise_not(line_ends), img)
cv2.imwrite('3_2.png', line_ends)

# rlsa
print("applying RLSA ...")
x, y = img.shape
# for one-column boxes there is usually min. 30 px space, we need to go below that
# so as not to close them up
img = rlsa_fast(img, True, True, 25)
cv2.imwrite('4.png', img)
img = np.uint8(img)

# print("skeletizing ...")
# skel = skeletize(cv2.bitwise_not(img))

# create kernel like this:
print("connecting line ends and corners ...")
corners = cv2.goodFeaturesToTrack(img, 1000, 0.2, 10)
corners = np.int0(corners)
copy = img.copy()  # to draw points without modifying img
for i in corners:
    xi, yi = i.ravel()
    for j in corners: