def teager(im):
    # 2D Teager(-Kaiser) quadratic energy operator: emphasizes local contrast
    # (stroke edges) using the center pixel and its 8 neighbors.
    im32 = im.astype(np.int32)
    padded = np.pad(im32, (1, 1), 'edge')
    return normalize_u8(3 * im32 ** 2
                        - padded[2:, 2:] * padded[:-2, :-2] / 2
                        - padded[2:, :-2] * padded[:-2, 2:] / 2
                        - padded[2:, 1:-1] * padded[:-2, 1:-1]
                        - padded[1:-1, 2:] * padded[1:-1, :-2])
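
# Usage sketch (not part of the original module): the Teager filter above is
# typically run on a grayscale page image to emphasize stroke edges before
# thresholding. The file names are placeholders; cv2/np are assumed to be the
# module-level imports already used throughout this file.
def _demo_teager(path='page.png'):
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    energy = teager(gray)  # quadratic edge-energy map, normalized to uint8
    # Quick Otsu pass over the energy map as a sanity check.
    _, bw = cv2.threshold(energy, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    cv2.imwrite('teager_energy.png', energy)
    cv2.imwrite('teager_otsu.png', bw)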
def remove_stroke_outliers(im, lines, k=1.0):
    stroke_widths = fast_stroke_width(im)
    if lib.debug:
        lib.debug_imwrite('strokes.png', lib.normalize_u8(stroke_widths.clip(0, 10)))

    mask = np.zeros(im.shape, dtype=np.uint8)
    for line in lines:
        for letter in line:
            sliced = letter.crop().apply(mask)
            sliced |= letter.raster()
    lib.debug_imwrite('letter_mask.png', -mask)

    masked_strokes = stroke_widths.copy()
    masked_strokes &= -mask

    strokes_mean, strokes_std = masked_mean_std(masked_strokes, mask)
    if lib.debug:
        print('overall: mean:', strokes_mean, 'std:', strokes_std)

    debug = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
    new_lines = []
    for line in lines:
        if len(line) <= 1:
            continue

        good_letters = []
        for letter in line:
            crop = letter.crop()
            if not crop.nonempty():
                continue

            raster = letter.raster()
            sliced_strokes = crop.apply(stroke_widths).copy()
            sliced_strokes &= lib.bool_to_u8(raster)

            mean, std = masked_mean_std(sliced_strokes, raster)
            if mean < strokes_mean - k * strokes_std:
                if lib.debug:
                    print('skipping {:4d} {:4d} {:.03f} {:.03f}'.format(
                        letter.x, letter.y, mean, std,
                    ))
                    letter.box(debug, color=lib.RED)
            else:
                if lib.debug:
                    letter.box(debug, color=lib.GREEN)
                good_letters.append(letter)

        if good_letters:
            new_lines.append(TextLine(good_letters, underlines=line.underlines))

    lib.debug_imwrite("stroke_filter.png", debug)

    return new_lines
def pca_gray(im):
    assert len(im.shape) == 3
    Lab = cv2.cvtColor(im, cv2.COLOR_BGR2Lab)
    im_1d = Lab.reshape(im.shape[0] * im.shape[1], 3).astype(np.float32)
    im_1d -= np.mean(im_1d)
    U, S, V = np.linalg.svd(im_1d, full_matrices=False)
    coeffs = V[0]
    if coeffs[0] < 0:
        coeffs = -coeffs
    result = normalize_u8(np.tensordot(Lab, coeffs, axes=1))
    lib.debug_imwrite('pca.png', result)
    return result
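
# Usage sketch (not part of the original module): pca_gray expects a 3-channel
# BGR image (as returned by cv2.imread) and produces a uint8 grayscale image
# projected onto the dominant Lab axis. The file name is a placeholder.
def _demo_pca_gray(path='page_color.png'):
    bgr = cv2.imread(path, cv2.IMREAD_COLOR)
    gray = pca_gray(bgr)
    cv2.imwrite('pca_gray.png', gray)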
def training_data(font_paths, font_size, W_h):
    faces = [freetype.Face(font_path) for font_path in font_paths]
    hi_res = np.concatenate([create_mosaic(face, font_size) for face in faces])

    blurred_ims = [
        cv2.GaussianBlur(hi_res, (0, 0), 7, 3),
        cv2.GaussianBlur(hi_res, (0, 0), 3, 7),
    ]
    blurred = np.concatenate(blurred_ims, axis=0)
    hi_res_2 = np.tile(hi_res, (len(blurred_ims), 1))
    lib.debug_imwrite('hi.png', hi_res_2)

    lo_res = cv2.resize(blurred, (0, 0), None, 0.5, 0.5,
                        interpolation=cv2.INTER_AREA)
    lib.debug_imwrite('lo.png', lo_res)

    lo_res_hi, filtered_lo = features_lo(lo_res)

    difference = hi_res_2.astype(np.float64) - lo_res_hi
    lib.debug_imwrite('diff.png', lib.normalize_u8(difference))

    # make sure we're on edges (in hi-res reference)
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    gradient = cv2.morphologyEx(hi_res_2, cv2.MORPH_GRADIENT, struct)
    gradient_means, _ = lib.mean_std(gradient, W_h)
    patch_mask = gradient_means > np.percentile(gradient_means, 50)

    # patch_centers should match others' shape.
    step = 3
    center_slice = slice(W_h // 2, -(W_h // 2) - 1, step)
    patch_centers = patch_mask[center_slice, center_slice]
    lo_patches = patches(filtered_lo, W_h, step)[patch_centers].transpose(0, 3, 1, 2)
    hi_patches = patches(difference, W_h, step)[patch_centers]
    t = lo_patches.shape[0]

    lo_patches_vec = lo_patches.reshape(t, -1)
    for i in range(lo_patches.shape[1]):
        print_dict('lo_sq{}.png'.format(i),
                   lo_patches_vec[:, i * W_h * W_h:(i + 1) * W_h * W_h])
    hi_patches_vec = hi_patches.reshape(t, W_h * W_h)
    print_dict('hi_sq.png', hi_patches_vec)

    # reduce dimensionality on lo-res patches with PCA.
    pca = sklearn.decomposition.PCA(n_components=lo_patches_vec.shape[1] // 6)
    Y_pca = pca.fit_transform(lo_patches_vec)

    return Y_pca, hi_patches_vec, pca
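
# Usage sketch (not part of the original module): training_data builds paired
# (PCA-reduced lo-res feature patches, hi-res residual patches) for training a
# super-resolution regressor. The font path, font size, and window size below
# are illustrative placeholders, not values taken from this project.
def _demo_training_data():
    Y_pca, hi_vec, pca = training_data(
        ['/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf'],
        font_size=56, W_h=19)
    print('lo-res features:', Y_pca.shape, 'hi-res targets:', hi_vec.shape)
    print('explained variance:', pca.explained_variance_ratio_.sum())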
def ntirogiannis2014(im):
    lib.debug_prefix.append('ng2014')

    debug_imwrite('input.png', im)
    im_h, _ = im.shape

    N, BG_prime = ng2014_normalize(im)
    O = otsu(N)
    debug_imwrite('O.png', O)

    letters = algorithm.all_letters(O)
    height_map = HeightMap(letters)

    ratio_sum = 0
    for h in range(1, height_map.max_height() + 1):
        if len(height_map[h]) == 0:
            continue
        ratio_sum += height_map.ratio_pixels(h) / height_map.ratio_components(h)
        if ratio_sum > 1:
            break

    min_height = h
    if lib.debug:
        print('Accept components only >= height', h)

    OP = O.copy()
    for h in range(1, min_height):
        for letter in height_map[h]:
            sliced = letter.slice(OP)
            np.place(sliced, letter.raster(), 255)
    debug_imwrite('OP.png', OP)

    strokes = fast_stroke_width(OP)
    debug_imwrite('strokes.png', normalize_u8(strokes.clip(0, 10)))
    SW = int(round(strokes.sum() / np.count_nonzero(strokes)))
    if lib.debug:
        print('SW =', SW)

    S = skeleton(OP)
    debug_imwrite('S.png', S)

    S_inv = ~S
    # S_inv_32 = S_inv.astype(np.int32)
    # FG_count = np.count_nonzero(S_inv)
    FG_pos = im[S_inv.astype(bool)]
    FG_avg = FG_pos.mean()
    FG_std = FG_pos.std()
    # FG = (S_inv & im).astype(np.int32)
    # FG_avg = FG.sum() / float(FG_count)
    # FG_std = np.sqrt(((S_inv_32 & (FG - FG_avg)) ** 2).sum() / float(FG_count))
    if lib.debug:
        print('FG:', FG_avg, FG_std)

    BG_avg = BG_prime.mean()
    BG_std = BG_prime.std()
    if lib.debug:
        print('BG:', BG_avg, BG_std)

    if FG_avg + FG_std != 0:
        C = -50 * np.log10((FG_avg + FG_std) / (BG_avg - BG_std))
        k = -0.2 - 0.1 * C / 10
    else:
        # Extreme case when the FG is 100% black; see the article's explanation
        # just before equation 5.
        C = -50 * np.log10(2.5 / (BG_avg - BG_std))
        k = -0.2 - 0.1 * C / 10

    if lib.debug:
        print('niblack:', C, k)
    local = niblack(N, window_size=(2 * SW) | 1, k=k)
    debug_imwrite('local.png', local)

    local_CCs = algorithm.all_letters(local)

    # NB: paper uses OP here, which results in neglecting all small components.
    O_inv = ~O
    # 0xFFFFFFFF where O marks text, 0 elsewhere; the AND below keeps Niblack
    # component labels only where the Otsu mask also marks text.
    O_inv_32 = O_inv.astype(np.int8, copy=False).astype(np.int32).astype(np.uint32, copy=False)
    label_map_O_inv = O_inv_32 & local_CCs[0].label_map

    CO_inv = np.zeros(im.shape, dtype=np.uint8)
    for cc in local_CCs:
        if np.count_nonzero(cc.slice(label_map_O_inv) == cc.label) / float(cc.area()) >= C / 100:
            CO_sliced = cc.slice(CO_inv)
            np.place(CO_sliced, cc.raster(), 255)

    CO = ~CO_inv
    debug_imwrite('CO.png', CO)

    CO_inv_dilated = cv2.dilate(CO_inv, rect33)
    FB = ~(CO_inv | ((~O) & CO_inv_dilated))
    debug_imwrite('FB.png', FB)

    lib.debug_prefix.pop()

    return FB
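
# Usage sketch (not part of the original module): ntirogiannis2014 takes a
# grayscale page image and returns the final binarization FB (black text on a
# white background). The file names are placeholders.
def _demo_ng2014(path='page.png'):
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    binarized = ntirogiannis2014(gray)
    cv2.imwrite('ng2014_binarized.png', binarized)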