def dewarp_fine(im):
    """Redraw the contours of ``im`` with a smooth per-point vertical offset.

    For every line returned by ``get_AH_lines`` the difference between the
    line's median base ``y`` and each letter's base ``y`` is collected. A
    ``SmoothBivariateSpline`` is fitted through those (x, y) -> offset
    samples and handed to ``draw_contours`` together with the contours of
    ``im``.

    Args:
        im: image passed to ``get_AH_lines`` and ``cv2.findContours``
            (presumably a single-channel binarized page -- confirm against
            callers).

    Returns:
        A new ``uint8`` array of the same shape as ``im`` holding the
        redrawn contours.
    """
    # NOTE(review): other code in this file treats lib.debug_prefix as a
    # list (append/pop); assigning a string here is kept as-is -- confirm.
    lib.debug_prefix = 'fine_'

    _, _, lines = get_AH_lines(im)

    points = []
    offsets = []
    for line in lines:
        bases = np.array([letter.base_point() for letter in line])
        median_y = np.median(bases[:, 1])
        points.extend(bases)
        # Positive offset means the base point sits above the line median.
        offsets.extend(median_y - bases[:, 1])

    points = np.array(points)
    offsets = np.array(offsets)

    # A nearest-neighbour griddata interpolation was tried previously; the
    # smooth spline below is what is actually used.
    y_offset_interp = interpolate.SmoothBivariateSpline(
        points[:, 0], points[:, 1], offsets
    )

    new = np.full(im.shape, 0, dtype=np.uint8)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the last two items are the same in all.
    contours, [hierarchy] = \
        cv2.findContours(im, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    draw_contours(new, contours, hierarchy, y_offset_interp, 0, 255)

    lib.debug_imwrite('fine.png', new)

    return new
def remove_outliers(im, AH, lines):
    """Fit a RANSAC baseline to each line, drop outlier letters, merge lines.

    Lines with fewer than 5 letters are discarded. For the rest, the base
    points are fitted with ``ransac(..., PolyModel5, 10, AH / 10.0)``; the
    fitted parameters are stored on ``line.model`` and the line is
    compressed to its inliers. A debug image marking inliers/outliers and
    the original line ends is written as ``lines.png``.

    Returns:
        Whatever ``merge_lines(AH, kept_lines)`` returns.
    """
    canvas = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)

    kept = []
    for text_line in lines:
        if len(text_line) >= 5:
            base_points = np.array(
                [letter.base_point() for letter in text_line])
            fitted, inlier_mask = ransac(base_points, PolyModel5, 10,
                                         AH / 10.0)
            text_line.model = fitted.params

            for point, accepted in zip(base_points, inlier_mask):
                draw_circle(canvas, point, 4,
                            color=GREEN if accepted else RED)

            text_line.compress(inlier_mask)
            kept.append(text_line)

    # Mark the extreme ends of every surviving line (pre-compression letters).
    for text_line in kept:
        draw_circle(canvas, text_line.original_letters[0].left_mid(),
                    6, BLUE, -1)
        draw_circle(canvas, text_line.original_letters[-1].right_mid(),
                    6, BLUE, -1)

    lib.debug_imwrite('lines.png', canvas)

    return merge_lines(AH, kept)
def make_E_align_page(page, AH, O, n_pages, page_index, n_total_lines):
    """Build the ``E_align_page`` terms for the left and right page edges.

    The left-mid and right-mid points of every line in ``page`` are fitted
    with ``ransac(..., LinearXModel, 3, AH / 5.0)``. A side is only used
    when the mean of its inlier mask exceeds ``INLIER_THRESHOLD``; its
    inlier points are mapped through ``image_to_focal_plane`` and passed to
    ``E_align_page``.

    Returns:
        A list with one ``E_align_page(...)`` result per usable side
        (index 0 = left, 1 = right), possibly empty.
    """
    # Line end points in the image, indexed (side 2, line N, coord 2).
    left_points = np.array([line.left_mid() for line in page])
    right_points = np.array([line.right_mid() for line in page])
    side_points_2d = [left_points, right_points]

    side_inliers = []
    for coords in side_points_2d:
        _, inlier_mask = ransac(coords, LinearXModel, 3, AH / 5.0)
        side_inliers.append(inlier_mask)

    inlier_use = [mask.mean() > INLIER_THRESHOLD for mask in side_inliers]

    if lib.debug:
        debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)

        def mark(point, is_inlier):
            draw_circle(debug, point,
                        color=lib.GREEN if is_inlier else lib.RED)

        for line, is_inlier in zip(page, side_inliers[0]):
            mark(line.left_mid(), is_inlier)
        for line, is_inlier in zip(page, side_inliers[1]):
            mark(line.right_mid(), is_inlier)

        lib.debug_imwrite('align_inliers.png', debug)

    # Keep only inliers and transpose so coordinates come first.
    side_points_2d_filtered = [
        coords[mask].T
        for coords, mask in zip(side_points_2d, side_inliers)
    ]

    # axes (coord 3, line N)
    side_points = [
        image_to_focal_plane(coords, O)
        for coords in side_points_2d_filtered
    ]

    terms = []
    for side_index, (coords, use) in enumerate(zip(side_points, inlier_use)):
        if use:
            terms.append(E_align_page(coords, side_index, n_pages,
                                      page_index, n_total_lines))
    return terms
def generate_mesh(all_lines, lines, C_arc, v, n_points_h):
    """Build a mesh of image points along longitudes through ``v``.

    For every column of ``C_arc`` a line through ``v`` is intersected with
    the models of ``C0`` and ``C1`` (from ``C0_C1``); ``n_points_h`` points
    are then placed between/around those intersections according to the
    ``mu`` range computed by ``necessary_mu``. Writes ``mu.png`` and
    ``mesh.png`` debug images and sets the module-level ``mu_debug``.

    Returns:
        Array of shape (n_points_h, number of longitudes, 2).
    """
    global mu_debug

    _, vy = v
    arc_points = C_arc.T

    C0, C1 = C0_C1(lines, v)

    # first, calculate necessary mu.
    mu_debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
    mu_bottom = necessary_mu(C0, C1, v, all_lines, MuMode.BOTTOM)
    mu_top = necessary_mu(C0, C1, v, all_lines, MuMode.TOP)
    lib.debug_imwrite('mu.png', mu_debug)

    rays = [Line.from_points(v, point) for point in arc_points]
    mus = np.linspace(mu_top, mu_bottom, n_points_h)

    longitudes = []
    for ray, arc_point in zip(rays, arc_points):
        p0 = ray.closest_poly_intersect(C0.model, arc_point)
        p1 = ray.closest_poly_intersect(C1.model, arc_point)
        lam = (vy - p0[1]) / (p1[1] - p0[1])
        alphas = mus * lam / (mus + lam - 1)
        # Affine combination of p0 and p1 for every alpha.
        longitudes.append(np.outer(1 - alphas, p0) + np.outer(alphas, p1))

    result = np.array(longitudes)

    debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
    # Only every 50th longitude / point is drawn.
    for longitude in result[::50]:
        for point in longitude[::50]:
            cv2.circle(debug, tuple(point.astype(int)), 6, BLUE, -1)
    trace_baseline(debug, C0, RED)
    trace_baseline(debug, C1, RED)
    lib.debug_imwrite('mesh.png', debug)

    return result.transpose(1, 0, 2)
def safe_rotate(im, angle):
    """Rotate ``im`` by ``angle`` radians after padding so nothing is cut.

    If ``abs(angle)`` exceeds pi/4 a warning is printed and ``im`` is
    returned unchanged. Otherwise the image is padded with the value 255
    to the size of the rotated bounding box and rotated about the centre
    of the padded image with a constant border of 255.

    Returns:
        The padded and rotated image, or ``im`` itself when the angle is
        rejected.
    """
    debug_imwrite('prerotated.png', im)

    rows, cols = im.shape[:2]

    if abs(angle) > math.pi / 4:
        print("warning: too much rotation")
        return im

    angle_deg = angle * 180 / math.pi
    print('rotating to angle:', angle_deg, 'deg')

    # Size of the axis-aligned box enclosing the rotated image.
    rows_new = cols * abs(math.sin(angle)) + rows * math.cos(angle)
    cols_new = rows * abs(math.sin(angle)) + cols * math.cos(angle)

    pad_rows = int(math.ceil((rows_new - rows) / 2))
    pad_cols = int(math.ceil((cols_new - cols) / 2))

    # Pad only the two spatial axes; any further axes are left alone.
    extra_axes = ((0, 0), ) * (len(im.shape) - 2)
    pad_widths = ((pad_rows, pad_rows), (pad_cols, pad_cols)) + extra_axes
    padded = np.pad(im, pad_widths, 'constant', constant_values=255)

    padded_rows, padded_cols = padded.shape[:2]
    centre = (padded_cols / 2, padded_rows / 2)
    rotation = cv2.getRotationMatrix2D(centre, angle_deg, 1)
    rotated = cv2.warpAffine(padded, rotation, (padded_cols, padded_rows),
                             borderMode=cv2.BORDER_CONSTANT,
                             borderValue=255)

    debug_imwrite('rotated.png', rotated)

    return rotated
def debug_print_points(filename, points, step=None, color=BLUE):
    """Draw ``points`` onto a colour copy of ``bw`` and write a debug image.

    Does nothing unless ``lib.debug`` is truthy.

    Args:
        filename: name passed to ``lib.debug_imwrite``.
        points: array that is reshaped to ``(2, -1)`` before drawing, so the
            first axis is expected to hold the two coordinates.
        step: if given, every axis after the first is subsampled with this
            stride before drawing.
        color: colour forwarded to ``draw_circle``.
    """
    if not lib.debug:
        return

    debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)

    if step is not None:
        # The index must be a tuple: NumPy deprecated indexing with a list
        # of slices and newer releases raise an error for it.
        subsample = (np.s_[:],) + (np.s_[::step],) * (points.ndim - 1)
        points = points[subsample]

    for p in points.reshape(2, -1).T:
        draw_circle(debug, p, color=color)

    lib.debug_imwrite(filename, debug)
def correct_geometry(orig, mesh, interpolation=cv2.INTER_LINEAR):
    """Resample ``orig`` through ``mesh`` with ``cv2.remap``.

    ``mesh[u][v]`` holds the (x, y) position in the distorted image that
    output pixel (u, v) is taken from. Pixels mapped outside ``orig`` get
    the constant border value (255, 255, 255). The result is also written
    as ``corrected.png`` via ``lib.debug_imwrite``.

    Returns:
        The remapped image.
    """
    float_mesh = mesh.astype(np.float32)
    map_x = float_mesh[:, :, 0]
    map_y = float_mesh[:, :, 1]

    # Convert the float maps to the fixed-point CV_16SC2 representation.
    fixed_x, fixed_y = cv2.convertMaps(map_x, map_y, cv2.CV_16SC2)

    corrected = cv2.remap(
        orig, fixed_x, fixed_y,
        interpolation=interpolation,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )
    lib.debug_imwrite('corrected.png', corrected)

    return corrected
def go(argv):
    """Binarize the image at ``argv[1]`` and write debug outputs.

    Reads the image with ``lib.imread``, converts it with ``grayscale``,
    switches debug output on with the prefix list ``['binarize']`` and
    writes the ``sauvola_noisy`` and ``sauvola`` results (both with
    ``k=0.1``).
    """
    im = grayscale(lib.imread(argv[1]))

    lib.debug = True
    lib.debug_prefix = ['binarize']

    # Other algorithms (gradient2, adaptive_otsu, ntirogiannis2014, yan)
    # are currently not written out.
    outputs = (
        ('sauvola_noisy.png', sauvola_noisy),
        ('sauvola.png', sauvola),
    )
    for name, algorithm in outputs:
        lib.debug_imwrite(name, algorithm(im, k=0.1))
def min_crop(lines):
    """Return the ``Crop`` bounding the given lines.

    Horizontal extent comes from the smallest ``left()`` and largest
    ``right()`` over all lines; the top comes from the letters of the first
    line and the bottom from the letters of the last line. The box is also
    drawn onto a copy of ``bw`` and written as ``crop.png``.
    """
    x_left = min(line.left() for line in lines)
    y_top = min(letter.y for letter in lines[0])
    x_right = max(line.right() for line in lines)
    y_bottom = max(letter.y + letter.h for letter in lines[-1])

    box = Crop(x_left, y_top, x_right, y_bottom)

    canvas = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
    box.draw(canvas)
    lib.debug_imwrite('crop.png', canvas)

    return box
def adaptive_otsu(im):
    """Normalize ``im`` by a dilated background estimate, then apply ``otsu``.

    The background is a morphological dilation of ``im`` with an elliptical
    element whose (odd) size is derived from the image height. The image is
    divided by that background, scaled by the 30th percentile of ``im`` and
    clipped with ``clip_u8`` before thresholding.

    Returns:
        The result of ``otsu`` on the normalized image.
    """
    height, _ = im.shape

    # Force the kernel size to be odd.
    kernel_size = (height // 200) | 1
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                       (kernel_size, kernel_size))

    background = cv2.morphologyEx(im, cv2.MORPH_DILATE, kernel)
    debug_imwrite('bg.png', background)

    bg_float = background.astype(np.float64)
    C = np.percentile(im, 30)
    # The small epsilon avoids division by zero on black background pixels.
    normalized = clip_u8(C / (bg_float + 1e-10) * im)
    debug_imwrite('norm.png', normalized)

    return otsu(normalized)
def pca_gray(im):
    """Project a BGR image onto its first SVD component in Lab space.

    The image is converted to Lab, flattened to one row per pixel and
    decomposed with ``np.linalg.svd``. The first right-singular vector
    (sign-flipped so its first coefficient is non-negative) gives the
    per-channel weights; the weighted sum is rescaled with
    ``normalize_u8``.

    Args:
        im: array with three dimensions (asserted).

    Returns:
        The result of ``normalize_u8`` on the projected image.
    """
    assert len(im.shape) == 3

    Lab = cv2.cvtColor(im, cv2.COLOR_BGR2Lab)
    n_pixels = im.shape[0] * im.shape[1]
    pixels = Lab.reshape(n_pixels, 3).astype(np.float32)

    # NOTE(review): np.mean without an axis is one scalar over all channels,
    # so channels are not centred individually -- confirm this is intended.
    pixels -= np.mean(pixels)

    _, _, V = np.linalg.svd(pixels, full_matrices=False)

    weights = V[0]
    if weights[0] < 0:
        weights = -weights

    gray = normalize_u8(np.tensordot(Lab, weights, axes=1))
    lib.debug_imwrite('pca.png', gray)

    return gray
def koo2010(im_inv, AH):
    """Set up centroid neighbourhoods and a Delaunay graph over letters.

    The visible body computes letter centroids/ellipses, their precomputed
    rotations, per-centroid neighbour index lists, the Delaunay
    triangulation of the centroids (written as ``triang.png``) and the
    triangulation's edge list, and defines the pairwise cost
    ``V_pq_sites``. No return statement is visible in this block.

    Args:
        im_inv: image that is negated to obtain ``im``; ``im`` must satisfy
            ``lib.is_bw``.
        AH: not used in the visible body.
    """
    im = -im_inv
    assert lib.is_bw(im)

    centroids, ellipses = letter_ellipses(im)
    centroids_rotated, ellipses_sheared = \
        precompute_rotations(im, centroids, ellipses)

    nearby_centroids = []
    for centroid in centroids:
        # Sum over the coordinate axis only: one squared distance per
        # centroid. (Summing over everything yields a single scalar and
        # makes the neighbour mask all-or-nothing.)
        distances_sq = np.square(centroids - centroid).sum(axis=1)
        # Exclude the centroid itself from its own neighbourhood.
        nearby = (distances_sq < radius_sq) \
            & ~np.all(centroids == centroid, axis=1)
        nearby_centroids.append(np.nonzero(nearby)[0])

    tri = scipy.spatial.Delaunay(centroids)

    debug = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
    for simplex in tri.simplices:
        for i, j in zip(simplex, np.roll(simplex, 1)):
            cv2.line(debug,
                     tuple(centroids[i].astype(int)),
                     tuple(centroids[j].astype(int)),
                     GREEN, 2)
    lib.debug_imwrite('triang.png', debug)

    duplicate_segments = np.concatenate([
        tri.simplices[:, (0, 1)],
        tri.simplices[:, (1, 2)],
        tri.simplices[:, (2, 0)],
    ])
    # NOTE(review): rows are de-duplicated before the endpoints are put in
    # order, so an edge stored as (i, j) and (j, i) may survive twice --
    # confirm whether that matters to later code.
    unordered_segments = np.unique(duplicate_segments, axis=0)
    segments = np.stack([
        unordered_segments.min(axis=1),
        unordered_segments.max(axis=1),
    ], axis=1)
    assert np.all(segments[:, 0] < segments[:, 1])

    theta_p = np.zeros((len(ellipses), ))
    s_p = np.full((len(ellipses), ), 5)

    def V_pq_sites(p1, p2, l1, l2):
        # Pairwise cost between sites p1 and p2 carrying labels l1 and l2:
        # zero for equal labels, otherwise lam_1 or lam_2 depending on how
        # far the unpacked (s, theta) labels differ, scaled by a factor
        # that decays with the squared centroid distance.
        s_1, theta_1 = unpack_label(l1)
        s_2, theta_2 = unpack_label(l2)

        centroid_1, centroid_2 = centroids[(p1, p2), :]
        d_pq_sq = np.square(centroid_1 - centroid_2).sum()
        scale = np.exp(-k * d_pq_sq / (s_values[s_1]**2 + s_values[s_2]**2))

        f_diff = abs(s_1 - s_2) + abs(theta_1 - theta_2)
        mu = 0 if l1 == l2 else (lam_1 if f_diff <= 3 else lam_2)

        return mu * scale
def filter_size(AH, im, letters=None):
    """Return the letters accepted by ``valid_letter(AH, letter)``.

    Args:
        AH: value forwarded to ``valid_letter``.
        im: image used to find letters when ``letters`` is None and as the
            background of the ``size_filter.png`` debug image.
        letters: optional precomputed letters; defaults to
            ``all_letters(im)``.
    """
    if letters is None:
        letters = all_letters(im)

    if lib.debug:
        canvas = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
        for letter in letters:
            box_color = lib.GREEN if valid_letter(AH, letter) else lib.RED
            letter.box(canvas, color=box_color)
        lib.debug_imwrite('size_filter.png', canvas)

    # The size limits themselves live in valid_letter.
    accepted = []
    for letter in letters:
        if valid_letter(AH, letter):
            accepted.append(letter)
    return accepted
def print_dict(filename, D_T):
    """Write the rows of ``D_T`` as a square mosaic of square patches.

    Each of the ``K`` rows of ``D_T`` is reshaped to a ``W x W`` patch
    (row length must be a perfect square). Values are shifted by the 5th
    percentile and scaled so the 95th percentile of the shifted data maps
    to 255, then clipped with ``lib.clip_u8``. Patches get a one-pixel
    border of value 127 and are tiled into a ceil(sqrt(K)) square grid;
    unused grid cells are filled with 127.
    """
    K, row_len = D_T.shape
    W = int(np.sqrt(row_len))
    assert row_len == W**2

    shifted = D_T - np.percentile(D_T, 5)
    ratio = 255 / np.percentile(shifted, 95)
    tiles = lib.clip_u8(ratio * shifted.reshape(K, W, W))

    grid = int(np.ceil(np.sqrt(K)))
    cell = W + 2

    # Pad up to grid**2 tiles and add the 1-pixel frame around each tile.
    pad_widths = ((0, grid**2 - K), (1, 1), (1, 1))
    framed = np.pad(tiles, pad_widths, 'constant', constant_values=127)

    # (grid, grid, cell, cell) -> (grid, cell, grid, cell) -> 2-D mosaic.
    mosaic = framed.reshape(grid, grid, cell, cell)
    mosaic = mosaic.transpose(0, 2, 1, 3)
    mosaic = mosaic.reshape(grid * cell, grid * cell)

    lib.debug_imwrite(filename, mosaic)
def precompute_rotations(im, centroids, ellipses):
    """Rotate centroids and ellipses for every angle in ``theta_values``.

    The image is treated as if padded to a square-ish canvas large enough
    to hold its diagonal, and a rotation about the centre of that canvas is
    built for each angle.

    Args:
        im: 2-D image (``im.shape`` is unpacked as height, width).
        centroids: array with one (x, y) row per centroid; shifted by the
            padding before rotation.
        ellipses: array whose transpose is multiplied by the 2x2 part of
            the rotation matrix -- presumably (N, 2, 2) as produced by
            ``letter_ellipses``; TODO confirm.

    Returns:
        ``(centroids_rotated, ellipses_sheared)``: two lists with one entry
        per angle in ``theta_values``.
    """
    # precompute a bunch of stuff.
    im_h, im_w = im.shape
    diag = np.sqrt(im_h**2 + im_w**2)

    centroids_rotated = []
    ellipses_sheared = []

    # Pad each side so the padded extent is at least the image diagonal.
    padding_h = int(np.ceil((diag - im_h) / 2))
    padding_w = int(np.ceil((diag - im_w) / 2))
    centroids_safe = centroids + np.array((padding_w, padding_h))
    # Homogeneous coordinates (3, N) so the 2x3 affine matrix applies.
    centroids_homo = np.concatenate(
        [centroids_safe.T, np.ones((1, centroids_safe.shape[0]))])
    new_h, new_w = im_h + 2 * padding_h, im_w + 2 * padding_w

    for i, theta in enumerate(theta_values):
        theta_deg = theta * 180 / np.pi
        matrix = cv2.getRotationMatrix2D((new_w / 2., new_h / 2.), theta_deg, 1)
        centroids_rotated.append(matrix.dot(centroids_homo).T)

        # FIXME: something is wrong with these rotations
        ellipse_rotated = matrix[:, :2].dot(ellipses.T).T

        # Debug image is only produced for the angle at index 5.
        if lib.debug and i == 5:
            im_safe = np.pad(im, ((padding_h, padding_h), (padding_w, padding_w)),
                             'constant', constant_values=0)
            im_rotated = cv2.warpAffine(im_safe, matrix, (new_w, new_h),
                                        borderMode=cv2.BORDER_CONSTANT,
                                        borderValue=0)
            debug = cv2.cvtColor(im_rotated, cv2.COLOR_GRAY2BGR)
            for centroid, eigvecs in zip(centroids_rotated[-1], ellipse_rotated):
                for v in eigvecs.T:
                    cv2.line(debug,
                             lib.int_tuple(centroid + v),
                             lib.int_tuple(centroid - v),
                             GREEN, 2)
            lib.debug_imwrite('ellipses_rot.png', debug)

        # shear ellipses in x-direction to make perp to y axis
        # NOTE(review): indices 0 and 1 select along the first axis of
        # ellipse_rotated; if that axis enumerates ellipses, only the first
        # two ellipses are used here -- possibly related to the FIXME above.
        x1s, x2s = ellipse_rotated[0]
        y1s, y2s = ellipse_rotated[1]
        y0s = np.sqrt(y1s**2 + y2s**2)
        x0s = (x1s**2 + y1s * x2s) / np.sqrt(x1s**2 + y1s**2)
        ellipses_sheared.append(np.stack([x0s, y0s]).T)

    return centroids_rotated, ellipses_sheared
def dominant_char_height(im, letters=None):
    """Return the most common letter height, considering heights >= 8 only.

    Heights of letters wider than 5 pixels are binned into 256 unit-wide
    bins over [0, 256]; the most populated bin at index 8 or above wins.

    Args:
        im: image used to find letters when ``letters`` is None and as the
            background of the ``heights.png`` debug image.
        letters: optional precomputed letters; defaults to
            ``all_letters(im)``.
    """
    if letters is None:
        letters = all_letters(im)

    heights = [letter.h for letter in letters if letter.w > 5]
    counts, _ = np.histogram(heights, bins=256, range=[0, 256])

    # TODO: make depend on DPI.
    min_height = 8
    AH = np.argmax(counts[min_height:]) + min_height

    if lib.debug:
        canvas = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
        for letter in letters:
            box_color = lib.GREEN if letter.h == AH else lib.RED
            letter.box(canvas, color=box_color)
        debug_imwrite('heights.png', canvas)

    return AH
def filter_spacing_deviation(im, AH, lines):
    """Drop lines whose inter-letter gaps vary by more than ``AH``.

    The gap between consecutive letters is ``next.x - previous.right()``.
    A line is rejected when the standard deviation of its gaps exceeds
    ``AH / 1.0``. Kept lines are drawn green and rejected ones red in
    ``spacing_filter.png``.

    Returns:
        List of the lines that were kept, in input order.
    """
    canvas = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)

    kept = []
    for line in lines:
        gaps = np.array([
            following.x - preceding.right()
            for preceding, following in zip(line, line[1:])
        ])
        too_irregular = gaps.std() > AH / 1.0

        if too_irregular:
            line.crop().draw(canvas, color=lib.RED)
            continue

        line.crop().draw(canvas, color=lib.GREEN)
        kept.append(line)

    lib.debug_imwrite("spacing_filter.png", canvas)

    return kept
def skew_angle(im, orig, AH, lines):
    """Return the median angle of the lines fitted to long text lines.

    Only lines with at least 10 letters contribute. Each fitted line is
    drawn on a colour copy of ``orig`` which is written as ``lines.png``.
    ``im`` and ``AH`` are not used.
    """
    if len(orig.shape) != 2:
        canvas = orig.copy()
    else:
        canvas = cv2.cvtColor(orig, cv2.COLOR_GRAY2RGB)

    angles = []
    for text_line in lines:
        if len(text_line) >= 10:
            fitted = text_line.fit_line()
            fitted.draw(canvas)
            angles.append(fitted.angle())

    debug_imwrite('lines.png', canvas)

    return np.median(angles)
def vanishing_point(lines, v0, O):
    """Refine the vanishing point estimate ``v0`` from text-line tangents.

    Longitudes through ``v0`` are cast across the reference line ``C0``
    (last line if ``v0[1] < 0``, else the first). For each longitude the
    tangents of all valid text lines at their intersection are intersected
    with ``Line.best_intersection``; a line ``L`` is fitted through those
    convergence points and the vanishing point is derived from it using
    the module-level ``f``.

    Args:
        lines: text lines; each needs ``left()``, ``right()`` and a
            ``model`` with ``deriv()``.
        v0: initial vanishing point (x, y).
        O: origin that is subtracted before, and added after, solving.

    Returns:
        ``(v, f, L)``: the refined point, the module-level ``f`` and the
        fitted convergence line.
    """
    C0 = lines[-1] if v0[1] < 0 else lines[0]
    others = lines[:-1] if v0[1] < 0 else lines[1:]

    # Sample C0 at N_LONGS interior positions (both end points dropped).
    domain = np.linspace(C0.left(), C0.right(), N_LONGS + 2)[1:-1]
    C0_points = np.array([domain, C0.model(domain)]).T
    longitudes = [Line.from_points(v0, p) for p in C0_points]

    # A line is valid when the outermost longitudes hit it within its span.
    lefts = [longitudes[0].text_line_intersect(line)[0] for line in others]
    rights = [longitudes[-1].text_line_intersect(line)[0] for line in others]
    valid_mask = [line.left() <= x_left and x_right < line.right()
                  for line, x_left, x_right in zip(others, lefts, rights)]

    valid_lines = [C0] + compress(others, valid_mask)
    derivs = [line.model.deriv() for line in valid_lines]
    # Report the lines that passed the filter, not the candidate count.
    print('valid lines:', len(valid_lines))

    convergences = []
    for longitude in longitudes:
        intersects = [longitude.text_line_intersect(line)
                      for line in valid_lines]
        tangents = [Line.from_point_slope(p, d(p[0]))
                    for p, d in zip(intersects, derivs)]
        convergences.append(Line.best_intersection(tangents))

    # x vx + y vy + f^2 = 0
    # m = -vx / vy
    # b = -f^2 / vy
    L = Line.fit(convergences)
    # shift into O-origin coords
    L_O = L.offset(-O)
    vy = -(f ** 2) / L_O.b
    vx = -vy * L_O.m
    v = np.array((vx, vy)) + O

    debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
    # Only the tangents of the last longitude are drawn.
    for t in tangents:
        t.draw(debug, color=RED)
    for longitude in longitudes:
        longitude.draw(debug)
    L.draw(debug, color=GREEN)
    lib.debug_imwrite('vanish.png', debug)

    return v, f, L
def side_lines(AH, lines):
    """Fit one ``LinearXModel`` to the left and one to the right line ends.

    The end points come from the first/last entry of each line's
    ``original_letters``. Each side is fitted with
    ``ransac(..., LinearXModel, 3, AH / 10.0)``. Inliers/outliers and the
    fitted lines are drawn into ``vertical.png``.

    Returns:
        ``[left_params, right_params]`` -- the ``params`` of both models.
    """
    height, _ = bw.shape

    left_ends = np.array(
        [line.original_letters[0].left_mid() for line in lines])
    right_ends = np.array(
        [line.original_letters[-1].right_mid() for line in lines])

    canvas = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)

    fitted_params = []
    for end_points in (left_ends, right_ends):
        fitted, inlier_mask = ransac(end_points, LinearXModel, 3, AH / 10.0)
        fitted_params.append(fitted.params)
        for point, accepted in zip(end_points, inlier_mask):
            draw_circle(canvas, point, 4, color=GREEN if accepted else RED)

    # params maps y -> x; draw each fit from the top row to the bottom row.
    for params in fitted_params:
        top = (params(0), 0)
        bottom = (params(height), height)
        draw_line(canvas, top, bottom, BLUE, 2)

    lib.debug_imwrite('vertical.png', canvas)

    return fitted_params
def word_contours(AH, im):
    """Find word-sized contours by horizontal run-length smoothing.

    The inverted image is opened with ``cross33`` and closed with a
    horizontal bar about ``0.6 * AH`` wide (forced odd) so neighbouring
    letters merge. Top-level contours of the result are boxed and filtered
    by size.

    Returns:
        List of ``(contour, x, y, w, h)`` tuples with
        ``AH / 3 < h < 3 * AH`` and ``w > AH / 3``.
    """
    opened = cv2.morphologyEx(im ^ 255, cv2.MORPH_OPEN, cross33)
    horiz = cv2.getStructuringElement(cv2.MORPH_RECT,
                                      (int(AH * 0.6) | 1, 1))
    rls = cv2.morphologyEx(opened, cv2.MORPH_CLOSE, horiz)
    debug_imwrite('rls.png', rls)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the last two items are the same in all.
    contours, [hierarchy] = cv2.findContours(
        rls, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    words = top_contours(contours, hierarchy)

    word_boxes = [
        (word,) + tuple(cv2.boundingRect(word)) for word in words
    ]

    # Size limits as implemented: AH / 3 < h < 3 * AH and w > AH / 3.
    filtered = []
    for box in word_boxes:
        _, _, _, w, h = box
        if h < 3 * AH and h > AH / 3 and w > AH / 3:
            filtered.append(box)

    return filtered
def filter_position(AH, im, lines, split):
    """Drop lines lying entirely far left or far right of the text block.

    The allowed band runs from a low percentile of the line left edges
    minus ``15 * AH`` to a high percentile of the line right edges plus
    ``15 * AH``. The percentiles are 15/85 when ``split`` is truthy and
    30/70 otherwise. Kept lines are drawn green and rejected ones red in
    ``position_filter.png``.

    Returns:
        List of the lines that were kept, in input order.
    """
    lefts = np.array([line.left() for line in lines])
    rights = np.array([line.right() for line in lines])

    low_pct = 15 if split else 30
    high_pct = 85 if split else 70
    margin = 15 * AH
    start_limit = np.percentile(lefts, low_pct) - margin
    end_limit = np.percentile(rights, high_pct) + margin

    canvas = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)

    kept = []
    for line in lines:
        outside = line.right() < start_limit or line.left() > end_limit
        if outside:
            line.crop().draw(canvas, color=lib.RED)
            continue

        line.crop().draw(canvas, color=lib.GREEN)
        kept.append(line)

    lib.debug_imwrite("position_filter.png", canvas)

    return kept
def aspect_ratio(im, lines, D, v, O):
    """Estimate an aspect-ratio correction factor ``r`` for the page.

    Uses the vanishing point ``v``, origin ``O``, the module-level ``f``,
    the two reference lines from ``C0_C1`` and the directrix ``D``. Writes
    the construction lines to ``aspect.png`` and prints the intermediate
    quantities.

    Returns:
        ``h_img * l'_img * cos(alpha) / (l_img * w'_img * cos(alpha + beta))``.
    """
    vx, vy = v
    C0, C1 = C0_C1(lines, v)

    im_h, im_w = im.shape  # unused; kept as in the original

    dx = vx - O[0]
    dy = vy - O[1]
    m = -(dx) / (dy)

    # Parallel lines of slope m through the first base of C0 and C1.
    L0 = Line.from_point_slope(C0.first_base(), m)
    L1 = Line.from_point_slope(C1.first_base(), m)
    perp = L0.altitude(v)
    p0 = L0.intersect(perp)
    p1 = L1.intersect(perp)
    h_img = norm(p0 - p1)

    L = Line(m, -m * O[0] - (f ** 2) / (dy))
    F = L.altitude(v).intersect(L)

    # Bottom-centre of the last letter of the last line.
    _, x0r, y0r, w0r, h0r = lines[-1][-1]
    p0r = np.array([x0r + w0r / 2.0, y0r + h0r])
    F_C0r = Line.from_points(F, p0r)
    q0 = F_C0r.intersect(L0)
    l_img = norm(q0 - p0)

    debug = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
    L0.draw(debug)
    L1.draw(debug)
    L.draw(debug, color=GREEN)
    F_C0r.draw(debug, color=RED)
    lib.debug_imwrite('aspect.png', debug)

    # Convergence line perp to V=(vx, vy, f)
    # y = -vx / vy * x + -f^2 / vy
    alpha = atan2(norm(p1 - O), f)
    theta = acos(f / sqrt((dx) ** 2 + (dy) ** 2 + f ** 2))
    beta = pi / 2 - theta

    lp_img = abs(D[0][-1] - D[0][0])
    # Arc length of D: sum of the segment lengths between its points.
    wp_img = norm(np.diff(D.T, axis=0), axis=1).sum()

    print('h_img:', h_img, 'l\'_img:', lp_img, 'alpha:', alpha)
    print('l_img:', l_img, 'w\'_img:', wp_img, 'beta:', beta)

    numerator = h_img * lp_img * cos(alpha)
    denominator = l_img * wp_img * cos(alpha + beta)
    return numerator / denominator
def widest_domain(lines, v, n_points):
    """Return ``n_points`` x positions on ``C0`` spanning all text lines.

    The left-bottom and right-bottom corners of every line other than
    ``C0`` are projected onto ``C0`` along rays through ``v``. The domain
    runs from the leftmost to the rightmost of those projections and of
    ``C0``'s own ends.

    Args:
        lines: text lines (indexable, with ``left_bot``/``right_bot`` on
            their first/last letters).
        v: point the projection rays pass through.
        n_points: number of samples in the returned domain.

    Returns:
        ``(domain, C0, C1)`` with ``domain`` from ``np.linspace``.
    """
    C0, C1 = C0_C1(lines, v)

    others = [line for line in lines if line is not C0]
    v_lefts = [Line.from_points(v, line[0].left_bot()) for line in others]
    v_rights = [Line.from_points(v, line[-1].right_bot()) for line in others]
    C0_lefts = [ray.text_line_intersect(C0)[0] for ray in v_lefts]
    C0_rights = [ray.text_line_intersect(C0)[0] for ray in v_rights]

    # Always include C0's own ends: the right bound must start from
    # C0.right() (not C0.left()), and a single list also copes with C0
    # being the only line.
    x_min = min([C0.left()] + C0_lefts)
    x_max = max([C0.right()] + C0_rights)
    domain = np.linspace(x_min, x_max, n_points)

    debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
    for line in lines:
        cv2.line(debug,
                 tuple(line[0].left_bot().astype(int)),
                 tuple(line[-1].right_bot().astype(int)),
                 GREEN, 2)
    Line.from_points(v, (x_min, C0(x_min))).draw(debug)
    Line.from_points(v, (x_max, C0(x_max))).draw(debug)
    lib.debug_imwrite('domain.png', debug)

    return domain, C0, C1
def letter_ellipses(im):
    """Return centroids and scaled principal axes of connected components.

    Each non-background component's pixel coordinates are run through
    ``np.cov`` / ``np.linalg.eigh``. A component is kept when the ratio of
    its larger to smaller eigenvalue is at most 15 and its area lies
    strictly between 10 and 3000. The eigenvectors of kept components are
    scaled column-wise by the square root of the eigenvalues.

    Returns:
        ``(centroids, ellipses)``: centroids of the kept components and an
        array of their scaled eigenvector matrices.
    """
    num_labels, labels, stats, all_centroids = \
        cv2.connectedComponentsWithStats(im)
    print(labels)
    print(num_labels)

    box_columns = (cv2.CC_STAT_LEFT, cv2.CC_STAT_TOP,
                   cv2.CC_STAT_WIDTH, cv2.CC_STAT_HEIGHT)
    boxes = stats[:, box_columns]

    kept_labels = []
    kept_axes = []
    # Label 0 is skipped.
    for label in range(1, num_labels):
        x, y, w, h = boxes[label]
        component = labels[y:y + h, x:x + w] == label
        # nonzero gives (rows, cols); reverse to coord order (x, y).
        coords = np.array(np.nonzero(component))[::-1]

        covariance = np.cov(coords)
        assert covariance.shape == (2, 2)

        # eigh returns unit eigenvectors with eigenvalues ascending.
        eigvals, eigvecs = np.linalg.eigh(covariance)
        minor_var, major_var = eigvals

        area = stats[label, cv2.CC_STAT_AREA]
        if major_var / minor_var <= 15 and 10 < area < 3000:
            eigvecs *= np.sqrt(eigvals)
            kept_labels.append(label)
            kept_axes.append(eigvecs)

    centroids = all_centroids[kept_labels]
    ellipses = np.array(kept_axes)

    if lib.debug:
        canvas = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
        for centre, axes in zip(centroids, ellipses):
            for axis in axes.T:
                cv2.line(canvas,
                         tuple((centre + axis).astype(int)),
                         tuple((centre - axis).astype(int)),
                         GREEN, 2)
        lib.debug_imwrite('ellipses.png', canvas)

    return centroids, ellipses
def debug_images(self, R, g, align, l_m):
    """Write ``surface_lines.png`` showing the fitted surface over the page.

    Does nothing unless ``lib.debug`` is truthy. Drawn on a colour copy of
    ``self.im``:

    * green: for each ``Y`` in ``l_m``, the curve ``(X, Y, g(X))`` sampled
      at 100 positions across the X range of the matching projected line;
    * red: when ``g`` is a ``SplitPoly``, the line at ``X = g.T``;
    * blue: one line for each value in ``align``.
    """
    if not lib.debug:
        return

    canvas = cv2.cvtColor(self.im, cv2.COLOR_GRAY2BGR)

    def to_image(xs, ys):
        # Lift (xs, ys) onto z = g(x) and map the 3-D points into the image.
        xyz = np.stack([xs, ys, g(xs)])
        return gcs_to_image(xyz, self.O, R).T

    def far_line(x):
        # A line at constant X spanning Y in [-10000, 10000].
        return to_image(np.array([x, x]), np.array([-10000, 10000]))

    ts_surface = E_str_project(R, g, self.base_points, 0)

    for Y, (_, points_XYZ) in zip(l_m, ts_surface):
        Xs, _, _ = points_XYZ
        sample_xs = np.linspace(Xs.min(), Xs.max(), 100)
        curve = to_image(sample_xs, np.full((100,), Y))
        for start, end in zip(curve, curve[1:]):
            draw_line(canvas, start, end, GREEN, 1)

    if isinstance(g, SplitPoly):
        split = far_line(g.T)
        for start, end in zip(split, split[1:]):
            draw_line(canvas, start, end, RED, 4)

    for x in align.flatten():
        edge = far_line(x)
        draw_line(canvas, edge[0], edge[1], BLUE, 4)

    lib.debug_imwrite('surface_lines.png', canvas)
def lu_dewarp(im):
    """Return the AND of erosions of the inverted image with 1-D elements.

    Eighteen structuring elements are used: nine column vectors of odd
    length 1..17 with the single 1 in the last position (``morph_c``), and
    nine row vectors of the same lengths with the single 1 in the first
    position (``morph_d``). The result is written as ``bdyt.png``.

    Args:
        im: image that is inverted with ``im ^ 255`` before eroding.

    Returns:
        ``uint8`` array of the same shape as ``im``.
    """
    # morphological operators
    morph_a = [
        np.array([1] + [0] * (2 * i), dtype=np.uint8).reshape(2 * i + 1, 1)
        for i in range(9)
    ]
    morph_d = [a.T for a in morph_a]
    morph_c = [
        np.array([0] * (2 * i) + [1], dtype=np.uint8).reshape(2 * i + 1, 1)
        for i in range(9)
    ]
    # The row-vector counterpart of morph_c (morph_b) is not used.

    im_inv = im ^ 255

    # Start from all bits set. Written as an explicit 255 fill because
    # subtracting 1 from a uint8 zero array relies on wrap-around, which
    # newer NumPy rejects for out-of-range Python integers.
    bdyt = np.full(im.shape, 255, dtype=np.uint8)
    for struct in morph_c + morph_d:
        bdyt &= cv2.erode(im_inv, struct)

    debug_imwrite("bdyt.png", bdyt)
    # A second pass (AND of im_inv ^ erode(im_inv, struct)) used to follow
    # this return and could never run; it has been removed.
    return bdyt
def remove_stroke_outliers(im, lines, k=1.0):
    """Drop letters whose mean stroke width is unusually small.

    Stroke widths come from ``fast_stroke_width(im)``. The overall mean and
    standard deviation are taken over the pixels covered by any letter
    raster; a letter is removed when its own mean falls below
    ``overall_mean - k * overall_std``. Lines with at most one letter are
    dropped, and so are lines left without letters.

    Returns:
        New list of ``TextLine`` objects built from the surviving letters,
        carrying over each source line's ``underlines``.
    """
    stroke_widths = fast_stroke_width(im)
    if lib.debug:
        lib.debug_imwrite('strokes.png',
                          lib.normalize_u8(stroke_widths.clip(0, 10)))

    # Accumulate every letter raster into one page-sized mask. The crop is
    # modified in place so the additions land in ``mask``.
    mask = np.zeros(im.shape, dtype=np.uint8)
    for line in lines:
        for letter in line:
            region = letter.crop().apply(mask)
            region += letter.raster()

    lib.debug_imwrite('letter_mask.png', -mask)

    masked_strokes = stroke_widths.copy()
    masked_strokes &= -mask

    strokes_mean, strokes_std = masked_mean_std(masked_strokes, mask)
    if lib.debug:
        print('overall: mean:', strokes_mean, 'std:', strokes_std)

    debug = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
    cutoff_margin = k * strokes_std

    new_lines = []
    for line in lines:
        if len(line) <= 1:
            continue

        survivors = []
        for letter in line:
            crop = letter.crop()
            if not crop.nonempty():
                continue

            raster = letter.raster()
            letter_strokes = crop.apply(stroke_widths).copy()
            letter_strokes &= lib.bool_to_u8(raster)
            mean, std = masked_mean_std(letter_strokes, raster)

            too_thin = mean < strokes_mean - cutoff_margin
            if too_thin:
                if lib.debug:
                    print('skipping {:4d} {:4d} {:.03f} {:.03f}'.format(
                        letter.x, letter.y, mean, std,
                    ))
                    letter.box(debug, color=lib.RED)
                continue

            if lib.debug:
                letter.box(debug, color=lib.GREEN)
            survivors.append(letter)

        if survivors:
            new_lines.append(TextLine(survivors, underlines=line.underlines))

    lib.debug_imwrite("stroke_filter.png", debug)

    return new_lines
def training_data(font_paths, font_size, W_h):
    """Build paired low-res feature / high-res residual patches from fonts.

    A mosaic is rendered per font with ``create_mosaic`` and stacked. Two
    anisotropically blurred copies are downscaled by 0.5 to form the
    low-resolution input. Patches of size ``W_h`` are taken every 3 pixels
    where the mean morphological gradient of the high-res image is above
    its median. Low-res patches are reduced with PCA to one sixth of their
    dimensionality.

    Returns:
        ``(Y_pca, hi_patches_vec, pca)``: PCA-transformed low-res patches,
        flattened high-res residual patches and the fitted PCA object.
    """
    faces = [freetype.Face(path) for path in font_paths]
    mosaics = [create_mosaic(face, font_size) for face in faces]
    hi_res = np.concatenate(mosaics)

    # Two blurs with swapped sigmas (7, 3) and (3, 7).
    blurred_ims = [
        cv2.GaussianBlur(hi_res, (0, 0), 7, 3),
        cv2.GaussianBlur(hi_res, (0, 0), 3, 7),
    ]
    blurred = np.concatenate(blurred_ims, axis=0)

    # Repeat the sharp image once per blur so both stacks line up.
    hi_res_2 = np.tile(hi_res, (len(blurred_ims), 1))
    lib.debug_imwrite('hi.png', hi_res_2)

    lo_res = cv2.resize(blurred, (0, 0), fx=0.5, fy=0.5,
                        interpolation=cv2.INTER_AREA)
    lib.debug_imwrite('lo.png', lo_res)

    lo_res_hi, filtered_lo = features_lo(lo_res)

    difference = hi_res_2.astype(np.float64) - lo_res_hi
    lib.debug_imwrite('diff.png', lib.normalize_u8(difference))

    # make sure we're on edges (in hi-res reference)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    gradient = cv2.morphologyEx(hi_res_2, cv2.MORPH_GRADIENT, kernel)
    gradient_means, _ = lib.mean_std(gradient, W_h)
    patch_mask = gradient_means > np.percentile(gradient_means, 50)

    # patch_centers should match others' shape.
    step = 3
    half = W_h // 2
    center_slice = slice(half, -half - 1, step)
    patch_centers = patch_mask[center_slice, center_slice]

    lo_selected = patches(filtered_lo, W_h, step)[patch_centers]
    lo_patches = lo_selected.transpose(0, 3, 1, 2)
    hi_patches = patches(difference, W_h, step)[patch_centers]

    t = lo_patches.shape[0]
    patch_area = W_h * W_h

    lo_patches_vec = lo_patches.reshape(t, -1)
    for i in range(lo_patches.shape[1]):
        first = i * W_h * W_h
        last = (i + 1) * W_h * W_h
        print_dict('lo_sq{}.png'.format(i), lo_patches_vec[:, first:last])

    hi_patches_vec = hi_patches.reshape(t, patch_area)
    print_dict('hi_sq.png', hi_patches_vec)

    # reduce dimensionality on lo-res patches with PCA.
    n_components = lo_patches_vec.shape[1] // 6
    pca = sklearn.decomposition.PCA(n_components=n_components)
    Y_pca = pca.fit_transform(lo_patches_vec)

    return Y_pca, hi_patches_vec, pca
def kim2014(orig, O=None, split=True, n_points_w=None):
    """Binarize ``orig`` and dewarp it, optionally as two separate pages.

    The image is binarized with ``sauvola_noisy`` (k=0.1) and stored in the
    module-level ``bw``. When ``split`` is truthy, the image is wider than
    tall and the relative spread of line start positions exceeds 0.10, the
    lines are split into pages and each page is dewarped on its own crop.

    Args:
        orig: input image.
        O: origin point; defaults to the image centre.
        split: whether a two-page split may be attempted.
        n_points_w: forwarded to ``Kim2014``; overwritten in the split case
            with ``max(1.2 * 90th-percentile line width, 1800)``.

    Returns:
        In the split case, a list with the first element of each page's
        ``run_retry()`` result; otherwise the full ``run_retry()`` result.
    """
    global bw

    lib.debug_imwrite('gray.png', binarize.grayscale(orig))
    im = binarize.binarize(
        orig, algorithm=lambda im: binarize.sauvola_noisy(im, k=0.1))
    bw = im

    im_h, im_w = im.shape

    AH, lines, _ = get_AH_lines(im)

    if O is None:
        O = np.array((im_w / 2.0, im_h / 2.0))

    if split:
        # Test if line start distribution is bimodal.
        line_xs = np.array([line.left() for line in lines])
        bimodal = line_xs.std() / im_w > 0.10
        dual = bimodal and im_w > im_h
    else:
        dual = False

    if not dual:
        # Keep the prefix active while the dewarper runs (as the split
        # branch does) and restore it even if dewarping raises.
        lib.debug_prefix.append('page0')
        try:
            dewarper = Kim2014(orig, im, lines, [lines], O, AH, n_points_w)
            return dewarper.run_retry()
        finally:
            lib.debug_prefix.pop()

    print('Bimodal! Splitting page!')
    pages = crop.split_lines(lines)

    n_points_w = 1.2 * np.percentile(
        np.array([line.width() for line in lines]), 90)
    n_points_w = max(n_points_w, 1800)

    if lib.debug:
        debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
        for page in pages:
            page_crop = Crop.from_lines(page).expand(0.005)
            page_crop.draw(debug)
        lib.debug_imwrite('split.png', debug)

    page_crops = [Crop.from_lines(page) for page in pages]
    if len(page_crops) == 2:
        # Extend each crop to its half of the image, split midway between
        # the right edge of the first crop and the left edge of the second.
        [c0, c1] = page_crops
        split_x = (c0.x1 + c1.x0) / 2
        page_crops = [
            c0.union(Crop(0, 0, split_x, im_h)),
            c1.union(Crop(split_x, 0, im_w, im_h)),
        ]

    result = []
    for i, (page, page_crop) in enumerate(zip(pages, page_crops)):
        print('==== PAGE {} ===='.format(i))
        lib.debug_prefix.append('page{}'.format(i))
        try:
            page_image = page_crop.apply(orig)
            page_bw = page_crop.apply(im)
            page_AH, page_lines, _ = get_AH_lines(page_bw)
            # Express the origin in the cropped page's coordinates.
            new_O = O - np.array((page_crop.x0, page_crop.y0))
            lib.debug_imwrite('precrop.png', im)
            lib.debug_imwrite('page.png', page_image)

            bw = page_bw
            dewarper = Kim2014(page_image, page_bw, page_lines,
                               [page_lines], new_O, page_AH, n_points_w)
            result.append(dewarper.run_retry()[0])
        finally:
            lib.debug_prefix.pop()

    return result