def make_mesh_2d_indiv(all_lines, corners_XYZ, O, R, g, n_points_w=None):
    """Build the image-space sampling mesh for a single page surface.

    Args:
        all_lines: text lines; only consulted (through ``line.width()``) to
            guess ``n_points_w`` when the caller does not supply it.
        corners_XYZ: corner coordinates on the surface. Its first two rows
            are used to bound the mesh region.
        O, R: forwarded unchanged to ``gcs_to_image``.
        g: callable mapping surface x to z.
        n_points_w: number of mesh samples across the width. Defaults to
            1.2 times the 90th-percentile line width.

    Returns:
        The array produced by ``gcs_to_image`` with its first axis moved
        last (``transpose(1, 2, 0)``), flipped where necessary so that the
        mean x grows along the columns and the mean y grows along the rows.
    """
    # Bounding box of the corners' first two rows, slightly enlarged.
    box_XYZ = Crop.from_points(corners_XYZ[:2]).expand(0.01)
    if lib.debug:
        print('box_XYZ:', box_XYZ)

    if n_points_w is None:
        # 90th percentile line width a good guess
        line_widths = np.array([line.width() for line in all_lines])
        n_points_w = 1.2 * np.percentile(line_widths, 90)

    # Sample the profile z = g(x) densely, then resample it into
    # int(n_points_w) points via arc_length_points.
    mesh_XYZ_x = np.linspace(box_XYZ.x0, box_XYZ.x1, 400)
    mesh_XYZ_z = g(mesh_XYZ_x)
    mesh_XYZ_xz_arc, total_arc = arc_length_points(mesh_XYZ_x, mesh_XYZ_z,
                                                   int(n_points_w))
    # Only x is kept; z can be recomputed from g.
    mesh_XYZ_x_arc, _ = mesh_XYZ_xz_arc

    # TODO: think more about estimation of aspect ratio for mesh
    # np.linspace requires an integer sample count, so truncate the
    # aspect-ratio-scaled height to int.
    n_points_h = int(n_points_w * box_XYZ.h / total_arc)
    mesh_XYZ_y = np.linspace(box_XYZ.y0, box_XYZ.y1, n_points_h)
    mesh_XYZ = make_mesh_XYZ(mesh_XYZ_x_arc, mesh_XYZ_y, g)
    mesh_2d = gcs_to_image(mesh_XYZ, O, R)
    if lib.debug:
        print('mesh:', Crop.from_points(mesh_2d))

    # make sure meshes are not reversed
    if mesh_2d[0, :, 0].mean() > mesh_2d[0, :, -1].mean():
        mesh_2d = mesh_2d[:, :, ::-1]

    if mesh_2d[1, 0].mean() > mesh_2d[1, -1].mean():
        mesh_2d = mesh_2d[:, ::-1, :]

    return mesh_2d.transpose(1, 2, 0)
def crop(self):
    """Return the union of the crops of all letters and, if any, underlines."""
    letters_crop = Crop.union_all([letter.crop() for letter in self.letters])
    if not self.underlines:
        return letters_crop

    underlines_crop = Crop.union_all(
        [underline.crop() for underline in self.underlines]
    )
    return Crop.union(letters_crop, underlines_crop)
def make_mesh_2d(all_lines, O, R, g, n_points_w=None):
    """Build the image-space mesh(es) covering the text in ``all_lines``.

    Letter corners are mapped with ``image_to_focal_plane`` and placed on
    the surface by ``newton.t_i_k``; implausible corners are discarded and
    the survivors bound the mesh built by ``make_mesh_2d_indiv``.

    Args:
        all_lines: text lines, each exposing ``letters``.
        O, R: forwarded to the projection helpers.
        g: surface profile callable. ``g.split()`` selects two-mesh mode,
            in which corners are partitioned at ``g.T``.
        n_points_w: forwarded to ``make_mesh_2d_indiv``.

    Returns:
        A list holding two meshes when ``g.split()`` is truthy, else one.
    """
    all_letters = np.concatenate([line.letters for line in all_lines])
    corners_2d = np.concatenate([letter.corners() for letter in all_letters]).T
    assert corners_2d.shape[0] == 2 and corners_2d.shape[1] == 4 * len(all_letters)

    corners = image_to_focal_plane(corners_2d, O)
    assert corners.shape[0] == 3

    t0s = np.full((corners.shape[1],), np.inf, dtype=np.float64)
    # Get the (X,Y,Z) on the GCS surface
    corners_t, corners_XYZ = newton.t_i_k(R, g, corners, t0s)
    corners_X, _, corners_Z = corners_XYZ

    # NOTE(review): the error is divided by the signed corners_Z, so it is
    # negative (and always passes the threshold below) wherever
    # corners_Z < 0 -- confirm whether abs(corners_Z) was intended.
    relative_Z_error = np.abs(g(corners_X) - corners_Z) / corners_Z

    # Only keep corners that are not weird: |g(X) - Z| should be small,
    # |Z| should not be extremely large, and t < 0 (the GCS should be in
    # front of the camera).
    # np.logical_and is binary -- a third positional argument is its `out`
    # buffer -- so the three conditions are combined with `&` instead.
    plausible = (
        (relative_Z_error <= 0.02)
        & (abs(corners_Z) < 1e6)
        & (corners_t < 0)
    )
    corners_XYZ = corners_XYZ[:, plausible]
    corners_X, _, _ = corners_XYZ

    debug_print_points('corners.png', corners_2d)

    if lib.debug:
        # Best-effort side-view plot of the fitted surface; on any failure
        # print the error and drop into an interactive shell.
        try:
            import matplotlib.pyplot as plt
            ax = plt.axes()
            box_XY = Crop.from_points(corners_XYZ[:2]).expand(0.01)
            x_min, y_min, x_max, y_max = box_XY
            for y in np.linspace(y_min, y_max, 3):
                xs = np.linspace(x_min, x_max, 200)
                ys = np.full(200, y)
                zs = g(xs)
                points = np.stack([xs, ys, zs])
                points_r = inv(R).dot(points) + Of[:, newaxis]
                ax.plot(points_r[0], points_r[2])

            base_xs = np.array([corners[0].min(), corners[0].max()])
            base_zs = np.array([-3270.5, -3270.5])
            ax.plot(base_xs, base_zs)
            ax.set_aspect('equal')
            plt.savefig('dewarp/camera.png')
        except Exception as e:
            print(e)
            import IPython
            IPython.embed()

    if g.split():
        # One mesh per side of the split position g.T.
        meshes = [
            make_mesh_2d_indiv(all_lines, corners_XYZ[:, corners_X <= g.T],
                               O, R, g, n_points_w=n_points_w),
            make_mesh_2d_indiv(all_lines, corners_XYZ[:, corners_X > g.T],
                               O, R, g, n_points_w=n_points_w),
        ]
    else:
        meshes = [make_mesh_2d_indiv(all_lines, corners_XYZ, O, R, g,
                                     n_points_w=n_points_w)]

    return meshes
def make_mesh_2d_indiv(all_lines, corners_XYZ, O, R, g, n_points_w=None):
    """Build the image-space sampling mesh for a single page surface.

    Args:
        all_lines: text lines; only consulted (through ``line.width()``) to
            guess ``n_points_w`` when the caller does not supply it.
        corners_XYZ: corner coordinates on the surface. Its first two rows
            bound the text region.
        O, R: forwarded unchanged to ``gcs_to_image``.
        g: callable mapping surface x to z.
        n_points_w: number of discrete mesh points across the width. When
            omitted it is 1.2 times the 90th-percentile line width, but at
            least 1800.

    Returns:
        The ``gcs_to_image`` result transposed to (height, width, 2), where
        the last axis holds an (x, y) coordinate on the original image.
    """
    # Axis-aligned bounding box of the text region, slightly enlarged.
    bounds = Crop.from_points(corners_XYZ[:2]).expand(0.01)
    if lib.debug:
        print('box_XYZ:', bounds)

    if n_points_w is None:
        # The 90th-percentile line width is a good first guess.
        line_widths = np.array([line.width() for line in all_lines])
        n_points_w = max(1.2 * np.percentile(line_widths, 90), 1800)

    # Dense sampling of the profile z = g(x) across the box.
    dense_x = np.linspace(bounds.x0, bounds.x1, 400)
    dense_z = g(dense_x)
    n_cols = int(n_points_w)

    # total_arc is the true page width (integrated with finite differences).
    arc_xz, total_arc = arc_length_points(dense_x, dense_z, n_cols)
    # Only x is needed: z follows from g, i.e. z = g(x).
    arc_x, _ = arc_xz
    assert len(arc_x) == n_cols

    # TODO: think more about estimation of aspect ratio for mesh
    # The number of points along the height should follow the aspect ratio
    # bounds.h / total_arc.
    n_rows = int(n_points_w * bounds.h / total_arc)
    column_y = np.linspace(bounds.y0, bounds.y1, n_rows)

    # (3, n_points_h, n_points_w)
    surface = make_mesh_XYZ(arc_x, column_y, g)
    assert surface.shape[0] == 3

    # mesh_2d[:, i, j] is a coordinate (x, y) on the original image.
    mesh_2d = gcs_to_image(surface, O, R)
    if lib.debug:
        print('mesh:', Crop.from_points(mesh_2d))

    # Make sure the mesh is not reversed (presumably a side effect of the
    # rotation, e.g. a rotation by pi about the (0, 1, 0) axis).
    # Left-right: the mean x of the first column must not exceed that of
    # the last column.
    first_col_x = mesh_2d[0, :, 0].mean()
    last_col_x = mesh_2d[0, :, -1].mean()
    if first_col_x > last_col_x:
        mesh_2d = mesh_2d[:, :, ::-1]

    # Upside down: the mean y of the first row must not exceed that of the
    # last row.
    first_row_y = mesh_2d[1, 0].mean()
    last_row_y = mesh_2d[1, -1].mean()
    if first_row_y > last_row_y:
        mesh_2d = mesh_2d[:, ::-1, :]

    return mesh_2d.transpose(1, 2, 0)  # height, width, 2 (x, y)
def kim2014(orig, O=None, split=False, n_points_w=None, n_tries=30):
    """Binarize ``orig``, detect its text lines and dewarp it.

    Args:
        orig: input image.
        O: 2-element origin; defaults to the centre of the binarized image.
        split: when true, test whether the image holds two pages and, if
            so, dewarp each page separately.
        n_points_w: mesh width forwarded to ``Kim2014``. It is recomputed
            from the detected line widths when the page is split.
        n_tries: forwarded to ``Kim2014.run_retry`` in both branches.

    Returns:
        When the page is split: a list with the first element of each
        page's ``run_retry`` result. Otherwise: the ``run_retry`` result
        for the whole image.
    """
    lib.debug_imwrite('gray.png', binarize.grayscale(orig))
    im = binarize.binarize(
        orig, algorithm=lambda img: binarize.sauvola_noisy(img, k=0.1)
    )
    # Module-level `bw` is kept pointing at the binarized image in use.
    global bw
    bw = im

    im_h, im_w = im.shape

    AH, lines, _ = get_AH_lines(im)

    if O is None:
        O = np.array((im_w / 2.0, im_h / 2.0))

    if split:
        # Test if line start distribution is bimodal.
        line_xs = np.array([line.left() for line in lines])
        bimodal = line_xs.std() / im_w > 0.10
        dual = bimodal and im_w > im_h
    else:
        dual = False

    if dual:
        print('Bimodal! Splitting page!')
        pages = crop.split_lines(lines)

        # NOTE(review): this overrides any caller-supplied n_points_w when
        # the page is split -- confirm that is intended.
        n_points_w = 1.2 * np.percentile(np.array([line.width() for line in lines]), 90)
        n_points_w = max(n_points_w, 1800)

        if lib.debug:
            debug = cv2.cvtColor(bw, cv2.COLOR_GRAY2BGR)
            for page in pages:
                page_crop = Crop.from_lines(page).expand(0.005)
                page_crop.draw(debug)
            lib.debug_imwrite('split.png', debug)

        page_crops = [Crop.from_lines(page) for page in pages]
        if len(page_crops) == 2:
            # Extend each page's crop to the image border on its own side
            # of the midpoint between the two text regions.
            [c0, c1] = page_crops
            split_x = (c0.x1 + c1.x0) / 2
            page_crops = [
                c0.union(Crop(0, 0, split_x, im_h)),
                c1.union(Crop(split_x, 0, im_w, im_h))
            ]

        result = []
        for i, page_crop in enumerate(page_crops):
            print('==== PAGE {} ===='.format(i))
            lib.debug_prefix.append('page{}'.format(i))
            try:
                page_image = page_crop.apply(orig)
                page_bw = page_crop.apply(im)
                page_AH, page_lines, _ = get_AH_lines(page_bw)
                # Express the origin relative to the cropped page.
                new_O = O - np.array((page_crop.x0, page_crop.y0))
                lib.debug_imwrite('precrop.png', im)
                lib.debug_imwrite('page.png', page_image)

                bw = page_bw
                dewarper = Kim2014(page_image, page_bw, page_lines,
                                   [page_lines], new_O, page_AH, n_points_w)
                # Honour the caller's retry budget for split pages too.
                result.append(dewarper.run_retry(n_tries=n_tries)[0])
            finally:
                # Always restore the debug prefix, even if dewarping fails.
                lib.debug_prefix.pop()

        return result
    else:
        lib.debug_prefix.append('page0')
        try:
            dewarper = Kim2014(orig, im, lines, [lines], O, AH, n_points_w)
        finally:
            lib.debug_prefix.pop()
        return dewarper.run_retry(n_tries=n_tries)
def process_image(original, dpi=None):
    """Rotate, crop (optionally dewarp) and binarize a scanned image.

    Reads the module-level ``args`` for ``rotate`` and ``dewarp``.

    Args:
        original: input image array.
        dpi: resolution of the scan. When falsy it is estimated from the
            image height, assuming a page about 10 inches tall with some
            margin, rounded to a multiple of 100.

    Returns:
        A ``(dpi, out_images)`` tuple, where ``out_images`` holds one
        binarized image per cropped page.
    """
    original_rot90 = original
    # Integer division: range() does not accept the float that `/` yields
    # under Python 3.
    for _ in range(args.rotate // 90):
        original_rot90 = np.rot90(original_rot90)

    im_h, im_w = original_rot90.shape[:2]
    # image height should be about 10 inches. round to 100
    if not dpi:
        dpi = int(round(im_h / 1100.0) * 100)
        print('detected dpi:', dpi)
    split = im_w > im_h  # two pages

    cropped_images = []
    if args.dewarp:
        lib.debug_prefix.append('dewarp')
        dewarped_images = dewarp.kim2014(original_rot90)
        for im in dewarped_images:
            bw = binarize.binarize(im, algorithm=binarize.sauvola, resize=1.0)
            lib.debug_prefix.append('crop')
            _, [lines] = crop(im, bw, split=False)
            lib.debug_prefix.pop()

            # Keep the page only if its lines produce a non-empty crop; the
            # crop actually applied is the whitespace-based one.
            c = Crop.from_lines(lines)
            if c.nonempty():
                cropped_images.append(Crop.from_whitespace(bw).apply(im))
        lib.debug_prefix.pop()
    else:
        bw = binarize.binarize(original_rot90, algorithm=binarize.adaptive_otsu, resize=1.0)
        debug_imwrite('thresholded.png', bw)
        AH, line_sets = crop(original_rot90, bw, split=split)

        for lines in line_sets:
            c = Crop.from_lines(lines)
            if c.nonempty():
                # NOTE(review): this switches lib.debug off and this
                # function never restores it.
                lib.debug = False
                bw_cropped = c.apply(bw)
                orig_cropped = c.apply(original_rot90)
                angle = algorithm.skew_angle(bw_cropped, original_rot90, AH, lines)
                # Fall back to no rotation when the skew estimate is NaN/inf.
                if not np.isfinite(angle):
                    angle = 0.
                rotated = algorithm.safe_rotate(orig_cropped, angle)

                # Re-detect the lines on the deskewed image and crop to them.
                rotated_bw = binarize.binarize(rotated, algorithm=binarize.adaptive_otsu)
                _, [new_lines] = crop(rotated, rotated_bw, split=False)
                new_crop = Crop.union_all([line.crop() for line in new_lines])

                if new_crop.nonempty():
                    cropped = new_crop.apply(rotated)
                    cropped_images.append(cropped)

    out_images = []
    lib.debug_prefix.append('binarize')
    for i, cropped in enumerate(cropped_images):
        lib.debug_prefix.append('page{}'.format(i))
        if lib.is_bw(original_rot90):
            out_images.append(binarize.otsu(cropped))
        else:
            out_images.append(
                binarize.ng2014_fallback(binarize.grayscale(cropped))
            )
        lib.debug_prefix.pop()
    lib.debug_prefix.pop()

    return dpi, out_images
def crop(self):
    """Return a ``Crop`` spanning (x, y) to (x + w, y + h)."""
    left, top = self.x, self.y
    right = self.x + self.w
    bottom = self.y + self.h
    return Crop(left, top, right, bottom)