def draw_epipolar_lines(Irgb1, Irgb2, pts1, pts2, H): """ Draws epipolar lines, and displays them to the user in an interactive manner. Input: nparray Irgb1, Irgb2: H x W x 3 nparray pts1, pts2: N x 2 nparray H: 3x3 """ h, w = Irgb1.shape[0:2] for i, pt1 in enumerate(pts1): Irgb1_ = Irgb1.copy() Irgb2_ = Irgb2.copy() pt2 = pts2[i] pt1_h = np.hstack((pt1, np.array([1.0]))) pt2_h = np.hstack((pt2, np.array([1.0]))) epiline2 = np.dot(util_camera.make_crossprod_mat(pt2_h), np.dot(H, pt1_h)) epiline1 = np.dot(H.T, epiline2) epiline1 = epiline1 / epiline1[2] epiline2 = epiline2 / epiline2[2] print "Epiline1 is: slope={0} y-int={1}".format(-epiline1[0] / epiline1[1], -epiline1[2] / epiline1[1]) print "Epiline2 is: slope={0} y-int={1}".format(-epiline2[0] / epiline2[1], -epiline2[2] / epiline2[1]) Irgb1_ = util_camera.draw_line(Irgb1_, epiline1) Irgb2_ = util_camera.draw_line(Irgb2_, epiline2) cv.Circle(cv.fromarray(Irgb1_), tuple(intrnd(*pts1[i])), 3, (255, 0, 0)) cv.Circle(cv.fromarray(Irgb2_), tuple(intrnd(*pts2[i])), 3, (255, 0, 0)) print "(Displaying epipolar lines from img1 to img2. Press <enter> to continue.)" cv2.namedWindow('display1') cv2.imshow('display1', Irgb1_) cv2.namedWindow('display2') cv2.imshow('display2', Irgb2_) cv2.waitKey(0)
def main(): # K matrix given by the Caltech Lanes dataset (CameraInfo.txt) K = np.array([[309.4362, 0, 317.9034], [0, 344.2161, 256.5352], [0, 0, 1]]) line1 = np.array([1.30459272, 1., -589.16024465]) line2 = np.array([-1.26464497, 1., 228.18829664]) win1 = (0.4, 0.60, 0.2, 0.25) win2 = (0.62, 0.60, 0.2, 0.25) lane_width = 3.66 # 3.66 meters imgpath = 'imgs_sample/f00001.png' I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE) #### Draw lines on _Irgbline.png to sanity check lane detection Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR) Irgb = util_camera.draw_line(Irgb, line1, (0, 255, 0)) Irgb = util_camera.draw_line(Irgb, line2, (255, 0, 0)) cv2.imwrite("_Irgbline.png", Irgb) H = estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width) print H print " rank(H): {0}".format(np.linalg.matrix_rank(H)) print " det(H): {0}".format(np.linalg.det(H)) if np.linalg.matrix_rank(H) == 3: print "The following should be identity (inv(H) * H):" print np.dot(numpy.linalg.inv(H), H) print "(Evaluating a few world points to see where they lie on the image)" Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR) # world point: (X, Z, 1), i.e. point on world plane (road) world_pts = [ # Points in MIDDLE of lane, going towards horizon ((0, 0, 1), (0, 1, 1), (0, 2, 1), (0, 4, 1), (0, 8, 1), (0, 16, 1), (0, 32, 1), (0, 10000, 1)), # Points on LEFT lane, going towards horizon ((-1.83, 0, 1), (-1.83, 2, 1), (-1.83, 4, 1), (-1.83, 8, 1), (-1.83, 10000, 1)), # Points on RIGHT lane, going towards horizon ((1.83, 0, 1), (1.83, 2, 1), (1.83, 4, 1), (1.83, 8, 1), (1.83, 10000, 1)) ] colours = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] for i, sub_pts in enumerate(world_pts): clr = colours[i % len(colours)] for pt in sub_pts: pt_np = np.array(pt) pt_img = np.dot(H, pt_np) pt_img = pt_img / pt_img[2] print "(i={0}) World {1} -> {2}".format(i, pt, pt_img) cv2.circle(Irgb, (intrnd(pt_img[0]), intrnd(pt_img[1])), 3, clr) print cv2.imwrite("_Irgb_pts.png", Irgb) print "Done."
def main(): # K matrix given by the Caltech Lanes dataset (CameraInfo.txt) K = np.array([[309.4362, 0, 317.9034], [0, 344.2161, 256.5352], [0, 0, 1 ]]) line1 = np.array([ 1.30459272, 1., -589.16024465]) line2 = np.array([ -1.26464497, 1., 228.18829664]) win1 = (0.4, 0.60, 0.2, 0.25) win2 = (0.62, 0.60, 0.2, 0.25) lane_width = 3.66 # 3.66 meters imgpath = 'imgs_sample/f00001.png' I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE) #### Draw lines on _Irgbline.png to sanity check lane detection Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR) Irgb = util_camera.draw_line(Irgb, line1, (0, 255, 0)) Irgb = util_camera.draw_line(Irgb, line2, (255, 0, 0)) cv2.imwrite("_Irgbline.png", Irgb) H = estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width) print H print " rank(H): {0}".format(np.linalg.matrix_rank(H)) print " det(H): {0}".format(np.linalg.det(H)) if np.linalg.matrix_rank(H) == 3: print "The following should be identity (inv(H) * H):" print np.dot(numpy.linalg.inv(H), H) print "(Evaluating a few world points to see where they lie on the image)" Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR) # world point: (X, Z, 1), i.e. point on world plane (road) world_pts = [ # Points in MIDDLE of lane, going towards horizon ((0, 0, 1), (0, 1, 1), (0, 2, 1), (0, 4, 1), (0, 8, 1), (0, 16, 1), (0, 32, 1), (0, 10000, 1)), # Points on LEFT lane, going towards horizon ((-1.83, 0, 1), (-1.83, 2, 1), (-1.83, 4, 1), (-1.83, 8, 1), (-1.83, 10000, 1)), # Points on RIGHT lane, going towards horizon ((1.83, 0, 1), (1.83, 2, 1), (1.83, 4, 1), (1.83, 8, 1), (1.83, 10000, 1)) ] colours = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] for i, sub_pts in enumerate(world_pts): clr = colours[i % len(colours)] for pt in sub_pts: pt_np = np.array(pt) pt_img = np.dot(H, pt_np) pt_img = pt_img / pt_img[2] print "(i={0}) World {1} -> {2}".format(i, pt, pt_img) cv2.circle(Irgb, (intrnd(pt_img[0]), intrnd(pt_img[1])), 3, clr) print cv2.imwrite("_Irgb_pts.png", Irgb) print "Done."
def draw_line(Irgb, line, color=(0, 255, 0)): """ Draws a line on an input image. If input image is not a three channeled image, then this will convert it. Input: nparray Irgb: (H x W x 1) or (H x W x 3) nparray line: [float a, float b, float c] tuple color: (int B, int G, int R) Output: nparray Irgb: (H x W x 3) """ if len(Irgb.shape) != 3: Irgb = cv2.cvtColor(Irgb, cv.CV_GRAY2BGR) Irgb = Irgb.copy() h, w = Irgb.shape[0:2] pts = [] for x in xrange(w): y = compute_line_y(line, x) if y > 0 and y < h: pts.append((x,y)) cv.Line(cv.fromarray(Irgb), tuple(intrnd(*pts[0])), tuple(intrnd(*pts[-1])), color) return Irgb
def draw_line(Irgb, line, color=(0, 255, 0)): """ Draws a line on an input image. If input image is not a three channeled image, then this will convert it. Input: nparray Irgb: (H x W x 1) or (H x W x 3) nparray line: [float a, float b, float c] tuple color: (int B, int G, int R) Output: nparray Irgb: (H x W x 3) """ if len(Irgb.shape) != 3: Irgb = cv2.cvtColor(Irgb, cv.CV_GRAY2BGR) Irgb = Irgb.copy() h, w = Irgb.shape[0:2] pts = [] for x in xrange(w): y = compute_line_y(line, x) if y > 0 and y < h: pts.append((x, y)) cv.Line(cv.fromarray(Irgb), tuple(intrnd(*pts[0])), tuple(intrnd(*pts[-1])), color) return Irgb
def draw_epipolar_lines(Irgb1, Irgb2, pts1, pts2, H): """ Draws epipolar lines, and displays them to the user in an interactive manner. Input: nparray Irgb1, Irgb2: H x W x 3 nparray pts1, pts2: N x 2 nparray H: 3x3 """ h, w = Irgb1.shape[0:2] for i, pt1 in enumerate(pts1): Irgb1_ = Irgb1.copy() Irgb2_ = Irgb2.copy() pt2 = pts2[i] pt1_h = np.hstack((pt1, np.array([1.0]))) pt2_h = np.hstack((pt2, np.array([1.0]))) epiline2 = np.dot(util_camera.make_crossprod_mat(pt2_h), np.dot(H, pt1_h)) epiline1 = np.dot(H.T, epiline2) epiline1 = epiline1 / epiline1[2] epiline2 = epiline2 / epiline2[2] print "Epiline1 is: slope={0} y-int={1}".format( -epiline1[0] / epiline1[1], -epiline1[2] / epiline1[1]) print "Epiline2 is: slope={0} y-int={1}".format( -epiline2[0] / epiline2[1], -epiline2[2] / epiline2[1]) Irgb1_ = util_camera.draw_line(Irgb1_, epiline1) Irgb2_ = util_camera.draw_line(Irgb2_, epiline2) cv.Circle(cv.fromarray(Irgb1_), tuple(intrnd(*pts1[i])), 3, (255, 0, 0)) cv.Circle(cv.fromarray(Irgb2_), tuple(intrnd(*pts2[i])), 3, (255, 0, 0)) print "(Displaying epipolar lines from img1 to img2. Press <enter> to continue.)" cv2.namedWindow('display1') cv2.imshow('display1', Irgb1_) cv2.namedWindow('display2') cv2.imshow('display2', Irgb2_) cv2.waitKey(0)
def draw_subwindow(Irgb, win, colour=(125, 125, 0)): """ Draws subwindow on Irgb. Input: nparray Irgb tuple win: (float x, float y, float w, float h) Output: nparray Irgb_out """ xf, yf, wf, hf = win Irgb_out = Irgb.copy() h, w = Irgb_out.shape[0:2] win_w = intrnd(w * wf) win_h = intrnd(h * hf) if win_w % 2 == 0: win_w += 1 if win_h % 2 == 0: win_h += 1 pt1 = (intrnd(w * xf - (win_w / 2)), intrnd(h * yf - (win_h / 2))) pt2 = (intrnd(w * xf + (win_w / 2)), intrnd(h * yf + (win_h / 2))) cv2.rectangle(Irgb_out, pt1, pt2, colour, thickness=1) return Irgb_out
def draw_subwindow(Irgb, win, colour=(125, 125, 0)): """ Draws subwindow on Irgb. Input: nparray Irgb tuple win: (float x, float y, float w, float h) Output: nparray Irgb_out """ xf, yf, wf, hf = win Irgb_out = Irgb.copy() h, w = Irgb_out.shape[0:2] win_w = intrnd(w*wf) win_h = intrnd(h*hf) if win_w % 2 == 0: win_w += 1 if win_h % 2 == 0: win_h += 1 pt1 = (intrnd(w*xf - (win_w/2)), intrnd(h*yf - (win_h/2))) pt2 = (intrnd(w*xf + (win_w/2)), intrnd(h*yf + (win_h/2))) cv2.rectangle(Irgb_out, pt1, pt2, colour, thickness=1) return Irgb_out
def main(): args = parse_args() if not os.path.exists(IMGSDIR_CALIB): print "(ERROR) Calibration images not found. Please place \ the LDWS_calibrate_short/ images in the current directory." exit(1) if not os.path.exists(IMGSDIR_TEST): print "(ERROR) Test images not found. Please place the \ LDWS_test_short/ images in the current directory." exit(1) imgpaths_calib = util.get_imgpaths(IMGSDIR_CALIB) if not os.path.isdir(args.imgsdir): imgpaths_test = [args.imgsdir] else: imgpaths_test = util.get_imgpaths(args.imgsdir) if args.reuse_calib: print "(Reusing camera calibration matrix)" K = np.array([[674.07224154, 0., 262.77722917], [0., 670.26875783, 330.21546389], [0., 0., 1.]]) else: print "(Estimating camera matrix...)" t = time.time() K = calibrate_camera.calibrate_camera(imgpaths_calib, 8, 8, 0.048) dur = time.time() - t print "(Finished. {0:.4f})".format(dur) for i, imgpath in enumerate(imgpaths_test): print "\n==== ({0}/{1}) Detecting lanes... [{2}]====".format( i + 1, len(imgpaths_test), os.path.split(imgpath)[1]) I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE) h, w = I.shape[0:2] t = time.time() line1, line2 = detect_lanes.detect_lanes(I, win1=WIN_LEFT, win2=WIN_RIGHT, threshold1=110, threshold2=220, apertureSize=3, show_edges=False) dur = time.time() - t print " Finished detecting lanes ({0:.4f}s)".format(dur) if line1 == None or line2 == None: print "({0}/{1}) Error: Couldn't find lanes.".format( i + 1, len(imgpaths_test)) continue # Choose 4 points on the lanes to estimate the planar homography y1 = intrnd(0.45 * h) y2 = intrnd(0.65 * h) pts = np.array([ [compute_x(line1, y1), y1], # Left lane, far [compute_x(line2, y1), y1], # Right lane, far [compute_x(line1, y2), y2], # Left lane, close [compute_x(line2, y2), y2] ]) # Right lane, close # These world points have the origin at the middle of the lane, # directly below the camera. # Depths 5.0, 3.0 are chosen arbitrarily, as we don't have depth # information. Thus, the computed H will only be accurate in the # X axis. pts_worldm = np.array([ [-LANE_W / 2.0, 5.0], # Left lane, far [LANE_W / 2.0, 5.0], # Right lane, far [-LANE_W / 2.0, 3.0], # Left lane, close [LANE_W / 2.0, 3.0] ]) # Right lane, close # These world points are scaled+translated to generate a # reasonably-sized image w/ perspective effects removed. # Note: the origin here is not the same origin as in pts_worldm! pts_world = np.array([[0.0, 0.0], [LANE_W * 1e2, 0.0], [0.0, 500], [LANE_W * 1e2, 500.0]]) pts_world[:, 0] += LANE_W * 1e2 # Let's see more of the horizontal image pts_world[:, 1] += 200 # Let's see more down the road # H_metric := Homography mapping the image (pixel coords) to the # world plane defined by pts_worldm H_metric = cv2.getPerspectiveTransform(pts.astype('float32'), pts_worldm.astype('float32')) H = cv2.getPerspectiveTransform(pts.astype('float32'), pts_world.astype('float32')) ## Estimate where the camera is w.r.t. the world ref. frame R, T = estimate_extrinsic_parameters(H_metric) xdist = T[0] - (LANE_W / 2.0) print " Distance from center of lane: X={0:.2f} meters".format( xdist) LEFT_THRESH = -1.0 # Stay within 1.0 meters of the center of the lane RIGHT_THRESH = 1.0 if xdist >= RIGHT_THRESH: print " WARNING: Camera center is awfully close to the \ RIGHT side of the lane!" elif xdist <= LEFT_THRESH: print " WARNING: Camera center is awfully close to the \ LEFT side of the lane!" Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR) Iipm = cv2.warpPerspective(Irgb.astype('float64'), H, (1000, 700)) cv2.namedWindow("win2: Perspective-rectified image") cv2.imshow("win2: Perspective-rectified image", Iipm.astype('uint8')) Irgb = detect_lanes.draw_subwindow(Irgb, WIN_LEFT) Irgb = detect_lanes.draw_subwindow(Irgb, WIN_RIGHT) for _pt in pts: _pt = tuple([intrnd(x) for x in _pt]) cv2.circle(Irgb, _pt, 3, (0, 0, 255)) print " ({0}/{1}) Displaying detected lanes.".format( i + 1, len(imgpaths_test)) show_lanes(Irgb, line1, line2) print "Done."
def test_koopa_singleimage(SHOW_EPIPOLAR=False): """ Here, I want to do "undo" the perspective distortion in the image, based on the fact that I know: - The camera matrix K - The scene is planar - The location of four points in the image that create a rectangle in the world plane with known metric lengths Note: I don't have to necessarily know the metric lengths, but at least I need to know length ratios to construct a corrected image with length ratios preserved. In the literature, this is also known as: perspective rectification, perspective correction. """ imgpath = os.path.join(IMGSDIR_KOOPA_MED, 'DSCN0643.png') pts1 = np.array([ [373.0, 97.0], # A (upper-left red corner) [650.0, 95.0], # B (upper-right red corner) [674.0, 215.0], # C (lower-right red corner) [503.0, 322.0], # D (lower-left green corner) [494.0, 322.0], # E (lower-right blue corner) [303.0, 321.0], # F (lower-left blue corner) [332.0, 225.0], # G (upper-left blue corner) [335.0, 215.0], # H (lower-left red corner) [475.0, 135.0], # K (tip of pen) [507.0, 225.0], # L (upper-left green corner) [362.0, 248.0], # M (tip of glue bottle) ]) worldpts = get_worldplane_coords() assert len(pts1) == len(worldpts) calib_imgpaths = util.get_imgpaths(IMGSDIR_CALIB_MED) print "(Calibrating camera...)" if True: print "(Using pre-computed camera matrix K!)" K = np.array([[158.23796519, 0.0, 482.07814366], [0., 28.53758493, 333.32239125], [0., 0., 1.]]) else: K = calibrate_camera.calibrate_camera(calib_imgpaths, 9, 6, 0.023) print "Finished calibrating camera, K is:" print K Kinv = np.linalg.inv(K) pts1_norm = normalize_coords(pts1, K) HL = estimate_planar_homography(pts1_norm, worldpts) print "Estimated homography, H is:" print HL print "rank(H):", np.linalg.matrix_rank(HL) if np.linalg.matrix_rank(HL) != 3: print " Oh no! H is not full rank!" #### Normalize H := H / sigma_2(H_) U, S, V = np.linalg.svd(HL) H = HL / S[1] #### Enforce positive depth constraint flag_flipped = False for i, pt1 in enumerate(pts1_norm): pt2 = worldpts[i] val = np.dot(np.hstack((pt2, [1.0])), np.dot(H, np.hstack( (pt1, [1.0])))) if val < 0: if flag_flipped: print "WOAH, flipping twice?!" pdb.set_trace() print "FLIP IT! val_{0} was: {1}".format(i, val) H = H * -1 flag_flipped = True # (Sanity check positive depth constraint) for i, pt1 in enumerate(pts1_norm): pt2 = worldpts[i] val = np.dot(np.hstack((pt2, [1.0])), np.dot(H, np.hstack( (pt1, [1.0])))) if val < 0: print "WOAH, positive depth constraint violated!?" pdb.set_trace() #### Check projection error from I1 -> I2. errs = [] for i, pt1 in enumerate(pts1_norm): pt1_h = np.hstack((pt1, np.array([1.0]))) pt2_h = np.hstack((worldpts[i], np.array([1.0]))) pt2_pred = np.dot(H, pt1_h) pt2_pred = pt2_pred / pt2_pred[2] errs.append(np.linalg.norm(pt2_h - pt2_pred)) print "Projection error: {0} (Mean: {1} std: {2})".format( sum(errs), np.mean(errs), np.std(errs)) #### Check if planar epipolar constraint is satisfied: #### x2_hat * H * x1 = 0.0 errs = [] for i, pt1 in enumerate(pts1_norm): pt1_h = np.hstack((pt1, [1.0])) pt2_hat = util_camera.make_crossprod_mat(worldpts[i]) errs.append(np.linalg.norm(np.dot(pt2_hat, np.dot(H, pt1_h)))) print "Epipolar constraint error: {0} (Mean: {1} std: {2})".format( sum(errs), np.mean(errs), np.std(errs)) #### Sanity-check that world points project to pixel points Hinv = np.linalg.inv(H) errs = [] for i, worldpt in enumerate(worldpts): pt_img = np.hstack((pts1[i], [1.0])) p = np.dot(Hinv, np.hstack((worldpt, [1.0]))) p /= p[2] p = np.dot(K, p) errs.append(np.linalg.norm(pt_img - p)) print "world2img errors (in pixels): {0} (mean={1} std={2})".format( sum(errs), np.mean(errs), np.std(errs)) #### Perform Inverse Perspective Mapping (undo perspective effects) ## Pixel coordinates of a region of the image known to: ## a.) Lie on the plane ## b.) Has known metric lengths Hinv = np.linalg.inv(H) # Define hardcoded pixel/world coordinates on the poster plane # TODO: We could try generalizing this to new images by: # a.) Asking the user to choose four image points, and enter # in metric length information (simplest) # b.) Automatically detect four corner points that correspond # to a world rectangle (somehow). If there are no # reference lengths, then I believe the best thing we can # do is do a perspective-correction up to an affine # transformation (since we don't know the correct X/Y # lengths). pts_pix = np.array( [ [372.0, 97.0], # Upperleft of redbox (x,y) [651.0, 95.0], # Upperright of redbox (x,y) [303.0, 321.0], # Lowerleft of bluebox (x,y) [695.0, 324.0] ], # Lowerright of greenbox (x,y) ) pts_world = np.zeros([0, 2]) # Populate pts_world via H (maps image plane -> world plane) for pt_pix in pts_pix: pt_world = homo2pt(np.dot(H, np.dot(Kinv, pt2homo(pt_pix)))) pts_world = np.vstack((pts_world, pt_world)) pts_world *= 1000.0 # Let's get a reasonable-sized image please! # These are hardcoded world coords, but, we can auto-gen them via # H, Kinv as above. Isn't geometry nice? #pts_world = np.array([[0.0, 0.0], # [175.0, 0.0], # [0.0, 134.0], # [175.0, 134.0]]) IPM0, IPM1 = compute_IPM(pts_pix, pts_world) print 'IPM0 is:' print IPM0 print 'IPM1 is:' print IPM1 I = cv2.imread(imgpath, cv.CV_LOAD_IMAGE_COLOR).astype('float64') I = (2.0 * I) + 40 # Up that contrast! Orig. images are super dark. h, w = I.shape[0:2] # Determine bounding box of IPM-corrected image a = homo2pt(np.dot(IPM1, pt2homo([0.0, 0.0]))) # upper-left corner b = homo2pt(np.dot(IPM1, pt2homo([w - 1, 0.0]))) # upper-right corner c = homo2pt(np.dot(IPM1, pt2homo([0.0, h - 1]))) # lower-left corner d = homo2pt(np.dot(IPM1, pt2homo([w - 1, h - 1]))) # lower-right corner w_out = intrnd(max(b[0] - a[0], d[0] - c[0])) h_out = intrnd(max(c[1] - a[1], d[1] - b[1])) print "New image dimensions: ({0} x {1}) (Orig: {2} x {3})".format( w_out, h_out, w, h) # Warp the entire image I with the homography IPM1 Iwarp = cv2.warpPerspective(I, IPM1, (w_out, h_out)) # Draw selected points on I for pt in pts_pix: cv2.circle(I, tupint(pt), 5, (0, 255, 0)) # Draw rectified-rectangle in Iwarp a = homo2pt(np.dot(IPM1, pt2homo(pts_pix[0]))) b = homo2pt(np.dot(IPM1, pt2homo(pts_pix[3]))) cv2.rectangle(Iwarp, tupint(a), tupint(b), (0, 255, 0)) #Iwarp = transform_image.transform_image_forward(I, IPM) print "(Displaying before/after images, press <any key> to exit.)" cv2.namedWindow('original') cv2.imshow('original', I.astype('uint8')) cv2.namedWindow('corrected') cv2.imshow('corrected', Iwarp.astype('uint8')) cv2.waitKey(0)
def main(): args = parse_args() if not os.path.exists(IMGSDIR_CALIB): print "(ERROR) Calibration images not found. Please place \ the LDWS_calibrate_short/ images in the current directory." exit(1) if not os.path.exists(IMGSDIR_TEST): print "(ERROR) Test images not found. Please place the \ LDWS_test_short/ images in the current directory." exit(1) imgpaths_calib = util.get_imgpaths(IMGSDIR_CALIB) if not os.path.isdir(args.imgsdir): imgpaths_test = [args.imgsdir] else: imgpaths_test = util.get_imgpaths(args.imgsdir) if args.reuse_calib: print "(Reusing camera calibration matrix)" K = np.array([[ 674.07224154, 0., 262.77722917], [ 0., 670.26875783, 330.21546389], [ 0., 0., 1. ]]) else: print "(Estimating camera matrix...)" t = time.time() K = calibrate_camera.calibrate_camera(imgpaths_calib, 8, 8, 0.048) dur = time.time() - t print "(Finished. {0:.4f})".format(dur) for i, imgpath in enumerate(imgpaths_test): print "\n==== ({0}/{1}) Detecting lanes... [{2}]====".format(i+1, len(imgpaths_test), os.path.split(imgpath)[1]) I = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_GRAYSCALE) h, w = I.shape[0:2] t = time.time() line1, line2 = detect_lanes.detect_lanes(I, win1=WIN_LEFT, win2=WIN_RIGHT, threshold1=110, threshold2=220, apertureSize=3, show_edges=False) dur = time.time() - t print " Finished detecting lanes ({0:.4f}s)".format(dur) if line1 == None or line2 == None: print "({0}/{1}) Error: Couldn't find lanes.".format(i+1, len(imgpaths_test)) continue # Choose 4 points on the lanes to estimate the planar homography y1 = intrnd(0.45 * h) y2 = intrnd(0.65 * h) pts = np.array([[compute_x(line1, y1), y1], # Left lane, far [compute_x(line2, y1), y1], # Right lane, far [compute_x(line1, y2), y2], # Left lane, close [compute_x(line2, y2), y2]]) # Right lane, close # These world points have the origin at the middle of the lane, # directly below the camera. # Depths 5.0, 3.0 are chosen arbitrarily, as we don't have depth # information. Thus, the computed H will only be accurate in the # X axis. pts_worldm = np.array([[-LANE_W/2.0, 5.0], # Left lane, far [LANE_W/2.0, 5.0], # Right lane, far [-LANE_W/2.0, 3.0], # Left lane, close [LANE_W/2.0, 3.0]]) # Right lane, close # These world points are scaled+translated to generate a # reasonably-sized image w/ perspective effects removed. # Note: the origin here is not the same origin as in pts_worldm! pts_world = np.array([[0.0, 0.0], [LANE_W*1e2, 0.0], [0.0, 500], [LANE_W*1e2, 500.0]]) pts_world[:,0] += LANE_W*1e2 # Let's see more of the horizontal image pts_world[:,1] += 200 # Let's see more down the road # H_metric := Homography mapping the image (pixel coords) to the # world plane defined by pts_worldm H_metric = cv2.getPerspectiveTransform(pts.astype('float32'), pts_worldm.astype('float32')) H = cv2.getPerspectiveTransform(pts.astype('float32'), pts_world.astype('float32')) ## Estimate where the camera is w.r.t. the world ref. frame R, T = estimate_extrinsic_parameters(H_metric) xdist = T[0] - (LANE_W / 2.0) print " Distance from center of lane: X={0:.2f} meters".format(xdist) LEFT_THRESH = -1.0 # Stay within 1.0 meters of the center of the lane RIGHT_THRESH = 1.0 if xdist >= RIGHT_THRESH: print " WARNING: Camera center is awfully close to the \ RIGHT side of the lane!" elif xdist <= LEFT_THRESH: print " WARNING: Camera center is awfully close to the \ LEFT side of the lane!" Irgb = cv2.imread(imgpath, cv2.CV_LOAD_IMAGE_COLOR) Iipm = cv2.warpPerspective(Irgb.astype('float64'), H, (1000, 700)) cv2.namedWindow("win2: Perspective-rectified image") cv2.imshow("win2: Perspective-rectified image", Iipm.astype('uint8')) Irgb = detect_lanes.draw_subwindow(Irgb, WIN_LEFT) Irgb = detect_lanes.draw_subwindow(Irgb, WIN_RIGHT) for _pt in pts: _pt = tuple([intrnd(x) for x in _pt]) cv2.circle(Irgb, _pt, 3, (0, 0, 255)) print " ({0}/{1}) Displaying detected lanes.".format(i+1, len(imgpaths_test)) show_lanes(Irgb, line1, line2) print "Done."
def estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width): """ Estimates the planar homography H between the camera image plane, and the World (ground) plane. The World reference frame is directly below the camera, with: - Z-axis parallel to the road - X-axis on the road plane - Y-axis pointing upward from the road plane - origin is on the road plane (Y=0), and halfway between the two lanes boundaries (center of road). Input: nparray I nparray line1, line2: [a, b, c] nparray K The 3x3 camera intrinsic matrix. tuple win1, win2: (float x, float y, float w, float h) float lane_width Output: nparray H where H is a 3x3 homography. """ h, w = I.shape[0:2] x_win1 = intrnd(w * win1[0]) y_win1 = intrnd(h * win1[1]) x_win2 = intrnd(w * win2[0]) y_win2 = intrnd(h * win2[1]) pts = [] NUM = 10 h_win = intrnd(h * win1[3]) # Assume window heights same btwn left/right for i in xrange(NUM): frac = i / float(NUM) y_cur = intrnd((y_win1 - (h_win / 2) + frac * h_win)) pt_i = (compute_x(line1, y_cur), y_cur) pt_j = (compute_x(line2, y_cur), y_cur) pts.append((pt_i, pt_j)) r1 = solve_for_r1(pts, K, lane_width) vanishing_pt = np.cross(line1, line2) vanishing_pt = vanishing_pt / vanishing_pt[2] vanishing_pt[1] = vanishing_pt[1] ## DEBUG Plot points on image, save to file for visual verification Irgb = util.to_rgb(I) COLOURS = [(255, 0, 0), (0, 255, 0)] for i, (pt_i, pt_j) in enumerate(pts): clr = COLOURS[i % 2] cv2.circle(Irgb, tuple(map(intrnd, pt_i)), 5, clr) cv2.circle(Irgb, tuple(map(intrnd, pt_j)), 5, clr) cv2.circle(Irgb, (intrnd(vanishing_pt[0]), intrnd(vanishing_pt[1])), 5, (0, 0, 255)) cv2.imwrite("_Irgb.png", Irgb) r3 = solve_for_r3(vanishing_pt, line1, line2, K) T = solve_for_t(pts, K, r1, r3, lane_width) print "T_pre:", T T = T * (2.1798 / T[1]) # Height of camera is 2.1798 meters T[2] = 1 # We want the ref. frame to be directly below camera (why 1?!) #T = T / np.linalg.norm(T) print "T_post:", T H = np.zeros([3, 3]) H[:, 0] = r1 H[:, 1] = r3 H[:, 2] = T return np.dot(K, H)
def detect_lanes(I, win1=(0.4, 0.55, 0.2, 0.1), win2=(0.6, 0.55, 0.2, 0.1), threshold1=50, threshold2=100, apertureSize=3, show_edges=False): """ Given a street image I, detect the (parallel) road lanes in image coordinates. Input: nparray I: tuple win1, win2: (float x, float y, float width, float height) Location+sizes of window-left and window-right used to try searching for lanes. Dimensions/positions are gived in percentages of image size. float threshold1, threshold2 threshold1 is low-threshold for hysterisis procedure of Canny. threshold2 is high-threshold. int apertureSize One of (1,3,5,7). Size of the Sobel filter. Output: (line1, line2) Where line1 = (a1, b1,c1) such that: a1x + b1y + c1 = 0 Similarly, line2 = (a2, b2, c2). """ h, w = np.shape(I)[0:2] #edgemap = cv2.Canny(I, threshold1, threshold2, apertureSize=apertureSize) # Compute left window dimensions x_left = intrnd(win1[0] * w) y_left = intrnd(win1[1] * h) w_left = intrnd(win1[2] * w) h_left = intrnd(win1[3] * h) if w_left % 2 == 0: w_left += 1 if h_left % 2 == 0: h_left += 1 # Compute right window dimensions x_right = intrnd(win2[0] * w) y_right = intrnd(win2[1] * h) w_right = intrnd(win2[2] * w) h_right = intrnd(win2[3] * h) if w_right % 2 == 0: w_right += 1 if h_right % 2 == 0: h_right += 1 Iwin_left = I[(y_left - (h_left / 2)):(y_left + (h_left / 2)), (x_left - (w_left / 2)):(x_left + (w_left / 2))] Iwin_rght = I[(y_right - (h_right / 2)):(y_right + (h_right / 2)), (x_right - (w_right / 2)):(x_right + (w_right / 2))] edges_left = cv2.Canny(Iwin_left, threshold1, threshold2, apertureSize=apertureSize) edges_right = cv2.Canny(Iwin_rght, threshold1, threshold2, apertureSize=apertureSize) if show_edges: cv2.namedWindow('edgeleft') cv2.imshow('edgeleft', edges_left) cv2.namedWindow('edgeright') cv2.imshow('edgeright', edges_right) ''' edges_left = edgemap[(y_left-(h_left/2)):(y_left+(h_left/2)), (x_left-(w_left/2)):(x_left+(w_left/2))] edges_right = edgemap[(y_right-(h_right/2)):(y_right+(h_right/2)), (x_right-(w_right/2)):(x_right+(w_right/2))] ''' # Find dominant line in each window res1 = estimate_line(edges_left, MAX_ITERS=300, ALPHA=4, T=1.0) res2 = estimate_line(edges_right, MAX_ITERS=300, ALPHA=4, T=1.0) if res1 == None: line1, inliers1 = None, None else: line1, inliers1 = res1 if res2 == None: line2, inliers2 = None, None else: line2, inliers2 = res2 if line1 != None and line1[1] != 0: line1_norm = np.array([line1[0] / line1[1], 1, line1[2] / line1[1]]) else: line1_norm = line1 if line2 != None and line2[1] != 0: line2_norm = np.array([line2[0] / line2[1], 1, line2[2] / line2[1]]) else: line2_norm = line2 # Fix line to be in image coordinate system (not window coord sys) if line1_norm != None: a1, b1, c1 = line1_norm c1_out = -a1 * (x_left - (w_left / 2)) - b1 * ((y_left) - (h_left / 2)) + c1 line1_out = np.array([a1, b1, c1_out]) else: line1_out = None if line2_norm != None: a2, b2, c2 = line2_norm c2_out = -a2 * (x_right - (w_right / 2)) - b2 * ((y_right) - (h_right / 2)) + c2 line2_out = np.array([a2, b2, c2_out]) else: line2_out = None return line1_out, line2_out
def estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width): """ Estimates the planar homography H between the camera image plane, and the World (ground) plane. The World reference frame is directly below the camera, with: - Z-axis parallel to the road - X-axis on the road plane - Y-axis pointing upward from the road plane - origin is on the road plane (Y=0), and halfway between the two lanes boundaries (center of road). Input: nparray I nparray line1, line2: [a, b, c] nparray K The 3x3 camera intrinsic matrix. tuple win1, win2: (float x, float y, float w, float h) float lane_width Output: nparray H where H is a 3x3 homography. """ h, w = I.shape[0:2] x_win1 = intrnd(w * win1[0]) y_win1 = intrnd(h * win1[1]) x_win2 = intrnd(w * win2[0]) y_win2 = intrnd(h * win2[1]) pts = [] NUM = 10 h_win = intrnd(h*win1[3]) # Assume window heights same btwn left/right for i in xrange(NUM): frac = i / float(NUM) y_cur = intrnd((y_win1-(h_win/2) + frac*h_win)) pt_i = (compute_x(line1, y_cur), y_cur) pt_j = (compute_x(line2, y_cur), y_cur) pts.append((pt_i, pt_j)) r1 = solve_for_r1(pts, K, lane_width) vanishing_pt = np.cross(line1, line2) vanishing_pt = vanishing_pt / vanishing_pt[2] vanishing_pt[1] = vanishing_pt[1] ## DEBUG Plot points on image, save to file for visual verification Irgb = util.to_rgb(I) COLOURS = [(255, 0, 0), (0, 255, 0)] for i, (pt_i, pt_j) in enumerate(pts): clr = COLOURS[i % 2] cv2.circle(Irgb, tuple(map(intrnd, pt_i)), 5, clr) cv2.circle(Irgb, tuple(map(intrnd, pt_j)), 5, clr) cv2.circle(Irgb, (intrnd(vanishing_pt[0]), intrnd(vanishing_pt[1])), 5, (0, 0, 255)) cv2.imwrite("_Irgb.png", Irgb) r3 = solve_for_r3(vanishing_pt, line1, line2, K) T = solve_for_t(pts, K, r1, r3, lane_width) print "T_pre:", T T = T * (2.1798 / T[1]) # Height of camera is 2.1798 meters T[2] = 1 # We want the ref. frame to be directly below camera (why 1?!) #T = T / np.linalg.norm(T) print "T_post:", T H = np.zeros([3,3]) H[:, 0] = r1 H[:, 1] = r3 H[:, 2] = T return np.dot(K, H)
def test_koopa_singleimage(SHOW_EPIPOLAR=False): """ Here, I want to do "undo" the perspective distortion in the image, based on the fact that I know: - The camera matrix K - The scene is planar - The location of four points in the image that create a rectangle in the world plane with known metric lengths Note: I don't have to necessarily know the metric lengths, but at least I need to know length ratios to construct a corrected image with length ratios preserved. In the literature, this is also known as: perspective rectification, perspective correction. """ imgpath = os.path.join(IMGSDIR_KOOPA_MED, 'DSCN0643.png') pts1 = np.array([[373.0, 97.0], # A (upper-left red corner) [650.0, 95.0], # B (upper-right red corner) [674.0, 215.0], # C (lower-right red corner) [503.0, 322.0], # D (lower-left green corner) [494.0, 322.0], # E (lower-right blue corner) [303.0, 321.0], # F (lower-left blue corner) [332.0, 225.0], # G (upper-left blue corner) [335.0, 215.0], # H (lower-left red corner) [475.0, 135.0], # K (tip of pen) [507.0, 225.0], # L (upper-left green corner) [362.0, 248.0], # M (tip of glue bottle) ]) worldpts = get_worldplane_coords() assert len(pts1) == len(worldpts) calib_imgpaths = util.get_imgpaths(IMGSDIR_CALIB_MED) print "(Calibrating camera...)" if True: print "(Using pre-computed camera matrix K!)" K = np.array([[ 158.23796519, 0.0, 482.07814366], [ 0., 28.53758493, 333.32239125], [ 0., 0., 1. ]]) else: K = calibrate_camera.calibrate_camera(calib_imgpaths, 9, 6, 0.023) print "Finished calibrating camera, K is:" print K Kinv = np.linalg.inv(K) pts1_norm = normalize_coords(pts1, K) HL = estimate_planar_homography(pts1_norm, worldpts) print "Estimated homography, H is:" print HL print "rank(H):", np.linalg.matrix_rank(HL) if np.linalg.matrix_rank(HL) != 3: print " Oh no! H is not full rank!" #### Normalize H := H / sigma_2(H_) U, S, V = np.linalg.svd(HL) H = HL / S[1] #### Enforce positive depth constraint flag_flipped = False for i, pt1 in enumerate(pts1_norm): pt2 = worldpts[i] val = np.dot(np.hstack((pt2, [1.0])), np.dot(H, np.hstack((pt1, [1.0])))) if val < 0: if flag_flipped: print "WOAH, flipping twice?!" pdb.set_trace() print "FLIP IT! val_{0} was: {1}".format(i, val) H = H * -1 flag_flipped = True # (Sanity check positive depth constraint) for i, pt1 in enumerate(pts1_norm): pt2 = worldpts[i] val = np.dot(np.hstack((pt2, [1.0])), np.dot(H, np.hstack((pt1, [1.0])))) if val < 0: print "WOAH, positive depth constraint violated!?" pdb.set_trace() #### Check projection error from I1 -> I2. errs = [] for i, pt1 in enumerate(pts1_norm): pt1_h = np.hstack((pt1, np.array([1.0]))) pt2_h = np.hstack((worldpts[i], np.array([1.0]))) pt2_pred = np.dot(H, pt1_h) pt2_pred = pt2_pred / pt2_pred[2] errs.append(np.linalg.norm(pt2_h - pt2_pred)) print "Projection error: {0} (Mean: {1} std: {2})".format(sum(errs), np.mean(errs), np.std(errs)) #### Check if planar epipolar constraint is satisfied: #### x2_hat * H * x1 = 0.0 errs = [] for i, pt1 in enumerate(pts1_norm): pt1_h = np.hstack((pt1, [1.0])) pt2_hat = util_camera.make_crossprod_mat(worldpts[i]) errs.append(np.linalg.norm(np.dot(pt2_hat, np.dot(H, pt1_h)))) print "Epipolar constraint error: {0} (Mean: {1} std: {2})".format(sum(errs), np.mean(errs), np.std(errs)) #### Sanity-check that world points project to pixel points Hinv = np.linalg.inv(H) errs = [] for i, worldpt in enumerate(worldpts): pt_img = np.hstack((pts1[i], [1.0])) p = np.dot(Hinv, np.hstack((worldpt, [1.0]))) p /= p[2] p = np.dot(K, p) errs.append(np.linalg.norm(pt_img - p)) print "world2img errors (in pixels): {0} (mean={1} std={2})".format(sum(errs), np.mean(errs), np.std(errs)) #### Perform Inverse Perspective Mapping (undo perspective effects) ## Pixel coordinates of a region of the image known to: ## a.) Lie on the plane ## b.) Has known metric lengths Hinv = np.linalg.inv(H) # Define hardcoded pixel/world coordinates on the poster plane # TODO: We could try generalizing this to new images by: # a.) Asking the user to choose four image points, and enter # in metric length information (simplest) # b.) Automatically detect four corner points that correspond # to a world rectangle (somehow). If there are no # reference lengths, then I believe the best thing we can # do is do a perspective-correction up to an affine # transformation (since we don't know the correct X/Y # lengths). pts_pix = np.array([[372.0, 97.0], # Upperleft of redbox (x,y) [651.0, 95.0], # Upperright of redbox (x,y) [303.0, 321.0], # Lowerleft of bluebox (x,y) [695.0, 324.0]], # Lowerright of greenbox (x,y) ) pts_world = np.zeros([0, 2]) # Populate pts_world via H (maps image plane -> world plane) for pt_pix in pts_pix: pt_world = homo2pt(np.dot(H, np.dot(Kinv, pt2homo(pt_pix)))) pts_world = np.vstack((pts_world, pt_world)) pts_world *= 1000.0 # Let's get a reasonable-sized image please! # These are hardcoded world coords, but, we can auto-gen them via # H, Kinv as above. Isn't geometry nice? #pts_world = np.array([[0.0, 0.0], # [175.0, 0.0], # [0.0, 134.0], # [175.0, 134.0]]) IPM0, IPM1 = compute_IPM(pts_pix, pts_world) print 'IPM0 is:' print IPM0 print 'IPM1 is:' print IPM1 I = cv2.imread(imgpath, cv.CV_LOAD_IMAGE_COLOR).astype('float64') I = (2.0*I) + 40 # Up that contrast! Orig. images are super dark. h, w = I.shape[0:2] # Determine bounding box of IPM-corrected image a = homo2pt(np.dot(IPM1, pt2homo([0.0, 0.0]))) # upper-left corner b = homo2pt(np.dot(IPM1, pt2homo([w-1, 0.0]))) # upper-right corner c = homo2pt(np.dot(IPM1, pt2homo([0.0, h-1]))) # lower-left corner d = homo2pt(np.dot(IPM1, pt2homo([w-1, h-1]))) # lower-right corner w_out = intrnd(max(b[0] - a[0], d[0] - c[0])) h_out = intrnd(max(c[1] - a[1], d[1] - b[1])) print "New image dimensions: ({0} x {1}) (Orig: {2} x {3})".format(w_out, h_out, w, h) # Warp the entire image I with the homography IPM1 Iwarp = cv2.warpPerspective(I, IPM1, (w_out, h_out)) # Draw selected points on I for pt in pts_pix: cv2.circle(I, tupint(pt), 5, (0, 255, 0)) # Draw rectified-rectangle in Iwarp a = homo2pt(np.dot(IPM1, pt2homo(pts_pix[0]))) b = homo2pt(np.dot(IPM1, pt2homo(pts_pix[3]))) cv2.rectangle(Iwarp, tupint(a), tupint(b), (0, 255, 0)) #Iwarp = transform_image.transform_image_forward(I, IPM) print "(Displaying before/after images, press <any key> to exit.)" cv2.namedWindow('original') cv2.imshow('original', I.astype('uint8')) cv2.namedWindow('corrected') cv2.imshow('corrected', Iwarp.astype('uint8')) cv2.waitKey(0)
def detect_lanes(I, win1=(0.4, 0.55, 0.2, 0.1), win2=(0.6, 0.55, 0.2, 0.1), threshold1=50, threshold2=100, apertureSize=3, show_edges=False): """ Given a street image I, detect the (parallel) road lanes in image coordinates. Input: nparray I: tuple win1, win2: (float x, float y, float width, float height) Location+sizes of window-left and window-right used to try searching for lanes. Dimensions/positions are gived in percentages of image size. float threshold1, threshold2 threshold1 is low-threshold for hysterisis procedure of Canny. threshold2 is high-threshold. int apertureSize One of (1,3,5,7). Size of the Sobel filter. Output: (line1, line2) Where line1 = (a1, b1,c1) such that: a1x + b1y + c1 = 0 Similarly, line2 = (a2, b2, c2). """ h, w = np.shape(I)[0:2] #edgemap = cv2.Canny(I, threshold1, threshold2, apertureSize=apertureSize) # Compute left window dimensions x_left = intrnd(win1[0]*w) y_left = intrnd(win1[1]*h) w_left = intrnd(win1[2]*w) h_left = intrnd(win1[3]*h) if w_left % 2 == 0: w_left += 1 if h_left % 2 == 0: h_left += 1 # Compute right window dimensions x_right = intrnd(win2[0]*w) y_right = intrnd(win2[1]*h) w_right = intrnd(win2[2]*w) h_right = intrnd(win2[3]*h) if w_right % 2 == 0: w_right += 1 if h_right % 2 == 0: h_right += 1 Iwin_left = I[(y_left-(h_left/2)):(y_left+(h_left/2)), (x_left-(w_left/2)):(x_left+(w_left/2))] Iwin_rght = I[(y_right-(h_right/2)):(y_right+(h_right/2)), (x_right-(w_right/2)):(x_right+(w_right/2))] edges_left = cv2.Canny(Iwin_left, threshold1, threshold2, apertureSize=apertureSize) edges_right = cv2.Canny(Iwin_rght, threshold1, threshold2, apertureSize=apertureSize) if show_edges: cv2.namedWindow('edgeleft') cv2.imshow('edgeleft', edges_left) cv2.namedWindow('edgeright') cv2.imshow('edgeright', edges_right) ''' edges_left = edgemap[(y_left-(h_left/2)):(y_left+(h_left/2)), (x_left-(w_left/2)):(x_left+(w_left/2))] edges_right = edgemap[(y_right-(h_right/2)):(y_right+(h_right/2)), (x_right-(w_right/2)):(x_right+(w_right/2))] ''' # Find dominant line in each window res1 = estimate_line(edges_left, MAX_ITERS=300, ALPHA=4, T=1.0) res2 = estimate_line(edges_right, MAX_ITERS=300, ALPHA=4, T=1.0) if res1 == None: line1, inliers1 = None, None else: line1, inliers1 = res1 if res2 == None: line2, inliers2 = None, None else: line2, inliers2 = res2 if line1 != None and line1[1] != 0: line1_norm = np.array([line1[0] / line1[1], 1, line1[2] / line1[1]]) else: line1_norm = line1 if line2 != None and line2[1] != 0: line2_norm = np.array([line2[0] / line2[1], 1, line2[2] / line2[1]]) else: line2_norm = line2 # Fix line to be in image coordinate system (not window coord sys) if line1_norm != None: a1, b1, c1 = line1_norm c1_out = -a1*(x_left-(w_left/2)) - b1*((y_left)-(h_left/2)) + c1 line1_out = np.array([a1, b1, c1_out]) else: line1_out = None if line2_norm != None: a2, b2, c2 = line2_norm c2_out = -a2*(x_right-(w_right/2)) - b2*((y_right)-(h_right/2)) + c2 line2_out = np.array([a2, b2, c2_out]) else: line2_out = None return line1_out, line2_out