def main(): args = parse_args() image_path = os.path.join(args.path, 'image_02/', args.seq) oxts_path = os.path.join(args.path, 'oxts/', args.seq + '.txt') itms = [ os.path.join(image_path, it) for it in os.listdir(image_path) if it.endswith('.png') ] annotation = get_anno_tracking(oxts_path) img_shape = cv2.imread(itms[0], 0).shape cam = PinholeCamera(img_shape[1], img_shape[0], 718.8560, 718.8560, 607.1928, 185.2157) vo = VisualOdometry(cam, annotation, args.use_abs_scale, args.skip_frame) wb = Whiteboard() for img_id in range(0, len(itms), args.skip_frame): img = cv2.imread( args.path + 'image_02/' + args.seq + '/' + str(img_id).zfill(6) + '.png', 0) vo.update(img, img_id) if (img_id > 0): cur_t = vo.cur_t.squeeze() x, y, z = cur_t[0], cur_t[2], -cur_t[1] else: x, y, z = 0., 0., 0. t_loc = np.array([vo.trueX, vo.trueY, vo.trueZ]) t_color = (0, 0, 255) p_loc = np.array([x, y, z]) p_color = (img_id * 255.0 / len(itms), 255 - img_id * 255.0 / len(itms), 0) wb.draw(img_id, 'True', t_loc, t_color) wb.draw(img_id, 'Pred', p_loc, p_color) wb.show(img) if args.save_name: wb.save(args.save_name)
def getVO(data_path, rgb_txt): cam = PinholeCamera(640.0, 480.0, 517.3, 516.5, 318.6, 255.3, 0.2624, -0.9531, -0.0054, 0.0026, 1.1633) imgList = getImageLists() vo = VisualOdometry(cam, imgList) # xyz= np.array([[0],[0],[0]]) img = cv2.imread('./Data/' + imgList[0], 0) f = open("teat2.txt", "w") for img_id in range(len(imgList)): # img = cv2.imread('./Data/'+imgList[img_id], 0) vo.update(img, img_id) if img_id > 1: cur_t = vo.cur_t cur_r = vo.cur_R # xyz=cur_r.dot(xyz)+cur_t name = imgList[img_id].split('/')[1].split('.')[0] pose = transRo2Qu(cur_t, cur_r) f.write(name + ' ' + pose + '\n') print(img_id)
from visual_odometry import FisheyeCamera, VisualOdometry cam = FisheyeCamera(640.0, 480.0, 320, 240, -179.47183, 0, 0.002316744, -3.6359684 * 10**(-6), 2.0546507 * 10**(-8), 256) vo = VisualOdometry( cam, '/home/dongk/다운로드/rpg_urban_fisheye_info/info/groundtruth.txt' ) # CHANGE THIS DIRECTORY PATH traj = np.zeros((600, 600, 3), dtype=np.uint8) for img_id in range(2500): img = cv2.imread('/home/dongk/다운로드/rpg_urban_fisheye_data/data/img/' + 'img' + str(img_id + 390 + 1).zfill(4) + '_0.png', 0) # CHANGE THIS DIRECTORY PATH vo.update(img, img_id) cur_t = vo.cur_t if (img_id > 2): x, y, z = cur_t[0, 0], cur_t[1, 0], cur_t[2, 0] else: x, y, z = 0., 0., 0. draw_x, draw_y = int(x) + 290, int(z) + 90 true_x, true_y = int(-vo.trueX) + 290, int(vo.trueY) + 90 cv2.circle(traj, (draw_x, draw_y), 1, (255.0, 0), 2) cv2.circle(traj, (true_x, true_y), 1, (0, 0, 255), 1) cv2.rectangle(traj, (10, 20), (600, 60), (0, 0, 0), -1) text = "x=%2fm y=%2fm z=%2fm" % (x, y, z) + "x=%2fm z=%2fm" % (-vo.trueX, vo.trueY) cv2.putText(traj, text, (20, 40), cv2.FONT_HERSHEY_PLAIN, 1,
i = 0 #cv2.namedWindow("x") cv2.namedWindow("map") ret, rgb_img = cap.read() print(rgb_img.shape) while True: i = i + 1 ret, rgb_img = cap.read() img = cv2.cvtColor(cv2.resize(rgb_img, (320, 180)), cv2.COLOR_BGR2GRAY) vo.update(img) if (i > 2): x, y, z = vo.cur_t[0], vo.cur_t[1], vo.cur_t[2] else: x, y, z = 0., 0., 0. print("Coordinates: x=%2fm y=%2fm z=%2fm" % (x, y, z)) # cv2.imwrite('map.png', traj) draw_x, draw_y = (int(x) * 5) + 300, (int(y) * 5) + 300 cv2.circle(traj, (draw_x, draw_y), 2, (0, 255, 0), 3) #cv2.imshow('x', rgb_img)
def visual_odometry(): # ---------------- Initial definitions ----------------------- drone_states = DroneStates() ic = image_converter() #pose = Reference() rospy.Subscriber('sensor_depth/depth', Float32, drone_states.set_depth) rospy.Subscriber('sensor_imu_1/data', ImuData, drone_states.imu_data) rospy.Subscriber('camera/image_raw', Image, ic.callback) rospy.Subscriber('observer/state_estimate', StateEstimate, drone_states.set_estimate) rospy.init_node('visual_odometry', anonymous=True) #pub = rospy.Publisher('reference_generator/reference', Reference, queue_size=1) # Camera matrix is for image size 1920 x 1080 mtx = np.array([[1.35445761E+03, 0.00000000E+00, 8.91069717E+02], [0.00000000E+00, 1.37997405E+03, 7.56192877E+02], [0.00000000E+00, 0.00000000E+00, 1.00000000E+00]]) dist = np.array( [-0.2708139, 0.20052465, 2.08302E-02, 0.0002806, -0.10134601]) cam = PinholeCamera(960, 540, mtx[0, 0] / 2, mtx[1, 1] / 2, mtx[0, 2] / 2, mtx[1, 2] / 2) # -------------------------------------------------------------- vo = VisualOdometry(cam) kf = VOKalmanFilter() traj = np.zeros((600, 650, 3), np.uint8) init = True row = [ 'p_x', 'p_y', 'p_z', 'time', 'x_hat', 'y_hat', 'z_hat', 'a_x', 'a_y', 'a_z' ] f = open('VOestimates.csv', 'w+') writer = csv.writer(f) writer.writerow(row) x_hat = np.zeros((12, 1)) drone_t = np.zeros((3, 1)) reference = Reference() # controller setup: surge_controller = controller(0.1, 0., 0.) sway_controller = controller(0.08, 0., 0.) yaw_controller = controller(0.009, 0., 0.005) depth_controller = controller(3.5, 0., 0.0) depth_controller.init_ref = 0.5 rate = rospy.Rate(7.5) while not rospy.is_shutdown(): # while i < len(depth_data.depth): t = time.time() #ret, frame = cap.read() #if not ret: # continue frame = ic.cv_image #print(frame) frame = cv2.undistort(frame, mtx, dist) frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # run visual odometry vo.update(frame) if init: x, y, z, = 0., 0., 0. dt = time.time() - t init = False else: dt = time.time() - t cur_t = drone_states.R_cd.dot(vo.cur_t) x, y, z = cur_t[0, 0], cur_t[1, 0], cur_t[2, 0] roll, pitch, yaw = rotationMatrixToEulerAngles(vo.cur_R) keypoints = [] for m in vo.good: keypoints.append(vo.kp_cur[m.trainIdx]) frame = cv2.drawKeypoints(frame, keypoints, np.array([]), color=(0, 255, 0), flags=0) # Kalman filtering of the estimates #if isinstance(vo.scale, np.float64): # dt = time.time() - t # drone_t = drone_states.R_cd.dot(vo.t) # u = np.array([[x], [y], [drone_states.z], [drone_states.p], [drone_states.q], [drone_states.r]]) # kf.update(u, dt) # x_hat = kf.x_hat * dt # write estimates to file for plotting row = [x, y, z, roll, pitch, yaw] writer.writerow(row) draw_x, draw_y = int(y) * (1) + 290, int(x) * (-1) + 290 cv2.circle(traj, (draw_x, draw_y), 1, (0, 255, 0), 1) cv2.rectangle(traj, (10, 20), (650, 60), (0, 0, 0), -1) text = "Coordinates: x=%2fm y=%2fm z=%2fm fps: %f" % ( x_filtered, y_filtered, z_filtered, 1 / dt) cv2.putText(traj, text, (20, 40), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, 8) cv2.imshow('Trajectory', traj) cv2.imshow('Video', frame) ch = cv2.waitKey(1) if ch & 0xFF == ord('q'): f.close() break # -------------CONTROLLERS------------------- if vo.scale == 1.0: reference.heave = depth_controller.pid(drone_states.z, dt) depth_scale(depth_controller, drone_states.z, z, vo) reference.surge = 0. reference.yaw = -0.05 * reference.heave reference.sway = -0.3 * reference.heave if isinstance(vo.scale, np.float64): reference.surge = surge_controller.pid(x_filtered, dt) reference.sway = sway_controller.pid(y_filtered, dt) if -0.1 < reference.sway < 0: reference.sway = -0.1 reference.yaw = yaw_controller.pid(drone_states.psi, dt) reference.heave = depth_controller.pid(drone_states.z, dt) reference.depth = 0. reference.depth_rate = 0. reference.heading = 0. reference.heading_rate = 0. rate.sleep() reference.surge = 0. reference.sway = 0. reference.yaw = 0. reference.depth = 0. reference.depth_rate = 0. reference.heading = 0. reference.heading_rate = 0. pub.publish(reference)
print("window_width, window_height = ", window_width, window_height) traj = np.zeros((window_height, window_width, 3), dtype=np.uint8) errors = np.empty((1, 0), dtype=np.float32) num_imgs = len([ img_name for img_name in os.listdir(img_dir) if os.path.isfile(os.path.join(img_dir, img_name)) ]) for img_id in range(num_imgs): img_path = img_dir + str(img_id).zfill(6) + ".png" img = cv2.imread(img_path, 0) vo_learning.update(img, img_id, img_path) vo_fast.update(img, img_id, img_path) cur_t = vo_learning.cur_t if (img_id > 2): x, y, z = cur_t[0], cur_t[1], cur_t[2] else: x, y, z = 0., 0., 0. draw_x, draw_y = int(x), int(z) cur_t = vo_fast.cur_t if (img_id > 2): x, y, z = cur_t[0], cur_t[1], cur_t[2] else: x, y, z = 0., 0., 0.
yaw = 0 while not rospy.is_shutdown(): t = time.time() ret, frame = cap.read() if not ret: continue #frame = ic.cv_image frame = cv2.undistort(frame, mtx, dist) #frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5) frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # run visual odometry vo.update(frame) if init: x, y, z, = 0., 0., 0. dt = time.time() - t init = False else: vo.scale = 0.088152 #Median from scale tests dt = time.time() - t cur_t = drone_states.R_cd.dot(vo.cur_t) roll, pitch, yaw = rotationMatrixToEulerAngles(vo.cur_R) x, y, z = cur_t[0, 0], cur_t[1, 0], cur_t[2, 0] keypoints = []
return mat, dist #traj = np.zeros((600,600,3), dtype=np.uint8) mat, dist = get_cameramat_dist("cam_calib.pkl") img1 = cv2.imread('../../res/cap0.png', 0) img2 = cv2.imread('../../res/cap1.png', 0) print(img1.shape) cam = PinholeCamera(img1.shape[1], img1.shape[0], fx=mat[0, 0], fy=mat[1, 1], cx=mat[0, 2], cy=mat[1, 2], k1=dist[0, 0], k2=dist[0, 1], p1=dist[0, 2], p2=dist[0, 3], k3=dist[0, 4]) vo = VisualOdometry(cam) vo.update(img1, vo.frame_stage) vo.update(img2, vo.frame_stage) print("R matrix") print(vo.cur_R) print("T vector") print(vo.cur_t)
def main(): parser = argparse.ArgumentParser( prog='test_video.py', description='visual odometry script in python') parser.add_argument('--type', type=str, default='KITTI', choices=['KITTI', 'image', 'video'], metavar='<type of input>') parser.add_argument('--source', type=str, metavar='<path to source>') parser.add_argument('--camera_width', type=float, metavar='resolution in x direction') parser.add_argument('--camera_height', type=float, metavar='resolution in y direction') parser.add_argument('--focal', type=float, metavar='focal length in pixels') parser.add_argument('--pp_x', type=float, metavar='x coordinate of pricipal point') parser.add_argument('--pp_y', type=float, metavar='y coordinate of pricipal point') parser.add_argument('--skip', type=int, default=1) parser.add_argument('--verbose', type=bool, default=True, metavar='information of computing E') parser.add_argument('--show', type=bool, default=False, metavar='show the odometry result in real-time') args = parser.parse_args() print(args) if args.type == 'KITTI': cam = PinholeCamera(1241.0, 376.0, 718.8560, 718.8560, 607.1928, 185.2157) vo = VisualOdometry( cam, 'KITTI', '/home/r05525060/sharedfolder/indoorirPano/Dataset/dataset/poses/00.txt' ) for img_id in xrange(4541): img = cv2.imread( '/home/r05525060/sharedfolder/indoorirPano/Dataset/dataset/sequences/00/image_0/{:06d}.png' .format(img_id), 0) vo.update(img, img_id) cur_t = vo.cur_t if (img_id > 2): x, y, z = cur_t[0], cur_t[1], cur_t[2] else: x, y, z = 0., 0., 0. draw_x, draw_y = int(x) + 290, int(z) + 90 true_x, true_y = int(vo.trueX) + 290, int(vo.trueZ) + 90 cv2.circle(traj, (draw_x, draw_y), 1, (img_id * 255 / 4540, 255 - img_id * 255 / 4540, 0), 1) #TODO: 4540 to be changed cv2.circle(traj, (true_x, true_y), 1, (0, 0, 255), 2) cv2.rectangle(traj, (10, 20), (600, 60), (0, 0, 0), -1) text = "Coordinates: x=%2fm y=%2fm z=%2fm" % (x, y, z) cv2.putText(traj, text, (20, 40), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, 8) cv2.imshow('Road facing camera', img) cv2.imshow('Trajectory', traj) cv2.waitKey(1) if args.type == 'video': cap = cv2.VideoCapture(args.source) cam = PinholeCamera(args.camera_width, args.camera_height, args.focal, args.focal, args.pp_x, args.pp_y) vo = VisualOdometry(cam, type='video', annotations=None) frame_idx = 0 frame_count = 0 while (True): ret, frame = cap.read() frame_count += 1 if not ret: print 'Video finished!' break if not frame_count % args.skip == 0: continue print 'processing... frame_count:', frame_count print 'processing... frame_index:', frame_idx gray_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # print("shape of gray img:", gray_img.shape) vo.update(gray_img, frame_idx) cur_t = vo.cur_t if (frame_idx > 2): x, y, z = cur_t[0], cur_t[1], cur_t[2] else: x, y, z = 0., 0., 0. draw_x, draw_y = int(x) + 590, int(z) + 290 true_x, true_y = int(vo.trueX) + 590, int(vo.trueZ) + 290 # cv2.circle(traj, (draw_x,draw_y), 1, (frame_idx*255/4540,255-frame_idx*255/4540,0), 1) cv2.circle(traj, (draw_x, draw_y), 1, (0, 255, 0), 2) cv2.circle(traj, (true_x, true_y), 1, (0, 0, 255), 2) cv2.rectangle(traj, (10, 20), (1200, 60), (0, 0, 0), -1) # black backgroud for text text = "Coordinates: x=%2fm y=%2fm z=%2fm" % (x, y, z) cv2.putText(traj, text, (20, 40), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1, 8) if args.show: cv2.imshow('Road facing camera', gray_img) cv2.imshow('Trajectory', traj) cv2.waitKey(1) frame_idx += 1 cv2.putText(traj, "fps: {}.".format(30 / args.skip), (20, 100), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2, 8) cv2.putText(traj, "focal length: {} pixels".format(args.focal), (20, 140), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2, 8) if args.type == "KITTI": cv2.imwrite('KITTI_map.jpg', traj) elif args.type == "video": cv2.imwrite( 'output_skip{}/{}_map_f{}_pp{}-{}.jpg'.format( args.skip, os.path.basename(args.source), args.focal, args.pp_x, args.pp_y), traj)
prev_draw_x, prev_draw_y = 300, 300 then = cv2.getTickCount() while True: sensor.update() if cam.is_opened(): frame = cam.get_frame() if vo.optical_flow is None: vo.optical_flow = np.zeros_like(frame) gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) now = cv2.getTickCount() delta = ((now - then) / cv2.getTickFrequency()) then = now vo.update(gray, delta) cur_t = vo.cur_t if vo.frame_stage > 1: x, y, z = cur_t[0], cur_t[1], cur_t[2] else: x, y, z = 0., 0., 0. draw_scale = 10 draw_x, draw_y = int(x * draw_scale) + 300, int(z * draw_scale) + 300 # cv2.circle(traj, (draw_x, draw_y), 1, (255, 0, 0), -1) cv2.line(traj, (draw_x, draw_y), (prev_draw_x, prev_draw_y), (255, 0, 0), 1) prev_draw_x = draw_x prev_draw_y = draw_y cv2.rectangle(traj, (10, 20), (600, 60), (0, 0, 0), -1)
if frame_id == skip_to: first_frame = smoothed elif frame_id == skip_to + 1: second_frame = smoothed else: roll, pitch, heading = gyro_to_angles(cur_imu_data[1], cur_imu_data[2], cur_imu_data[3], cur_imu_data[4]) angles = (roll, pitch, heading) if not vo: try: vo = VisualOdometry(first_frame, second_frame, initial_R, cam) except: initial_R = angles_to_R(roll, pitch, heading) vo = VisualOdometry(first_frame, second_frame, initial_R, cam) vo.update(smoothed, angles, (gps_data[frame_id - 1], cur_gps_data)) tt = vo.total_t tt2 = vo.noskips_total_t gps_assist_tt = vo.gps_total_t x = tt[2] + X_START y = Y_START - tt[0] x2 = tt2[2] + X_START y2 = Y_START - tt2[0] gps_assist_x = gps_assist_tt[2] + X_START gps_assist_y = Y_START - gps_assist_tt[0] gps_x, gps_y = convert_gps_to_coords(cur_gps_data[2], cur_gps_data[1])
'r') as f: for line in f: img_seq.append(list(line.strip('\n').split(','))) PATH = "/home/ubuntu/users/tongpinmo/dataset/KITTI_odometry_dataset/dataset/sequences/05/image_2/" for img_id in range(len(img_seq) - 2): # start = time.clock() print 'NO.', img_id + 1, 'frame.' img_seq_str = ''.join(img_seq[img_id]) img_dir = os.path.join(PATH + img_seq_str) img_id = img_id + 1 img = imread(img_dir, 0) / 255. cv2.imshow('Road facing camera', img) img = np.array([img]).astype(np.float32) vo.update(img, img_id, sess, pred_flow, image_ref_tensor, image_cur_tensor) # end = time.clock() # print("time:", str(end - start)) cur_t = vo.cur_t cur_R = vo.cur_R pose = vo.annotations[img_id].strip().split() true_R = np.array(map(float, pose)).reshape(3, 4) true_R = true_R[:3, :3] # print "true_R:", true_R cur_R = np.array(cur_R, dtype=np.float32) # print "cur_R:", cur_R if (np.linalg.norm(true_R - cur_R) > 0.0087266): rmse = np.linalg.norm(true_R - cur_R) print "Rotation_error:", rmse
dum = dumbbell.Dumbbell(m1=500, m2=500, l=0.003) # initialize an estimated trajectory vector est_time = np.zeros(max_images) est_pos = np.zeros((max_images, 3)) est_R = np.zeros((max_images, 9)) # do some math based on how many images there are and skipping frames # loop over the images for ii in range(max_images): time_index = (ii + 1) * step_size img = images[:, :, :, ii] # convert image to gray scale img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) vo.update(img, time_index) # save the estimates est_time[ii] = time[time_index] est_pos[ii, :] = vo.cur_t est_R[ii, :] = vo.cur_R.reshape(-1) plotting.plot_controlled_blender_inertial(time, i_state, ast, dum, fwidth=1) # plot the estimated position vector fig, axarr = plt.subplots(3) axarr[0].plot(est_time, est_pos[:, 0])
import numpy as np import cv2 from visual_odometry import PinholeCamera, VisualOdometry cam = PinholeCamera(1241.0, 376.0, 718.8560, 718.8560, 607.1928, 185.2157) vo = VisualOdometry(cam, '/home/xxx/datasets/KITTI_odometry_poses/00.txt') traj = np.zeros((600,600,3), dtype=np.uint8) for img_id in xrange(4541): img = cv2.imread('/home/xxx/datasets/KITTI_odometry_gray/00/image_0/'+str(img_id).zfill(6)+'.png', 0) vo.update(img, img_id) cur_t = vo.cur_t if(img_id > 2): x, y, z = cur_t[0], cur_t[1], cur_t[2] else: x, y, z = 0., 0., 0. draw_x, draw_y = int(x)+290, int(z)+90 true_x, true_y = int(vo.trueX)+290, int(vo.trueZ)+90 cv2.circle(traj, (draw_x,draw_y), 1, (img_id*255/4540,255-img_id*255/4540,0), 1) cv2.circle(traj, (true_x,true_y), 1, (0,0,255), 2) cv2.rectangle(traj, (10, 20), (600, 60), (0,0,0), -1) text = "Coordinates: x=%2fm y=%2fm z=%2fm"%(x,y,z) cv2.putText(traj, text, (20,40), cv2.FONT_HERSHEY_PLAIN, 1, (255,255,255), 1, 8) cv2.imshow('Road facing camera', img)