def test_it_should_set_rotation_from_euler(self):
    transform = Transform()
    transform.set_rotation(1.0, 0.707, 3.1)
    truth = np.array([[-0.7597, -0.0316, 0.6496, 0],
                      [-0.5236, -0.5626, -0.6398, 0],
                      [0.3856, -0.8262, 0.4108, 0],
                      [0, 0, 0, 1]])
    np.testing.assert_almost_equal(transform.matrix, truth, 4)
def test_it_should_set_translation(self):
    transform = Transform()
    transform.set_translation(10, 1, 2.2)
    truth = np.array([[1, 0, 0, 10],
                      [0, 1, 0, 1],
                      [0, 0, 1, 2.2],
                      [0, 0, 0, 1]])
    np.testing.assert_almost_equal(transform.matrix, truth)
def test_it_should_rotate_from_other_transform(self):
    transform = Transform.from_parameters(0, 0, 0, 1, 0.707, 3.1)
    rotation = Transform.from_parameters(0, 0, 0, -1, -0.707, -3.1)
    transform.rotate(transform=rotation)
    truth = np.array([[0.39, 0.5479, 0.74, 0.],
                      [0.9196, -0.2729, -0.2826, 0.],
                      [0.0471, 0.7908, -0.6103, 0.],
                      [0., 0., 0., 1.]])
    np.testing.assert_almost_equal(transform.matrix, truth, 4)
def test_it_should_translate_from_other_transform(self):
    transform = Transform.from_parameters(10, 1, 2.2, 1, 0.707, 3.1)
    translation = Transform.from_parameters(10, -1, 2.2, 0, 0, 0)
    transform.translate(transform=translation)
    truth = np.array([[-0.7597, -0.0316, 0.6496, 20],
                      [-0.5236, -0.5626, -0.6398, 0],
                      [0.3856, -0.8262, 0.4108, 4.4],
                      [0, 0, 0, 1]])
    np.testing.assert_almost_equal(transform.matrix, truth, 4)
def transform_pointcloud(points, pose):
    transform = pose.inverse()
    scale = Transform.scale(1, -1, -1)
    transform.combine(scale)
    points = transform.rotation.dot(points)
    points = transform.translation.dot(points)
    return points
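# Hedged usage sketch (not from the original source). It assumes `points` is a
# 4xN array of homogeneous column vectors and that Transform.rotation and
# Transform.translation return 4x4 matrices, which is what the chained dot
# products in transform_pointcloud imply.
pose = Transform.from_parameters(0.1, 0.0, 1.0, 0.0, 0.0, 0.0)
cloud = np.ones((4, 5))               # 5 points in homogeneous coordinates
cloud[0:3, :] = np.random.rand(3, 5)  # random xyz values
camera_cloud = transform_pointcloud(cloud, pose)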
def test_it_should_convert_to_parameters(self):
    matrix = np.array([[-0.7597, -0.0316, 0.6496, 10],
                       [-0.5236, -0.5626, -0.6398, 1],
                       [0.3856, -0.8262, 0.4108, 2.2],
                       [0, 0, 0, 1]])
    transform = Transform.from_matrix(matrix)
    np.testing.assert_almost_equal(transform.to_parameters(),
                                   np.array([10, 1, 2.2, 1, 0.707, 3.1]), 4)
def test_it_should_init_from_parameters(self):
    transform = Transform.from_parameters(10, 1, 2.2, 1, 0.707, 3.1)
    truth = np.array([[-0.7597, -0.0316, 0.6496, 10],
                      [-0.5236, -0.5626, -0.6398, 1],
                      [0.3856, -0.8262, 0.4108, 2.2],
                      [0, 0, 0, 1]])
    np.testing.assert_almost_equal(transform.matrix, truth, 4)
def estimate_current_pose(self, previous_pose, current_rgb, current_depth, debug=False):
    render_rgb, render_depth = self.renderer.render(previous_pose.transpose())
    # todo implement this part on gpu...
    rgbA, depthA = normalize_scale(render_rgb, render_depth, previous_pose.inverse(),
                                   self.camera, self.image_size, self.object_width)
    rgbB, depthB = normalize_scale(current_rgb, current_depth, previous_pose.inverse(),
                                   self.camera, self.image_size, self.object_width)

    depthA = normalize_depth(depthA, previous_pose)
    depthB = normalize_depth(depthB, previous_pose)

    if debug:
        show_frames(rgbA, depthA, rgbB, depthB)
    rgbA, depthA = normalize_channels(rgbA, depthA, self.mean[:4], self.std[:4])
    rgbB, depthB = normalize_channels(rgbB, depthB, self.mean[4:], self.std[4:])

    self.input_buffer[0, 0:3, :, :] = rgbA
    self.input_buffer[0, 3, :, :] = depthA
    self.input_buffer[0, 4:7, :, :] = rgbB
    self.input_buffer[0, 7, :, :] = depthB
    self.prior_buffer[0] = np.array(previous_pose.to_parameters(isQuaternion=True))

    prediction = self.tracker_model.test([self.input_buffer, self.prior_buffer]).asNumpyTensor()
    prediction = unnormalize_label(prediction, self.translation_range, self.rotation_range)
    if debug:
        print("Prediction : {}".format(prediction))
    prediction = Transform.from_parameters(*prediction[0], is_degree=True)
    current_pose = combine_view_transform(previous_pose, prediction)
    self.debug_rgb = render_rgb
    return current_pose
def detect(self, img):
    if len(img.shape) > 2:
        raise Exception("ChessboardDetector uses gray image as input")
    detection = None
    ret, corners = cv2.findChessboardCorners(img, self.chess_shape, None)
    if ret:
        ret, rvec, tvec = cv2.solvePnP(self.obj_points, corners, self.camera.matrix(),
                                       np.array([0, 0, 0, 0, 0]))
        # invert axis convention
        rvec[1] = -rvec[1]
        rvec[2] = -rvec[2]
        tvec[1] = -tvec[1]
        tvec[2] = -tvec[2]
        detection = Transform()
        detection.matrix[0:3, 0:3] = cv2.Rodrigues(rvec)[0]
        detection.set_translation(tvec[0] / 1000, tvec[1] / 1000, tvec[2] / 1000)
    return detection
def random_z_rotation(rgb, depth, pose, camera):
    rotation = random.uniform(-180, 180)
    rotation_matrix = Transform()
    rotation_matrix.set_rotation(0, 0, math.radians(rotation))
    pixel = center_pixel(pose, camera)
    new_rgb = rotate_image(rgb, rotation, pixel[0])
    new_depth = rotate_image(depth, rotation, pixel[0])
    # thresholding below 50 removes some interpolation noise that covers small holes
    mask = (new_depth >= 50).astype(np.uint8)[:, :, np.newaxis]
    rgb_mask = np.all(new_rgb != 0, axis=2).astype(np.uint8)
    kernel = np.array([[0, 1, 0],
                       [1, 1, 1],
                       [0, 1, 0]], np.uint8)
    # erode the rest of the interpolation noise, which would negatively affect future blendings
    eroded_mask = cv2.erode(mask, kernel, iterations=2)
    eroded_rgb_mask = cv2.erode(rgb_mask, kernel, iterations=2)
    new_depth = new_depth * eroded_mask
    new_rgb = new_rgb * eroded_rgb_mask[:, :, np.newaxis]
    new_pose = combine_view_transform(pose, rotation_matrix)
    return new_rgb, new_depth, new_pose
def __next__(self):
    rgb, depth = self.sensor.get_frame()
    rgb = cv2.resize(rgb, (self.sensor.camera.width, self.sensor.camera.height))
    depth = cv2.resize(depth, (self.sensor.camera.width, self.sensor.camera.height))
    frame = Frame(rgb, depth, self.count)
    self.count += 1
    pose = None
    if self.do_compute:
        pose = self.detector.detect(rgb.copy())
    if pose is None:
        pose = Transform.from_parameters(0, 0, -1, 0, 0, 0)
    return frame, pose
def detect(self, img):
    self.likelihood = self.detector.detect_mat(img)
    detection = None
    if self.likelihood > 0.1:
        # get the detected board and convert its pose
        board = self.detector.getDetectedBoard()
        rvec = board.Rvec.copy()
        tvec = board.Tvec.copy()
        matrix = cv2.Rodrigues(rvec)[0]
        rodrigues = mat2euler(matrix)
        detection = Transform.from_parameters(tvec[0], -tvec[1], -tvec[2],
                                              rodrigues[0], -rodrigues[1], -rodrigues[2])
    return detection
def load(self):
    """
    Load a viewpoints.json file into the dataset's structure.
    Todo: the data structure should be more similar to the json structure...
    :return: False if the dataset could not be loaded (missing viewpoints.json), True otherwise.
    """
    # Load the viewpoints file and the camera file
    try:
        with open(os.path.join(self.path, "viewpoints.json")) as data_file:
            data = json.load(data_file)
        self.camera = Camera.load_from_json(self.path)
    except FileNotFoundError:
        return False
    self.metadata = data["metaData"]
    self.set_save_type(self.metadata["save_type"])
    count = 0
    # todo this is not clean!
    while True:
        try:
            id = str(count)
            pose = Transform.from_parameters(*[float(data[id]["vector"][str(x)]) for x in range(6)])
            self.add_pose(None, None, pose)
            if "pairs" in data[id]:
                for i in range(int(data[id]["pairs"])):
                    pair_id = "{}n{}".format(id, i)
                    pair_pose = Transform.from_parameters(*[float(data[pair_id]["vector"][str(x)]) for x in range(6)])
                    self.add_pair(None, None, pair_pose, count)
            count += 1
        except KeyError:
            break
    return True
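# Hedged illustration (not from the original source): a minimal viewpoints.json
# layout consistent with the parsing in load() above. The key names come from the
# code; the concrete values are invented for the example.
example_viewpoints = {
    "metaData": {"save_type": "png"},
    "0": {
        "vector": {"0": 0.0, "1": 0.0, "2": -1.0, "3": 0.0, "4": 0.0, "5": 0.0},
        "pairs": "1",
    },
    "0n0": {
        "vector": {"0": 0.01, "1": 0.0, "2": -1.0, "3": 0.0, "4": 0.0, "5": 0.0},
    },
}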
def icp(A, B, init_pose=None, max_iterations=20, tolerance=0.001):
    '''
    The Iterative Closest Point method
    Input:
        A: Nx3 numpy array of source 3D points
        B: Nx3 numpy array of destination 3D points
        init_pose: 4x4 homogeneous transformation
        max_iterations: exit the algorithm after max_iterations
        tolerance: convergence criterion
    Output:
        T: final homogeneous transformation
        distances: Euclidean distances (errors) of the nearest neighbors
    '''
    # make points homogeneous, copy them so as to maintain the originals
    src = np.ones((4, A.shape[0]))
    dst = np.ones((4, B.shape[0]))
    src[0:3, :] = np.copy(A.T)
    dst[0:3, :] = np.copy(B.T)

    # apply the initial pose estimation
    if init_pose is not None:
        src = np.dot(init_pose, src)

    prev_error = 0
    for i in range(max_iterations):
        # find the nearest neighbours between the current source and destination points
        distances, indices = nearest_neighbor(src[0:3, :].T, dst[0:3, :].T)

        # compute the transformation between the current source and nearest destination points
        T, _, _ = best_fit_transform(src[0:3, :].T, dst[0:3, indices].T)

        # update the current source
        src = np.dot(T, src)

        # check error
        mean_error = np.sum(distances) / distances.size
        if abs(prev_error - mean_error) < tolerance:
            break
        prev_error = mean_error

    # calculate the final transformation
    T, _, _ = best_fit_transform(A, src[0:3, :].T)

    return Transform.from_matrix(T), distances
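# Hedged usage sketch (not from the original source): align a transformed copy of
# a point cloud with icp(). It assumes the nearest_neighbor and best_fit_transform
# helpers referenced above are available in the same module.
A = np.random.rand(200, 3)                              # source cloud, Nx3
offset = Transform.from_parameters(0.05, 0.0, 0.0, 0.0, 0.0, 0.1).matrix
B = (offset[0:3, 0:3].dot(A.T) + offset[0:3, 3:4]).T    # destination cloud, Nx3
pose_estimate, residuals = icp(A, B, max_iterations=50, tolerance=1e-6)
print(pose_estimate.matrix, residuals.mean())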
def get_random(self):
    # Random pose on a sphere : https://www.jasondavies.com/maps/random-points/
    eye = UniformSphereSampler.random_direction()
    distance = random.uniform(0, 1) * (self.max_radius - self.min_radius) + self.min_radius
    eye *= distance
    view = Transform.lookAt(eye, np.zeros(3), self.up)

    # Random z rotation
    angle = random.uniform(0, 1) * math.pi * 2
    cosa = math.cos(angle)
    sina = math.sin(angle)
    rotation = Transform()
    rotation.matrix[0, 0] = cosa
    rotation.matrix[1, 0] = -sina
    rotation.matrix[0, 1] = sina
    rotation.matrix[1, 1] = cosa

    ret = view.transpose()
    ret.rotate(transform=rotation.transpose())
    return ret.transpose()
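# Hedged usage sketch (not from the original source): the UniformSphereSampler
# constructor arguments below are an assumption based on the attributes used in
# get_random(); the assumed ordering is (min_radius, max_radius) in scene units.
sampler = UniformSphereSampler(0.4, 1.2)
random_view = sampler.get_random()     # Transform looking at the origin from a random direction
print(random_view.matrix)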
def estimate_current_pose(self, previous_pose, current_rgb, current_depth, debug=False, debug_time=False):
    if debug_time:
        start_time = time.time()
    bb = compute_2Dboundingbox(previous_pose, self.camera, self.object_width, scale=(1000, 1000, -1000))
    bb2 = compute_2Dboundingbox(previous_pose, self.camera, self.object_width, scale=(1000, -1000, -1000))
    if debug_time:
        print("Compute BB : {}".format(time.time() - start_time))
        start_time = time.time()
    rgbA, depthA = self.compute_render(previous_pose, bb)
    if debug_time:
        print("Render : {}".format(time.time() - start_time))
        start_time = time.time()
    rgbB, depthB = normalize_scale(current_rgb, current_depth, bb2, self.camera, self.image_size)
    debug_info = (rgbA, bb2, np.hstack((rgbA, rgbB)))

    rgbA = rgbA.astype(float)
    rgbB = rgbB.astype(float)
    depthA = depthA.astype(float)
    depthB = depthB.astype(float)

    depthA = normalize_depth(depthA, previous_pose)
    depthB = normalize_depth(depthB, previous_pose)

    if debug:
        show_frames(rgbA, depthA, rgbB, depthB)
    rgbA, depthA = normalize_channels(rgbA, depthA, self.mean[:4], self.std[:4])
    rgbB, depthB = normalize_channels(rgbB, depthB, self.mean[4:], self.std[4:])

    self.input_buffer[0, 0:3, :, :] = rgbA
    self.input_buffer[0, 3, :, :] = depthA
    self.input_buffer[0, 4:7, :, :] = rgbB
    self.input_buffer[0, 7, :, :] = depthB
    self.prior_buffer[0] = np.array(previous_pose.to_parameters(isQuaternion=True))

    if debug_time:
        print("Normalize : {}".format(time.time() - start_time))
        start_time = time.time()
    prediction = self.tracker_model.test([self.input_buffer, self.prior_buffer]).asNumpyTensor()
    if debug_time:
        print("Network time : {}".format(time.time() - start_time))
    prediction = unnormalize_label(prediction, self.translation_range, self.rotation_range)
    if debug:
        print("Prediction : {}".format(prediction))
    prediction = Transform.from_parameters(*prediction[0], is_degree=True)
    current_pose = combine_view_transform(previous_pose, prediction)
    return current_pose, debug_info
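# Hedged sketch of a tracking loop (not from the original source): `tracker`,
# `sensor` and `initial_pose` are placeholders; only sensor.get_frame() and
# estimate_current_pose() come from the code shown here.
current_pose = initial_pose                  # e.g. a detector output or a known start pose
while True:
    rgb, depth = sensor.get_frame()          # same sensor call used in __next__ above
    current_pose, debug_info = tracker.estimate_current_pose(current_pose, rgb, depth)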
def test_two_identity_should_be_equal(self):
    transform1 = Transform()
    transform2 = Transform()
    self.assertEqual(transform1, transform2)
    transform3 = Transform.from_parameters(1, 0, 0, 0, 0, 0)
    self.assertNotEqual(transform1, transform3)
for model in MODELS:
    metadata["object_width"][model["name"]] = str(model["object_width"])
metadata["min_radius"] = str(SPHERE_MIN_RADIUS)
metadata["max_radius"] = str(SPHERE_MAX_RADIUS)

for i in range(real_dataset.size()):
    frame, pose = real_dataset.data_pose[i]
    rgb_render, depth_render = vpRender.render(pose.transpose())
    rgb, depth = frame.get_rgb_depth(real_dataset.path)
    masked_rgb, masked_depth = mask_real_image(rgb, depth, depth_render)
    for j in range(SAMPLE_QUANTITY):
        rotated_rgb, rotated_depth, rotated_pose = random_z_rotation(masked_rgb, masked_depth, pose, real_dataset.camera)
        random_transform = Transform.random((-TRANSLATION_RANGE, TRANSLATION_RANGE),
                                            (-ROTATION_RANGE, ROTATION_RANGE))
        inverted_random_transform = Transform.from_parameters(*(-random_transform.to_parameters()))
        previous_pose = rotated_pose.copy()
        previous_pose = combine_view_transform(previous_pose, inverted_random_transform)
        rgbA, depthA = vpRender.render(previous_pose.transpose())
        bb = compute_2Dboundingbox(previous_pose, real_dataset.camera, OBJECT_WIDTH, scale=(1000, -1000, -1000))
        rgbA, depthA = normalize_scale(rgbA, depthA, bb, real_dataset.camera, IMAGE_SIZE)
def setUpClass(cls):
    # run these once as they take time
    cls.dummy_rgb = np.zeros((150, 150, 3), dtype=np.float32)
    cls.dummy_depth = np.zeros((150, 150), dtype=np.float32)
    cls.dummy_pose = Transform()
def test_two_similar_transform_should_be_equal(self):
    transform1 = Transform.from_parameters(1.1, 1.2, 1.3, 1.1, 1.2, 1.3)
    transform2 = Transform.from_parameters(1.10001, 1.2, 1.30001, 1.1, 1.200001, 1.3)
    self.assertEqual(transform1, transform2)
from deeptracking.utils.transform import Transform
import cv2
import os
import numpy as np
from deeptracking.detector.detector_aruco import ArucoDetector
# The following imports are missing from the original snippet; the module paths are
# assumptions based on how Dataset, Camera, ModelRenderer and InitOpenGL are used below.
from deeptracking.data.dataset import Dataset
from deeptracking.data.modelrenderer import ModelRenderer, InitOpenGL
from deeptracking.utils.camera import Camera

if __name__ == '__main__':
    dataset_path = "/media/mathieu/e912e715-2be7-4fa2-8295-5c3ef1369dd0/dataset/deeptracking/sequences/skull"
    detector_path = "../deeptracking/detector/aruco_layout.xml"
    model_path = "/home/mathieu/Dataset/3D_models/skull/skull.ply"
    model_ao_path = "/home/mathieu/Dataset/3D_models/skull/skull_ao.ply"
    shader_path = "../deeptracking/data/shaders"

    dataset = Dataset(dataset_path)
    offset = Transform.from_matrix(np.load(os.path.join(dataset.path, "offset.npy")))
    camera = Camera.load_from_json(dataset_path)
    dataset.camera = camera
    files = [f for f in os.listdir(dataset_path)
             if os.path.splitext(f)[-1] == ".png" and 'd' not in os.path.splitext(f)[0]]
    detector = ArucoDetector(camera, detector_path)
    window = InitOpenGL(camera.width, camera.height)
    vpRender = ModelRenderer(model_path, shader_path, camera, window, (camera.width, camera.height))
    vpRender.load_ambiant_occlusion_map(model_ao_path)
    ground_truth_pose = None
    for i in range(len(files)):
camera.set_ratio(ratio)
sensor.start()
dataset = Dataset(OUTPUT_PATH)
dataset.camera = camera
window = InitOpenGL(camera.width, camera.height)
detector = ArucoDetector(camera, DETECTOR_PATH)
vpRender = ModelRenderer(MODELS[0]["model_path"], SHADER_PATH, camera, window, (camera.width, camera.height))
vpRender.load_ambiant_occlusion_map(MODELS[0]["ambiant_occlusion_model"])

cv2.namedWindow('image')
cv2.createTrackbar('transparency', 'image', 0, 100, trackbar)

# todo, read from file?
detection_offset = Transform()
rgbd_record = False
save_next_rgbd_pose = False
lock_offset = False
if PRELOAD:
    dataset.load()
    offset_path = os.path.join(dataset.path, "offset.npy")
    if os.path.exists(offset_path):
        detection_offset = Transform.from_matrix(np.load(offset_path))
        lock_offset = True

while True:
    start_time = time.time()
    bgr, depth = sensor.get_frame()
    bgr = cv2.resize(bgr, (int(1920 / ratio), int(1080 / ratio)))
    depth = cv2.resize(depth, (int(1920 / ratio), int(1080 / ratio)))
preload_count = 0
if PRELOAD:
    if dataset.load():
        preload_count = dataset.size()
        print("This Dataset already contains {} samples".format(preload_count))

# Iterate over all models from config files
for model in MODELS:
    vpRender = ModelRenderer(model["model_path"], SHADER_PATH, dataset.camera, window, window_size)
    vpRender.load_ambiant_occlusion_map(model["ambiant_occlusion_model"])
    OBJECT_WIDTH = int(model["object_width"])
    for i in tqdm(range(SAMPLE_QUANTITY - preload_count)):
        random_pose = sphere_sampler.get_random()
        random_transform = Transform.random((-TRANSLATION_RANGE, TRANSLATION_RANGE),
                                            (-ROTATION_RANGE, ROTATION_RANGE))
        pair = combine_view_transform(random_pose, random_transform)

        rgbA, depthA = vpRender.render(random_pose.transpose())
        rgbB, depthB = vpRender.render(pair.transpose(), sphere_sampler.random_direction())

        bb = compute_2Dboundingbox(random_pose, dataset.camera, OBJECT_WIDTH, scale=(1000, -1000, -1000))
        rgbA, depthA = normalize_scale(rgbA, depthA, bb, dataset.camera, IMAGE_SIZE)
        rgbB, depthB = normalize_scale(rgbB, depthB, bb, dataset.camera, IMAGE_SIZE)
def test_it_should_initialise_as_identity(self):
    transform = Transform()
    truth = np.eye(4)
    np.testing.assert_almost_equal(transform.matrix, truth)