def estimate_current_pose(self, previous_pose, current_rgb, current_depth, debug=False):
    render_rgb, render_depth = self.renderer.render(previous_pose.transpose())
    # todo implement this part on gpu...
    rgbA, depthA = normalize_scale(render_rgb, render_depth, previous_pose.inverse(),
                                   self.camera, self.image_size, self.object_width)
    rgbB, depthB = normalize_scale(current_rgb, current_depth, previous_pose.inverse(),
                                   self.camera, self.image_size, self.object_width)

    depthA = normalize_depth(depthA, previous_pose)
    depthB = normalize_depth(depthB, previous_pose)

    if debug:
        show_frames(rgbA, depthA, rgbB, depthB)
    rgbA, depthA = normalize_channels(rgbA, depthA, self.mean[:4], self.std[:4])
    rgbB, depthB = normalize_channels(rgbB, depthB, self.mean[4:], self.std[4:])

    self.input_buffer[0, 0:3, :, :] = rgbA
    self.input_buffer[0, 3, :, :] = depthA
    self.input_buffer[0, 4:7, :, :] = rgbB
    self.input_buffer[0, 7, :, :] = depthB
    self.prior_buffer[0] = np.array(previous_pose.to_parameters(isQuaternion=True))

    prediction = self.tracker_model.test([self.input_buffer, self.prior_buffer]).asNumpyTensor()
    prediction = unnormalize_label(prediction, self.translation_range, self.rotation_range)
    if debug:
        print("Prediction : {}".format(prediction))
    prediction = Transform.from_parameters(*prediction[0], is_degree=True)
    current_pose = combine_view_transform(previous_pose, prediction)
    self.debug_rgb = render_rgb
    return current_pose
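# `normalize_channels` is not defined in this excerpt. Below is a minimal NumPy sketch of the
# per-channel standardization it presumably performs, assuming the four statistics are ordered
# (R, G, B, depth) and the RGB image has shape (H, W, 3); the name `normalize_channels_sketch`
# is hypothetical and only illustrates how the `mean[:4]` / `std[:4]` slices would be consumed.
import numpy as np

def normalize_channels_sketch(rgb, depth, mean, std):
    rgb = rgb.astype(np.float32)
    depth = depth.astype(np.float32)
    # Standardize each color channel with its own mean and standard deviation.
    for i in range(3):
        rgb[:, :, i] = (rgb[:, :, i] - mean[i]) / std[i]
    # The fourth statistic standardizes the depth channel.
    depth = (depth - mean[3]) / std[3]
    return rgb, depth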
def random_z_rotation(rgb, depth, pose, camera):
    # Rotate the frame by a random angle around the optical axis, centered on the object.
    rotation = random.uniform(-180, 180)
    rotation_matrix = Transform()
    rotation_matrix.set_rotation(0, 0, math.radians(rotation))
    pixel = center_pixel(pose, camera)
    new_rgb = rotate_image(rgb, rotation, pixel[0])
    new_depth = rotate_image(depth, rotation, pixel[0])
    # Thresholding below 50 removes some interpolation noise, which covers small holes
    mask = (new_depth >= 50).astype(np.uint8)[:, :, np.newaxis]
    rgb_mask = np.all(new_rgb != 0, axis=2).astype(np.uint8)
    kernel = np.array([[0, 1, 0],
                       [1, 1, 1],
                       [0, 1, 0]], np.uint8)
    # Erode the remaining interpolation noise, which would negatively affect future blendings
    eroded_mask = cv2.erode(mask, kernel, iterations=2)
    eroded_rgb_mask = cv2.erode(rgb_mask, kernel, iterations=2)
    new_depth = new_depth * eroded_mask
    new_rgb = new_rgb * eroded_rgb_mask[:, :, np.newaxis]
    new_pose = combine_view_transform(pose, rotation_matrix)
    return new_rgb, new_depth, new_pose
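# `rotate_image` is not shown in this excerpt. A minimal OpenCV sketch of an in-plane rotation
# around a given pixel, which is what `random_z_rotation` above appears to rely on. The name
# `rotate_image_sketch`, the (x, y) ordering of the center pixel, and the bilinear interpolation
# choice are all assumptions, not the project's actual implementation.
import cv2
import numpy as np

def rotate_image_sketch(image, angle_degrees, center_xy):
    # Build a 2x3 rotation matrix around `center_xy` (scale 1.0) and warp the whole image with it.
    matrix = cv2.getRotationMatrix2D((float(center_xy[0]), float(center_xy[1])), angle_degrees, 1.0)
    # warpAffine expects the output size as (width, height); bilinear interpolation is the
    # kind of resampling that produces the small noise masked out above.
    return cv2.warpAffine(image, matrix, (image.shape[1], image.shape[0]), flags=cv2.INTER_LINEAR)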
rgb_render, depth_render = vpRender.render(pose.transpose())
rgb, depth = frame.get_rgb_depth(real_dataset.path)
masked_rgb, masked_depth = mask_real_image(rgb, depth, depth_render)
for j in range(SAMPLE_QUANTITY):
    rotated_rgb, rotated_depth, rotated_pose = random_z_rotation(masked_rgb, masked_depth,
                                                                 pose, real_dataset.camera)
    random_transform = Transform.random((-TRANSLATION_RANGE, TRANSLATION_RANGE),
                                        (-ROTATION_RANGE, ROTATION_RANGE))
    inverted_random_transform = Transform.from_parameters(*(-random_transform.to_parameters()))
    previous_pose = rotated_pose.copy()
    previous_pose = combine_view_transform(previous_pose, inverted_random_transform)
    rgbA, depthA = vpRender.render(previous_pose.transpose())
    bb = compute_2Dboundingbox(previous_pose, real_dataset.camera, OBJECT_WIDTH,
                               scale=(1000, -1000, -1000))
    rgbA, depthA = normalize_scale(rgbA, depthA, bb, real_dataset.camera, IMAGE_SIZE)
    rgbB, depthB = normalize_scale(rotated_rgb, rotated_depth, bb, real_dataset.camera, IMAGE_SIZE)
    index = output_dataset.add_pose(rgbA, depthA, previous_pose)
    output_dataset.add_pair(rgbB, depthB, random_transform, index)
    iteration = i * SAMPLE_QUANTITY + j
    sys.stdout.write(
preload_count = 0
if dataset.load():
    preload_count = dataset.size()
    print("This Dataset already contains {} samples".format(preload_count))

# Iterate over all models from the config files
for model in MODELS:
    vpRender = ModelRenderer(model["model_path"], SHADER_PATH, dataset.camera, window, window_size)
    vpRender.load_ambiant_occlusion_map(model["ambiant_occlusion_model"])
    OBJECT_WIDTH = int(model["object_width"])
    # Generate only the samples that are not already in the dataset
    for i in tqdm(range(SAMPLE_QUANTITY - preload_count)):
        random_pose = sphere_sampler.get_random()
        random_transform = Transform.random((-TRANSLATION_RANGE, TRANSLATION_RANGE),
                                            (-ROTATION_RANGE, ROTATION_RANGE))
        pair = combine_view_transform(random_pose, random_transform)
        # Render the reference view (A) at the sampled pose and the perturbed view (B)
        rgbA, depthA = vpRender.render(random_pose.transpose())
        rgbB, depthB = vpRender.render(pair.transpose(), sphere_sampler.random_direction())
        bb = compute_2Dboundingbox(random_pose, dataset.camera, OBJECT_WIDTH,
                                   scale=(1000, -1000, -1000))
        rgbA, depthA = normalize_scale(rgbA, depthA, bb, dataset.camera, IMAGE_SIZE)
        rgbB, depthB = normalize_scale(rgbB, depthB, bb, dataset.camera, IMAGE_SIZE)
        index = dataset.add_pose(rgbA, depthA, random_pose)
        dataset.add_pair(rgbB, depthB, random_transform, index)
def estimate_current_pose(self, previous_pose, current_rgb, current_depth, debug=False, debug_time=False):
    if debug_time:
        start_time = time.time()
    # Bounding boxes of the object at the previous pose (two axis conventions are needed:
    # one for the render crop, one for the sensor crop)
    bb = compute_2Dboundingbox(previous_pose, self.camera, self.object_width, scale=(1000, 1000, -1000))
    bb2 = compute_2Dboundingbox(previous_pose, self.camera, self.object_width, scale=(1000, -1000, -1000))
    if debug_time:
        print("Compute BB : {}".format(time.time() - start_time))
        start_time = time.time()
    rgbA, depthA = self.compute_render(previous_pose, bb)
    if debug_time:
        print("Render : {}".format(time.time() - start_time))
        start_time = time.time()
    rgbB, depthB = normalize_scale(current_rgb, current_depth, bb2, self.camera, self.image_size)
    debug_info = (rgbA, bb2, np.hstack((rgbA, rgbB)))

    rgbA = rgbA.astype(np.float64)
    rgbB = rgbB.astype(np.float64)
    depthA = depthA.astype(np.float64)
    depthB = depthB.astype(np.float64)

    depthA = normalize_depth(depthA, previous_pose)
    depthB = normalize_depth(depthB, previous_pose)

    if debug:
        show_frames(rgbA, depthA, rgbB, depthB)
    rgbA, depthA = normalize_channels(rgbA, depthA, self.mean[:4], self.std[:4])
    rgbB, depthB = normalize_channels(rgbB, depthB, self.mean[4:], self.std[4:])

    # Pack the rendered prediction (A) and the observation (B) into the 8-channel network input
    self.input_buffer[0, 0:3, :, :] = rgbA
    self.input_buffer[0, 3, :, :] = depthA
    self.input_buffer[0, 4:7, :, :] = rgbB
    self.input_buffer[0, 7, :, :] = depthB
    self.prior_buffer[0] = np.array(previous_pose.to_parameters(isQuaternion=True))

    if debug_time:
        print("Normalize : {}".format(time.time() - start_time))
        start_time = time.time()
    prediction = self.tracker_model.test([self.input_buffer, self.prior_buffer]).asNumpyTensor()
    if debug_time:
        print("Network time : {}".format(time.time() - start_time))
    prediction = unnormalize_label(prediction, self.translation_range, self.rotation_range)
    if debug:
        print("Prediction : {}".format(prediction))
    # Convert the network output to a relative transform and compose it with the previous pose
    prediction = Transform.from_parameters(*prediction[0], is_degree=True)
    current_pose = combine_view_transform(previous_pose, prediction)
    return current_pose, debug_info
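# A hedged usage sketch of the per-frame tracking loop built on the method above, assuming a
# `tracker` instance of this class and an `initial_pose` from a detector or manual alignment.
# The `sensor` object and its `frames()` generator are hypothetical stand-ins for whatever
# RGB-D source is actually used; they are not part of the code shown in this excerpt.
previous_pose = initial_pose
for rgb_frame, depth_frame in sensor.frames():
    # Each estimate is fed back as the prior for the next frame (temporal tracking).
    previous_pose, debug_info = tracker.estimate_current_pose(previous_pose, rgb_frame, depth_frame)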