Example #1
    def estimate_current_pose(self, previous_pose, current_rgb, current_depth, debug=False):
        render_rgb, render_depth = self.renderer.render(previous_pose.transpose())
        # TODO: implement this part on the GPU
        rgbA, depthA = normalize_scale(render_rgb, render_depth, previous_pose.inverse(), self.camera, self.image_size,
                                       self.object_width)
        rgbB, depthB = normalize_scale(current_rgb, current_depth, previous_pose.inverse(), self.camera, self.image_size,
                                       self.object_width)

        depthA = normalize_depth(depthA, previous_pose)
        depthB = normalize_depth(depthB, previous_pose)

        if debug:
            show_frames(rgbA, depthA, rgbB, depthB)
        rgbA, depthA = normalize_channels(rgbA, depthA, self.mean[:4], self.std[:4])
        rgbB, depthB = normalize_channels(rgbB, depthB, self.mean[4:], self.std[4:])
        self.input_buffer[0, 0:3, :, :] = rgbA
        self.input_buffer[0, 3, :, :] = depthA
        self.input_buffer[0, 4:7, :, :] = rgbB
        self.input_buffer[0, 7, :, :] = depthB
        self.prior_buffer[0] = np.array(previous_pose.to_parameters(isQuaternion=True))
        prediction = self.tracker_model.test([self.input_buffer, self.prior_buffer]).asNumpyTensor()
        prediction = unnormalize_label(prediction, self.translation_range, self.rotation_range)
        if debug:
            print("Prediction : {}".format(prediction))
        prediction = Transform.from_parameters(*prediction[0], is_degree=True)
        current_pose = combine_view_transform(previous_pose, prediction)
        self.debug_rgb = render_rgb
        return current_pose
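
The method above is meant to be called once per frame, feeding its own output back in as the next frame's prior. Below is a minimal driver sketch; the tracker instance, the initial pose and the frame source are assumptions, only estimate_current_pose comes from the example.

def track_sequence(tracker, initial_pose, frames):
    # 'frames' is assumed to yield registered (rgb, depth) pairs and
    # 'initial_pose' to come from a detector or ground truth; only
    # estimate_current_pose() is taken from the example above.
    pose = initial_pose
    estimated_poses = []
    for rgb, depth in frames:
        pose = tracker.estimate_current_pose(pose, rgb, depth)
        estimated_poses.append(pose)
    return estimated_poses
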
Example #2
def random_z_rotation(rgb, depth, pose, camera):
    rotation = random.uniform(-180, 180)
    rotation_matrix = Transform()
    rotation_matrix.set_rotation(0, 0, math.radians(rotation))

    pixel = center_pixel(pose, camera)
    new_rgb = rotate_image(rgb, rotation, pixel[0])
    new_depth = rotate_image(depth, rotation, pixel[0])
    # thresholding below 50 removes interpolation noise that covers small holes
    mask = (new_depth >= 50).astype(np.uint8)[:, :, np.newaxis]
    rgb_mask = np.all(new_rgb != 0, axis=2).astype(np.uint8)
    kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], np.uint8)
    # erode the remaining interpolation noise, which would negatively affect future blending
    eroded_mask = cv2.erode(mask, kernel, iterations=2)
    eroded_rgb_mask = cv2.erode(rgb_mask, kernel, iterations=2)
    new_depth = new_depth * eroded_mask
    new_rgb = new_rgb * eroded_rgb_mask[:, :, np.newaxis]
    new_pose = combine_view_transform(pose, rotation_matrix)
    return new_rgb, new_depth, new_pose
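
rotate_image is not shown in the example; the sketch below is what it is assumed to do, built on OpenCV's affine warp (the (x, y) ordering of the rotation centre is an assumption).

import cv2

def rotate_image(image, angle, center):
    # Rotate 'image' by 'angle' degrees around 'center' (x, y), filling the
    # exposed borders with zeros so the depth/RGB masks built above stay valid.
    matrix = cv2.getRotationMatrix2D((float(center[0]), float(center[1])), angle, 1.0)
    return cv2.warpAffine(image, matrix, (image.shape[1], image.shape[0]),
                          flags=cv2.INTER_LINEAR, borderValue=0)

The bilinear interpolation used here is what produces the low-value noise that the threshold and erosion in the example clean up; nearest-neighbour interpolation would avoid it for the depth channel at the cost of blockier object edges.
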
Example #3
        rgb_render, depth_render = vpRender.render(pose.transpose())
        rgb, depth = frame.get_rgb_depth(real_dataset.path)
        masked_rgb, masked_depth = mask_real_image(rgb, depth, depth_render)

        for j in range(SAMPLE_QUANTITY):
            rotated_rgb, rotated_depth, rotated_pose = random_z_rotation(
                masked_rgb, masked_depth, pose, real_dataset.camera)
            random_transform = Transform.random(
                (-TRANSLATION_RANGE, TRANSLATION_RANGE),
                (-ROTATION_RANGE, ROTATION_RANGE))
            inverted_random_transform = Transform.from_parameters(
                *(-random_transform.to_parameters()))

            previous_pose = rotated_pose.copy()
            previous_pose = combine_view_transform(previous_pose,
                                                   inverted_random_transform)

            rgbA, depthA = vpRender.render(previous_pose.transpose())
            bb = compute_2Dboundingbox(previous_pose,
                                       real_dataset.camera,
                                       OBJECT_WIDTH,
                                       scale=(1000, -1000, -1000))
            rgbA, depthA = normalize_scale(rgbA, depthA, bb,
                                           real_dataset.camera, IMAGE_SIZE)
            rgbB, depthB = normalize_scale(rotated_rgb, rotated_depth, bb,
                                           real_dataset.camera, IMAGE_SIZE)

            index = output_dataset.add_pose(rgbA, depthA, previous_pose)
            output_dataset.add_pair(rgbB, depthB, random_transform, index)
            iteration = i * SAMPLE_QUANTITY + j
            sys.stdout.write("\r{}".format(iteration))  # hypothetical completion of the truncated progress write
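
mask_real_image is also not shown; the sketch below illustrates its assumed behaviour of segmenting the real RGB-D frame with the synthetic render so only object pixels survive before augmentation.

import numpy as np

def mask_real_image(rgb, depth, depth_render):
    # Assumed behaviour: keep only the pixels where the render of the annotated
    # pose has geometry, removing the real background around the object.
    mask = depth_render != 0
    masked_rgb = rgb * mask[:, :, np.newaxis].astype(rgb.dtype)
    masked_depth = depth * mask.astype(depth.dtype)
    return masked_rgb, masked_depth
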
Example #4
        if dataset.load():
            preload_count = dataset.size()
            print("This Dataset already contains {} samples".format(
                preload_count))
    # Iterate over all models from config files
    for model in MODELS:
        vpRender = ModelRenderer(model["model_path"], SHADER_PATH,
                                 dataset.camera, window, window_size)
        vpRender.load_ambiant_occlusion_map(model["ambiant_occlusion_model"])
        OBJECT_WIDTH = int(model["object_width"])
        for i in tqdm(range(SAMPLE_QUANTITY - preload_count)):
            random_pose = sphere_sampler.get_random()
            random_transform = Transform.random(
                (-TRANSLATION_RANGE, TRANSLATION_RANGE),
                (-ROTATION_RANGE, ROTATION_RANGE))
            pair = combine_view_transform(random_pose, random_transform)

            rgbA, depthA = vpRender.render(random_pose.transpose())
            rgbB, depthB = vpRender.render(pair.transpose(),
                                           sphere_sampler.random_direction())
            bb = compute_2Dboundingbox(random_pose,
                                       dataset.camera,
                                       OBJECT_WIDTH,
                                       scale=(1000, -1000, -1000))
            rgbA, depthA = normalize_scale(rgbA, depthA, bb, dataset.camera,
                                           IMAGE_SIZE)
            rgbB, depthB = normalize_scale(rgbB, depthB, bb, dataset.camera,
                                           IMAGE_SIZE)

            index = dataset.add_pose(rgbA, depthA, random_pose)
            dataset.add_pair(rgbB, depthB, random_transform, index)
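
compute_2Dboundingbox is used in the last three examples but never shown. The sketch below only illustrates the underlying idea under a pinhole camera model; the explicit fx, fy, cx, cy intrinsics and the name project_bounding_square are assumptions (the real helper takes a camera object, and its scale=(1000, -1000, -1000) argument suggests a metre-to-millimetre conversion with axis flips).

import numpy as np

def project_bounding_square(translation_mm, object_width_mm, fx, fy, cx, cy):
    # Place a square of side object_width_mm on a plane at the object's depth,
    # centred on its camera-space position, and project the corners.
    x, y, z = translation_mm
    half = object_width_mm / 2.0
    corners = np.array([[x - half, y - half, z],
                        [x - half, y + half, z],
                        [x + half, y - half, z],
                        [x + half, y + half, z]], dtype=np.float64)
    u = fx * corners[:, 0] / corners[:, 2] + cx  # pinhole projection
    v = fy * corners[:, 1] / corners[:, 2] + cy
    return int(u.min()), int(v.min()), int(u.max()), int(v.max())
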
Example #5
    def estimate_current_pose(self,
                              previous_pose,
                              current_rgb,
                              current_depth,
                              debug=False,
                              debug_time=False):
        if debug_time:
            start_time = time.time()
        bb = compute_2Dboundingbox(previous_pose,
                                   self.camera,
                                   self.object_width,
                                   scale=(1000, 1000, -1000))
        bb2 = compute_2Dboundingbox(previous_pose,
                                    self.camera,
                                    self.object_width,
                                    scale=(1000, -1000, -1000))
        if debug_time:
            print("Compute BB : {}".format(time.time() - start_time))
            start_time = time.time()
        rgbA, depthA = self.compute_render(previous_pose, bb)
        if debug_time:
            print("Render : {}".format(time.time() - start_time))
            start_time = time.time()
        rgbB, depthB = normalize_scale(current_rgb, current_depth, bb2,
                                       self.camera, self.image_size)
        debug_info = (rgbA, bb2, np.hstack((rgbA, rgbB)))

        rgbA = rgbA.astype(np.float64)
        rgbB = rgbB.astype(np.float64)
        depthA = depthA.astype(np.float64)
        depthB = depthB.astype(np.float64)

        depthA = normalize_depth(depthA, previous_pose)
        depthB = normalize_depth(depthB, previous_pose)

        if debug:
            show_frames(rgbA, depthA, rgbB, depthB)
        rgbA, depthA = normalize_channels(rgbA, depthA, self.mean[:4],
                                          self.std[:4])
        rgbB, depthB = normalize_channels(rgbB, depthB, self.mean[4:],
                                          self.std[4:])

        self.input_buffer[0, 0:3, :, :] = rgbA
        self.input_buffer[0, 3, :, :] = depthA
        self.input_buffer[0, 4:7, :, :] = rgbB
        self.input_buffer[0, 7, :, :] = depthB
        self.prior_buffer[0] = np.array(
            previous_pose.to_parameters(isQuaternion=True))
        if debug_time:
            print("Normalize : {}".format(time.time() - start_time))
            start_time = time.time()
        prediction = self.tracker_model.test(
            [self.input_buffer, self.prior_buffer]).asNumpyTensor()
        if debug_time:
            print("Network time : {}".format(time.time() - start_time))
        prediction = unnormalize_label(prediction, self.translation_range,
                                       self.rotation_range)
        if debug:
            print("Prediction : {}".format(prediction))
        prediction = Transform.from_parameters(*prediction[0], is_degree=True)
        current_pose = combine_view_transform(previous_pose, prediction)
        return current_pose, debug_info
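
normalize_channels standardizes the network input in Examples #1 and #5, but its implementation is not shown. The sketch below only illustrates the assumed per-channel mean/std normalization and the channel-first layout implied by the input_buffer[0, 0:3, :, :] slices above.

import numpy as np

def normalize_channels(rgb, depth, mean, std):
    # Assumed behaviour: standardize the three colour channels and the depth
    # channel with the dataset statistics, returning the RGB image in
    # channel-first (3, H, W) layout so it fits the input_buffer slices.
    rgb = rgb.astype(np.float32).transpose(2, 0, 1)
    depth = depth.astype(np.float32)
    for channel in range(3):
        rgb[channel] = (rgb[channel] - mean[channel]) / std[channel]
    depth = (depth - mean[3]) / std[3]
    return rgb, depth
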