def _get_cameramodel(self, noise=True):
    """Return a CameraModel that has projected ``self.mod.points``.

    The model is created from ``self.resolution`` and ``self.cameraparams``.
    When ``noise`` is true, the projected 2D points are run through
    ``self._add_noise`` and then ``self._do_missclassifcation`` and the
    result is written back onto the model before it is returned.
    """
    settings = self.cameraparams
    model = CameraModel(self.resolution, settings)
    model.update_point_cloud(self.mod.points)
    projected = model.points2d

    # Clean projection requested: hand the model back untouched.
    if not noise:
        return model

    model.points2d = self._do_missclassifcation(self._add_noise(projected))
    return model
Beispiel #2
0
 def __init__(self, resolution, points2d, points3d, logger=None):
     """Store the point correspondences and set up the estimator state.

     Args:
         resolution: Passed unchanged to ``CameraModel``.
         points2d: Observed 2D points.
         points3d: 3D points corresponding to ``points2d``.
         logger: Logger to report to; the root logger is used when omitted.
     """
     self._points2d = points2d
     self._points3d = points3d
     # Inlier subsets start out empty.
     self._points2d_inliers = []
     self._points3d_inliers = []
     self._inliers = []
     self._resolution = resolution
     self._reduce_dist_param = False
     self._cm = CameraModel(resolution)
     self._max_iter = 5000
     # Identity comparison: ``== None`` would invoke a custom ``__eq__``.
     self.logger = logging.getLogger() if logger is None else logger
Beispiel #3
0
def run_application(args):
    """Run the proof of concept and report how well the estimate matches.

    Builds a ``ProofOfConcept`` from ``args``, runs its estimation, prints
    the optimiser statistics and a table comparing the reference camera
    parameters with the estimated ones, then renders the reference image
    and the image produced by the estimated model into one overlay that is
    written to ``diff_img.png`` and shown with matplotlib.
    """
    poc = ProofOfConcept(args)

    reference_image = poc.get_image()
    plt.imshow(reference_image)

    true_params = poc.cameraparams
    est_params, result, estimator = poc.estimate()

    print("Cost (reduced data): " + str(result.cost))
    print("Optimality, gradient value (reduced data): " +
          str(result.optimality))
    inlier_count = np.array(estimator._inliers).shape[0]
    point_count = np.array(poc.mod.points).shape[0]
    print("Inliers in percent: " + str(100.0 * inlier_count / point_count))

    # Header row, then one row per parameter set, all with column width 8.
    column_names = [
        " ", "fx", "fy", "cx", "cy", "thetax", "thetay", "thetaz", "tx",
        "ty", "tz", "k1", "k2", "k3", "p1", "p2"
    ]
    print(array_string(column_names, 8))
    print("shuld:  " + true_params.get_string(8))
    print("is:     " + est_params.get_string(8))

    deviation = (np.array(true_params.get_as_array()) -
                 np.array(est_params.get_as_array()))
    print("diff:   " + array_string(np.round(deviation, 2), 8))
    print("Tot. diff: {}".format(np.linalg.norm(deviation)))

    # Re-render the point cloud through the estimated camera.
    estimated_model = CameraModel(poc.resolution, est_params)
    estimated_model.update_point_cloud(poc.mod.points)
    estimated_image = estimated_model.get_image()

    # Estimated image goes into channel 2, reference image into channel 1,
    # so the two renderings can be told apart in the overlay.
    layer_shape = (poc.resolution[0], poc.resolution[1], 3)
    estimated_layer = np.zeros(layer_shape)
    reference_layer = np.zeros(layer_shape)
    estimated_layer[:, :, 2] = estimated_image
    reference_layer[:, :, 1] = reference_image
    overlay = cv2.dilate(estimated_layer + reference_layer,
                         np.ones((3, 3), np.uint8),
                         iterations=1)

    cv2.imwrite("diff_img.png", overlay * 255)
    plt.figure()
    plt.imshow(overlay)
    plt.show()
    def __init__(self, params, scene, initial_pos):
        """Set up a simulation of ``scene`` starting at ``initial_pos``.

        A ``CameraModel`` is created for the scene with backlash and noise
        simulation switched according to ``params``. When ``params`` carries
        a perfect-classification table, the entry for this scene's filename
        is kept; otherwise ``perfect_classification`` is ``None``.
        """
        self.params = params
        self.scene = scene
        self.initial_pos = initial_pos
        self.status = "none"

        self.camera = CameraModel(
            scene,
            initial_pos,
            simulate_backlash=params.backlash,
            simulate_noise=params.noise)

        table = params.perfect_classification
        self.perfect_classification = (
            None if table is None else table[scene.filename])
class Simulator(object):
    """Simulate one autofocus search on a scene from a given lens position.

    ``evaluate`` takes two fine steps, picks a sweep direction, sweeps with
    coarse steps until a classification other than "continue" is obtained,
    backtracks at most once, and finally hill-climbs to the best position
    seen. ``status`` is "none" until the search ends, then "foundmax" or
    "failed".
    """

    # Number of fine lens steps assumed to make up one coarse step.
    # NOTE(review): presumably mirrors CameraModel.move_coarse -- confirm.
    _FINE_STEPS_PER_COARSE = 8

    def __init__(self, params, scene, initial_pos):
        """Create the camera for ``scene`` positioned at ``initial_pos``.

        ``params`` supplies the backlash/noise switches, the decision trees
        and, optionally, a perfect-classification table keyed by scene
        filename.
        """
        self.scene = scene
        self.params = params
        self.initial_pos = initial_pos
        self.status = "none"

        self.camera = CameraModel(scene, initial_pos,
            simulate_backlash=self.params.backlash,
            simulate_noise=self.params.noise)

        if params.perfect_classification is None:
            self.perfect_classification = None
        else:
            self.perfect_classification = \
                params.perfect_classification[scene.filename]

    def _do_local_search(self, direction, rev_direction):
        """Perform a local search (incremental hillclimbing in a
        given direction). The hillclimbing has a tolerance of two steps.
        i.e., Up to two steps that don't increase the focus value can be taken
        before we stop climbing."""
        # NOTE(review): ``two_step_tolerance`` is not defined in this class;
        # it is expected to be a module-level flag -- confirm it exists.
        while not self.camera.will_hit_edge(direction):
            prev_fmeasure = self.camera.last_fmeasure()
            self.camera.move_fine(direction)

            if self.camera.last_fmeasure() < prev_fmeasure:
                if (two_step_tolerance and
                    not self.camera.will_hit_edge(direction)):
                    # We've seen a decrease. Consider moving one extra step.
                    prev_fmeasure = self.camera.last_fmeasure()
                    self.camera.move_fine(direction)

                    if (self.camera.last_fmeasure() < prev_fmeasure):
                        # Seen a decrease again, backtrack and stop.
                        self.camera.move_fine(rev_direction, 2)
                        break
                else:
                    # Backtrack and stop.
                    self.camera.move_fine(rev_direction)
                    break

    def _go_to_max(self):
        """Return to the location of the largest focus value seen so far and
        perform a local search to find the exact location of the peak.

        Sets ``status`` to "foundmax" when done.
        """
        current_pos = self.camera.last_position()
        maximum_pos = max(self.camera.visited_positions,
            key=(lambda pos : self.camera.get_fvalue(pos)))

        if maximum_pos < current_pos:
            direction = Direction("left")
        elif maximum_pos > current_pos:
            direction = Direction("right")
        elif current_pos < self.camera.visited_positions[-2]:
            # Already at the maximum: keep going the way the last move went.
            direction = Direction("left")
        else:
            direction = Direction("right")
        rev_direction = direction.reverse()

        # Take as many coarse steps as needed to go back to the maximum
        # without going over it. Floor division keeps the step count an
        # integer on Python 3 as well as Python 2.
        distance = abs(current_pos - maximum_pos)
        coarse_steps = distance // self._FINE_STEPS_PER_COARSE

        self.camera.move_coarse(direction, coarse_steps)

        # Keep going in fine steps to see if we can find a higher position.
        start_pos = self.camera.last_position()
        self._do_local_search(direction, rev_direction)

        # If we didn't move further, we might want to look in the other
        # direction too.
        if start_pos == self.camera.last_position():
            self._do_local_search(rev_direction, direction)

        self.status = "foundmax"

    def _get_first_direction(self):
        """Direction in which we should start sweeping initially.

        Decided by ``params.left_right_tree`` from the last three focus
        values and the initial lens position normalised by the scene's
        step count.
        """
        first, second, third = self.camera.get_fvalues(
            self.camera.visited_positions[-3:])
        norm_lens_pos = float(self.initial_pos) / (self.scene.step_count - 1)

        evaluator = featuresfirststep.firststep_feature_evaluator(
            first, second, third, norm_lens_pos)
        return Direction(evaluatetree.evaluate_tree(
            self.params.left_right_tree, evaluator))

    def _sweep(self, direction):
        """Sweep the lens in one direction and return a
        tuple (success state, number of steps taken) along the way.

        The success state is "turn_peak" or "backtrack".

        Raises:
            AssertionError: if the classification is none of "continue",
                "turn_peak" or "backtrack".
        """
        initial_position = self.camera.last_position()
        sweep_fvalues = [ self.camera.last_fmeasure() ]

        while not self.camera.will_hit_edge(direction):
            # Move the lens forward.
            self.camera.move_coarse(direction)
            sweep_fvalues.append(self.camera.last_fmeasure())

            # Take at least two steps before we allow turning back.
            if len(sweep_fvalues) < 3:
                continue

            if self.perfect_classification is None:
                # Obtain the ML classification at the new lens position.
                evaluator = featuresturn.action_feature_evaluator(
                    sweep_fvalues, self.scene.step_count)
                classification = evaluatetree.evaluate_tree(
                    self.params.action_tree, evaluator)
            else:
                key = featuresturn.make_key(str(direction), initial_position,
                                            self.camera.last_position())
                classification = self.perfect_classification[key]

            if classification != "continue":
                # Raised explicitly so the check survives ``python -O``.
                if classification not in ("turn_peak", "backtrack"):
                    raise AssertionError(
                        "unexpected classification: %r" % (classification,))
                return classification, len(sweep_fvalues) - 1

        # We've reached an edge, but the decision tree still does not want
        # to turn back, so what do we do now?
        # After thinking a lot about it, I think the best thing to do is to
        # introduce a condition manually. It's a bit ad-hoc, but we really need
        # to be able to handle this case robustly, as there are lot of cases
        # (i.e., landscape shots) where peaks will be at the edge.
        visited_fvalues = self.camera.get_fvalues(
            self.camera.visited_positions)
        min_val = min(visited_fvalues)
        max_val = max(visited_fvalues)
        # A maximum of zero means no focus response at all; treat it like a
        # flat landscape instead of dividing by zero.
        if max_val == 0 or float(min_val) / max_val > 0.8:
            return "backtrack", len(sweep_fvalues) - 1
        else:
            return "turn_peak", len(sweep_fvalues) - 1


    def _backtrack(self, previous_direction, step_count):
        """From the current lens position, go back to the lens position we
        were at before and look on the other side.

        ``step_count`` is the number of coarse steps to undo. Ends with
        ``status`` set to "foundmax" or "failed".
        """

        new_direction = previous_direction.reverse()

        # Go back to where we started.
        self.camera.move_coarse(new_direction, step_count)

        # Sweep again the other way.
        result, step_count = self._sweep(new_direction)

        if result == "turn_peak":
            self._go_to_max()
        elif result == "backtrack":
            # If we need to backtrack a second time, we failed.
            self.status = "failed"
        else:
            raise AssertionError("unexpected sweep result: %r" % (result,))

    def evaluate(self):
        """Run the simulated search once for this scene and start position.

        The outcome is recorded in ``status`` and can be read back through
        ``get_evaluation``.
        """

        # Take the first two steps, as to get three focus measures with which
        # to decide which direction to sweep.
        self.camera.move_fine(Direction("right"), 2)

        # Decide initial direction in which to look.
        direction = self._get_first_direction()

        # Search in that direction.
        result, step_count = self._sweep(direction)

        if result == "turn_peak":
            self._go_to_max()
        elif result == "backtrack":
            self._backtrack(direction, step_count)
        else:
            raise AssertionError("unexpected sweep result: %r" % (result,))

    def is_true_positive(self):
        """Whether a peak was found and the peak is close to a real peak."""
        return (self.status == "foundmax" and
                self.scene.distance_to_closest_peak(
                    self.camera.last_position()) <= 1)

    def is_false_positive(self):
        """Whether a peak was found and the peak not close to a real peak."""
        return (self.status == "foundmax" and
                self.scene.distance_to_closest_peak(
                    self.camera.last_position()) > 1)

    def is_true_negative(self):
        """Whether we failed to find a peak and we didn't come
        close to a real peak."""
        return (self.status == "failed" and
                all(self.scene.distance_to_closest_peak(pos) > 1
                    for pos in self.camera.visited_positions))

    def is_false_negative(self):
        """Whether we failed to find a peak but we did come
        close to a real peak."""
        return (self.status == "failed" and
                any(self.scene.distance_to_closest_peak(pos) <= 1
                    for pos in self.camera.visited_positions))

    def get_evaluation(self):
        """Return whether a simulation for this scene starting at the given
        lens position gave a true/false positive/negative.

        Returns ``None`` when the search has not finished, i.e. ``status``
        is neither "foundmax" nor "failed".
        """
        if self.is_true_positive():
            return "true positive"
        if self.is_false_positive():
            return "false positive"
        if self.is_true_negative():
            return "true negative"
        if self.is_false_negative():
            return "false negative"
        return None
Beispiel #6
0
class CameraModelEstimator:
    """Estimate camera parameters from 2D/3D point correspondences.

    A RANSAC-style linear fit (``_guess_transformation``) selects the
    inlier correspondences and provides a starting point; ``estimate`` then
    refines focal length, principal point, rotation, translation and
    distortion coefficients with a Levenberg-Marquardt least-squares run.
    """

    def __init__(self, resolution, points2d, points3d, logger=None):
        """Store the correspondences and create the working camera model.

        Args:
            resolution: Passed unchanged to ``CameraModel``.
            points2d: Observed image points, one row per point.
                NOTE(review): the linear fit subtracts an (N, 3) projection
                from this, so it is presumably homogeneous -- confirm.
            points3d: World points, one row per point.
                NOTE(review): multiplied by a 3x4 matrix, so presumably
                homogeneous with four columns -- confirm.
            logger: Logger to report to; the root logger is used when
                omitted.
        """
        self._points2d = points2d
        self._points3d = points3d
        self._points2d_inliers = []
        self._points3d_inliers = []
        self._inliers = []
        self._resolution = resolution
        self._reduce_dist_param = False
        self._cm = CameraModel(resolution)
        self._max_iter = 5000
        # Identity comparison: ``== None`` would invoke a custom ``__eq__``.
        if logger is None:
            self.logger = logging.getLogger()
        else:
            self.logger = logger

    def _loss_full(self, x):
        """Residual function for the non-linear refinement.

        ``x`` holds, in order: fx, fy, cx, cy, thetax, thetay, thetaz,
        tx, ty, tz, k1, k2, k3, p1, p2. The working camera model is
        configured with these values, the inlier 3D points are projected,
        and the flattened difference to the inlier 2D points is returned.
        """
        fx = x[0]
        fy = x[1]
        cx = x[2]
        cy = x[3]
        thetax = x[4]
        thetay = x[5]
        thetaz = x[6]
        tx = x[7]
        ty = x[8]
        tz = x[9]
        k1 = x[10]
        k2 = x[11]
        k3 = x[12]
        p1 = x[13]
        p2 = x[14]
        self._cm.set_c([cx, cy])
        self._cm.set_f([fx, fy])
        self._cm.create_extrinsic([thetax, thetay, thetaz], [tx, ty, tz])
        self._cm.add_distortion([k1, k2, k3], [p1, p2])

        self._cm.update_point_cloud(self._points3d_inliers)
        points2d_est = self._cm.points2d

        dists = points2d_est - self._points2d_inliers

        return dists.flatten()

    def _rq(self, M):
        """Return ``(R, Q)`` with ``M = R * Q``.

        ``R`` is upper triangular with a non-negative diagonal and ``Q`` is
        orthogonal. Built on top of NumPy's QR decomposition.
        """
        # Use algorithm for RQ from QR decomposition from
        # https://math.stackexchange.com/questions/1640695/rq-decomposition
        P = np.fliplr(np.diag(np.ones(len(M))))
        Mstar = np.matmul(P, M)
        Qstar, Rstar = np.linalg.qr(np.transpose(Mstar))
        Q = np.matmul(P, np.transpose(Qstar))
        R = np.matmul(P, np.matmul(np.transpose(Rstar), P))

        # Now make the diagonal of R positive. This can be done, because
        # we know that f and c are always positive.
        T = np.diag(np.sign(np.diag(R)))

        # T is its own inverse, so (R T)(T Q) == R Q == M. The sign matrix
        # therefore has to multiply Q from the left; multiplying from the
        # right would break the factorisation whenever the signs are mixed.
        R = np.matmul(R, T)
        Q = np.matmul(T, Q)

        return R, Q

    def _convert_for_equation(self, XYZ, uv):
        """Build the linear system ``A p = B`` for the projection matrix.

        ``XYZ`` (n rows of X, Y, Z) and ``uv`` (n rows of u, v) must hold
        one point per row. ``p`` are the first eleven entries of the 3x4
        projection matrix; the twelfth is fixed to 1 by the caller.

        Returns:
            ``A`` as a (2n, 11) array and ``B`` as a (2n, 1) matrix.
        """
        n = XYZ.shape[0]
        # XYZ and uv must be in rows not in columns!
        A = np.zeros((2 * n, 11))
        # ``np.mat`` was removed in NumPy 2.0; ``np.asmatrix`` is the same
        # conversion. The matrix type is kept on purpose: the column slices
        # below and the caller rely on matrices staying two-dimensional.
        uv = np.asmatrix(uv)
        # Each point contributes two rows:
        # [[X, Y, Z, 1, 0, 0, 0, 0, -u*X, -u*Y, -u*Z],
        #  [0, 0, 0, 0, X, Y, Z, 1, -v*X, -v*Y, -v*Z]]
        # Even rows take the u equations, odd rows the v equations.
        A[0::2] = np.concatenate((XYZ, np.ones((n, 1)), np.zeros(
            (n, 4)), -np.multiply(np.multiply(np.ones(
                (n, 3)), uv[:, 0]), XYZ)),
                                 axis=1)
        A[1::2] = np.concatenate((np.zeros((n, 4)), XYZ, np.ones(
            (n, 1)), -np.multiply(np.multiply(np.ones(
                (n, 3)), uv[:, 1]), XYZ)),
                                 axis=1)

        # This is simple, we need all uvs flattened to a column vector
        B = np.reshape(uv, (2 * n, 1))
        return A, B

    def _select_points(self, points3d, points2d, n=4):
        """Pick ``n`` distinct random correspondences.

        The chosen row indices are also stored in ``self.sel``.
        """
        # Random sample gives back unique values
        sel = random.sample(range(0, len(points3d)), n)
        sel3d = points3d[sel, :]
        sel2d = points2d[sel, :]
        self.sel = sel
        return sel3d, sel2d

    def _guess_transformation(self, points3d, points2d):
        """Find an initial projection by repeated random linear fits.

        For 3000 rounds, ten random correspondences are fitted with linear
        least squares and all points are reprojected. A candidate is
        accepted when it has more points with a reprojection error below 9
        than the best so far and the skew entry of its intrinsic matrix is
        at most 10 in magnitude. The accepted model is stored in
        ``self._P``, ``self._intrinsic``, ``self._extrinsic`` and
        ``self._inliers``.

        Returns:
            The reprojected 2D points of the accepted model.

        Raises:
            RuntimeError: if no candidate was accepted.
        """
        max_matches = 0
        points2d_est = None
        # Try to find the best match within n tries
        for i in range(0, 3000):
            sel3d, sel2d = self._select_points(points3d, points2d, 10)
            # We can't do a direct linear least square, we first need to create
            # the linear equation matrix, see robotics and control 332 and
            # camcald.m from the corresponding Matlab toolbox
            A, B = self._convert_for_equation(sel3d[:, 0:3], sel2d[:, 0:2])
            # least square should solve the problem for us
            res = np.linalg.lstsq(A, B, rcond=None)
            # Now we have all unknown parameters and we have to bring it to
            # the normal 3x4 matrix. The last parameter C34 is 1!
            P = np.reshape(np.concatenate((res[0], [[1]])), (3, 4))

            # Correct P34 to its actual value
            scale = np.linalg.norm(P[2, 0:3])
            P = P / scale

            points2d_transformed = np.transpose(
                np.matmul(P, np.transpose(points3d)))
            # Normalise by the homogeneous coordinate. This relies on P
            # being an np.matrix so that the column slice stays (N, 1).
            points2d_transformed = (points2d_transformed /
                                    points2d_transformed[:, 2])
            points2d_diff = points2d - points2d_transformed
            reprojection_error = [
                np.linalg.norm(diff) for diff in points2d_diff
            ]
            inliers = [
                idx for idx, err in enumerate(reprojection_error) if err < 9
            ]
            matches = len(inliers)
            if matches > max_matches:
                intrinsic, extrinsic = self._rq(P[0:3, 0:3])
                intrinsic = np.multiply(intrinsic, 1 / intrinsic[2, 2])

                # Reject candidates with a large skew entry.
                if math.fabs(intrinsic[0, 1]) > 10:
                    continue

                self._P = P
                self._intrinsic = np.asarray(intrinsic)
                t = np.linalg.lstsq(intrinsic, P[:, 3], rcond=None)[0]
                self._extrinsic = np.asarray(
                    np.concatenate((extrinsic, t), axis=1))
                max_matches = matches
                self._inliers = inliers
                points2d_est = points2d_transformed
                self.logger.debug(matches)
                self.logger.debug(sorted(inliers))
        self.logger.debug("i: " + str(i))
        self.logger.debug("Max matches: " + str(max_matches))
        self.logger.debug("Match percentage: " +
                          str((max_matches / len(points2d)) * 100))

        if points2d_est is None:
            # Without this check the code below would fail on names that
            # were never assigned, hiding the real cause.
            raise RuntimeError(
                "No camera transformation with inliers could be found")

        # Report the accepted model, not whatever the last round produced.
        self.logger.debug("Found transformation matrix: {}".format(self._P))

        # Make rq matrix decomposition, transformation is not taken into account
        self.logger.debug("Intrinsic: {}\nExtrinsic: {}".format(
            self._intrinsic, self._extrinsic))

        return points2d_est

    def _guess_distortion_lin(self, points2d_are, points2d_est, f, c):
        """Fit k1, k2, k3, p1, p2 linearly to the reprojection offsets.

        Args:
            points2d_are: Observed 2D points.
            points2d_est: Projected 2D points from the current model.
            f: Focal lengths ``[fx, fy]``.
            c: Principal point ``[cx, cy]``.

        Returns:
            The complete result tuple of ``np.linalg.lstsq``; the
            coefficients are its first element.
        """
        # uv = [u, v]
        uv = np.asarray((points2d_est[:, 0:2]) - c)
        xy = (uv) / f
        radius = np.linalg.norm(xy, axis=(1))
        x = xy[:, 0]
        y = xy[:, 1]
        # Because uv[:,0] is a onedimensional array we need to transpose M1 so
        # that we have column vectors
        M1 = np.transpose(
            np.array([
                x * radius**2, x * radius**4, x * radius**6, 2 * x * y,
                radius**2 + 2 * x**2
            ]))
        M2 = np.transpose(
            np.array([
                y * radius**2, y * radius**4, y * radius**6,
                radius**2 + 2 * y**2, 2 * y * x
            ]))
        # Interleave the u rows (even) and v rows (odd).
        M = np.zeros((M1.shape[0] * 2, M1.shape[1]))
        M[0::2] = M1 * f[0]
        M[1::2] = M2 * f[1]
        delta = points2d_are[:, 0:2] - points2d_est[:, 0:2]
        delta = delta.reshape(delta.shape[0] * 2, 1)
        distortion = np.linalg.lstsq(M, delta, rcond=-1)
        self.logger.debug("Distortion: {}".format(distortion[0]))
        return distortion

    def estimate(self):
        """Estimate the camera parameters from the stored correspondences.

        Returns:
            A tuple ``(cameraparams, res)``. ``cameraparams`` is a dict with
            the keys 'f', 'c', 'theta', 't', 'k' and 'p'; ``res`` is the
            result object of ``scipy.optimize.least_squares``.

        Raises:
            RuntimeError: if the linear initial guess finds no model.
        """
        self._points2d_inliers = self._points2d
        self._points3d_inliers = self._points3d

        # Guess initial intrinsic and extrinsic mat with linear least squares
        self._guess_transformation(self._points3d, self._points2d)
        self._points2d_inliers = self._points2d[self._inliers]
        self._points3d_inliers = self._points3d[self._inliers]

        # If we don't use the full intrinsic and extrinsic matrix,
        # we can save a lot of parameters to optimize! We pay that with
        # the overhead of calculating sin/cos/tan
        fx = self._intrinsic[0, 0]
        fy = self._intrinsic[1, 1]
        cx = self._intrinsic[0, 2]
        cy = self._intrinsic[1, 2]

        # Calculate angles
        rot_x = np.arctan2(self._extrinsic[1, 2], self._extrinsic[2, 2])
        rot_y = np.arctan2(
            self._extrinsic[0, 2],
            np.sqrt(self._extrinsic[1, 2]**2 + self._extrinsic[2, 2]**2))
        rot_z = np.arctan2(self._extrinsic[0, 1], self._extrinsic[0, 0])
        tx = self._extrinsic[0, 3]
        ty = self._extrinsic[1, 3]
        tz = self._extrinsic[2, 3]

        # Start vector: intrinsics, extrinsics, then k1-k3 and p1-p2 at zero
        x0 = [
            fx, fy, cx, cy, rot_x, rot_y, rot_z, tx, ty, tz, 0.0, 0.0, 0.0,
            0.0, 0.0
        ]
        self.logger.debug("x0: {}".format(x0))
        res = opt.least_squares(self._loss_full,
                                x0,
                                method='lm',
                                max_nfev=self._max_iter)

        # If fy is < 0 our thetaz points in the wrong direction
        # NOTE(review): index 5 is thetay in the parameter layout, while the
        # comment above speaks of thetaz (index 6) -- confirm which is meant.
        if res.x[1] < 0:
            self.logger.debug("Fix fy")
            res.x[1] = res.x[1] * -1
            res.x[5] = res.x[5] - math.pi

        cameraparams = {}
        cameraparams['f'] = [res.x[0], res.x[1]]
        cameraparams['c'] = [res.x[2], res.x[3]]
        cameraparams['theta'] = [res.x[4], res.x[5], res.x[6]]
        cameraparams['t'] = [res.x[7], res.x[8], res.x[9]]
        cameraparams['k'] = [res.x[10], res.x[11], res.x[12]]
        cameraparams['p'] = [res.x[13], res.x[14]]

        return cameraparams, res
def search_standard(scenes, scene_to_print):
    """Benchmark a standard coarse-then-fine hill-climbing focus search.

    For every scene and every start position in
    ``range(0, scene.step_count - 8)`` a camera is simulated: it sweeps in
    coarse steps until the focus measure falls below 90% of the best value
    seen, then walks back in fine steps until the measure drops. A run
    counts as a success when the final position is within one step of a
    real peak. While the simulations run, stdout is redirected to
    ``comparison.R`` so that the script printed for ``scene_to_print`` ends
    up in that file. Afterwards a per-scene table and the average success
    rate are printed to the original stdout.

    Reads the module-level names ``simulate_backlash`` and
    ``simulate_noise``.

    Args:
        scenes: Sized collection of scenes to simulate.
        scene_to_print: Filename of the scene whose steps are printed.
    """
    print("Perform a standard hill-climbing search, where coarse steps are\n"
          "taken until some stopping condition occurs, at which point the\n"
          "movement is reversed, at which point fine steps are taken to\n"
          "maximize the focus value. This is the method described in\n"
          "[He2003] and [Li2005].\n\n"
          "To visualize the steps taken for simulation of a specific scene,\n"
          "use the command-line argument --scene-to-print=something.txt")

    step_size = 8

    data_rows = [("filename", "success %", "steps")]

    total_success = 0

    # Redirect stdout to a file for printing R script.
    orig_stdout = sys.stdout
    file_to_print = open("comparison.R", "w+")
    sys.stdout = file_to_print
    try:
        for scene in scenes:
            success_count = 0
            total_step_count = 0

            initial_positions = range(0, scene.step_count - step_size)
            for initial_position in initial_positions:
                camera = CameraModel(scene, initial_position,
                    simulate_backlash=simulate_backlash,
                    simulate_noise=simulate_noise)

                first_measure = camera.last_fmeasure()
                camera.move_coarse(Direction("right"))

                # Determine whether to start moving left or right.
                if camera.last_fmeasure() < first_measure:
                    direction = Direction("left")
                else:
                    direction = Direction("right")

                # If the first step decreases focus value, switch direction.
                # This is a simple backtracking, basically.
                first_measure = camera.last_fmeasure()
                camera.move_coarse(direction)
                if camera.last_fmeasure() < first_measure:
                    direction = direction.reverse()

                # Sweep
                max_value = camera.last_fmeasure()
                while not camera.will_hit_edge(direction):
                    camera.move_coarse(direction)
                    max_value = max(max_value, camera.last_fmeasure())

                    # Have we found a peak?
                    if camera.last_fmeasure() < max_value * 0.9:
                        # Stop searching
                        break

                # Hillclimb until we're back at the peak.
                while not camera.will_hit_edge(direction.reverse()):
                    prev_measure = camera.last_fmeasure()
                    camera.move_fine(direction.reverse())
                    if prev_measure > camera.last_fmeasure():
                        # Went one step past the peak; undo it.
                        camera.move_fine(direction)
                        break

                # Record if we succeeded.
                if scene.distance_to_closest_peak(
                        camera.last_position()) <= 1:
                    success_count += 1
                    evaluation = "succeeded"
                else:
                    evaluation = "failed"

                if scene.filename == scene_to_print:
                    camera.print_script(evaluation)

                total_step_count += camera.steps_taken

            success = float(success_count) / len(initial_positions) * 100
            line = (scene.name,
                    "%.1f" % success,
                    "%.1f" % (float(total_step_count) /
                              len(initial_positions)))
            data_rows.append(line)
            total_success += success
    finally:
        # Restore original stdout even when a simulation raises, so later
        # output (including the traceback) is not swallowed by the file.
        sys.stdout = orig_stdout
        file_to_print.close()

    print_aligned_data_rows(data_rows)
    # No scenes means nothing to average; report 0 instead of dividing by 0.
    scene_count = len(scenes)
    average_success = total_success / scene_count if scene_count else 0.0
    # Parenthesised single-argument form is valid on Python 2 and Python 3.
    print("average success : %.1f" % average_success)