def _get_cameramodel(self, noise=True):
    """Build a CameraModel for the current parameters and point cloud.

    The model is created from ``self.resolution`` and ``self.cameraparams``
    and fed ``self.mod.points``. Its projected 2D points are optionally run
    through ``_add_noise`` and then ``_do_missclassifcation`` before being
    written back to the model.

    Args:
        noise: When true, perturb the projected 2D points.

    Returns:
        The populated CameraModel instance.
    """
    camera_parameters = self.cameraparams
    model = CameraModel(self.resolution, camera_parameters)
    model.update_point_cloud(self.mod.points)

    projected = model.points2d
    if noise:
        # Noise first, then misclassification, on the already-noisy points.
        projected = self._do_missclassifcation(self._add_noise(projected))

    model.points2d = projected
    return model
def __init__(self, resolution, points2d, points3d, logger=None):
    """Store the point correspondences and set up the estimator state.

    Args:
        resolution: Passed unchanged to ``CameraModel``.
        points2d: Observed 2D points.
        points3d: 3D points corresponding to ``points2d``.
        logger: Logger used for debug output. The root logger is used
            when omitted.
    """
    self._points2d = points2d
    self._points3d = points3d
    self._points2d_inliers = []
    self._points3d_inliers = []
    self._inliers = []
    self._resolution = resolution
    self._reduce_dist_param = False
    self._cm = CameraModel(resolution)
    self._max_iter = 5000
    # Identity check: ``== None`` would invoke a custom ``__eq__``.
    self.logger = logging.getLogger() if logger is None else logger
def run_application(args):
    """Run the proof of concept and report how well the estimate matches.

    Prints the optimizer cost/optimality, the inlier percentage and a table
    comparing the reference camera parameters with the estimated ones, then
    writes ``diff_img.png`` and shows the reference image and the overlay of
    reference and re-projected images with matplotlib.

    Args:
        args: Forwarded unchanged to ``ProofOfConcept``.
    """
    column_width = 8

    poc = ProofOfConcept(args)
    reference_image = poc.get_image()
    plt.imshow(reference_image)

    reference_params = poc.cameraparams
    estimated_params, result, estimator = poc.estimate()
    print("Cost (reduced data): " + str(result.cost))
    print("Optimality, gradient value (reduced data): " +
          str(result.optimality))

    inlier_array = np.array(estimator._inliers)
    point_array = np.array(poc.mod.points)
    inlier_percentage = 100.0 * inlier_array.shape[0] / point_array.shape[0]
    print("Inliers in percent: " + str(inlier_percentage))

    header = [
        " ", "fx", "fy", "cx", "cy", "thetax", "thetay", "thetaz", "tx",
        "ty", "tz", "k1", "k2", "k3", "p1", "p2"
    ]
    print(array_string(header, column_width))
    print("shuld: " + reference_params.get_string(column_width))
    print("is: " + estimated_params.get_string(column_width))

    reference_vector = np.array(reference_params.get_as_array())
    estimated_vector = np.array(estimated_params.get_as_array())
    dist = reference_vector - estimated_vector
    print("diff: " + array_string(np.round(dist, 2), column_width))
    print("Tot. diff: {}".format(np.linalg.norm(dist)))

    # Re-project the point cloud with the estimated parameters.
    estimated_model = CameraModel(poc.resolution, estimated_params)
    estimated_model.update_point_cloud(poc.mod.points)
    estimated_image = estimated_model.get_image()

    # Estimated image goes into channel 2, reference image into channel 1.
    canvas_shape = (poc.resolution[0], poc.resolution[1], 3)
    estimated_layer = np.zeros(canvas_shape)
    reference_layer = np.zeros(canvas_shape)
    estimated_layer[:, :, 2] = estimated_image
    reference_layer[:, :, 1] = reference_image

    kernel = np.ones((3, 3), np.uint8)
    overlay = cv2.dilate(estimated_layer + reference_layer, kernel,
                         iterations=1)
    cv2.imwrite("diff_img.png", overlay * 255)

    plt.figure()
    plt.imshow(overlay)
    plt.show()
def __init__(self, params, scene, initial_pos):
    """Set up a simulation of one scene from one starting position.

    Args:
        params: Settings object; ``backlash``, ``noise`` and
            ``perfect_classification`` are read here.
        scene: Scene handed to ``CameraModel``; its ``filename`` selects
            the perfect-classification table when one is supplied.
        initial_pos: Starting position handed to ``CameraModel``.
    """
    self.scene, self.params = scene, params
    self.initial_pos = initial_pos
    self.status = "none"
    self.camera = CameraModel(
        scene,
        initial_pos,
        simulate_backlash=self.params.backlash,
        simulate_noise=self.params.noise,
    )

    # No table means no per-scene lookup is stored either.
    table = params.perfect_classification
    self.perfect_classification = (
        None if table is None else table[scene.filename])
class Simulator(object):
    """Simulate one focus search on one scene from one starting position.

    ``status`` starts as "none" and becomes "foundmax" once a maximum has
    been settled on, or "failed" when a second backtrack would be needed.
    """

    # Divisor used to turn a lens-position distance into a number of coarse
    # steps. Presumably the number of positions one coarse step covers --
    # TODO confirm against CameraModel.move_coarse.
    _COARSE_STEP_SIZE = 8

    def __init__(self, params, scene, initial_pos):
        """Create the simulated camera and pick the classification source.

        Args:
            params: Settings object; ``backlash``, ``noise``,
                ``perfect_classification``, ``left_right_tree`` and
                ``action_tree`` are read by this class.
            scene: Scene to simulate on.
            initial_pos: Lens position the camera starts at.
        """
        self.scene = scene
        self.params = params
        self.initial_pos = initial_pos
        self.status = "none"
        self.camera = CameraModel(scene, initial_pos,
                                  simulate_backlash=self.params.backlash,
                                  simulate_noise=self.params.noise)
        if params.perfect_classification is None:
            self.perfect_classification = None
        else:
            self.perfect_classification = \
                params.perfect_classification[scene.filename]

    def _do_local_search(self, direction, rev_direction):
        """Perform a local search (incremental hillclimbing in a given
        direction).

        The hillclimbing has a tolerance of two steps, i.e., up to two
        steps that don't increase the focus value can be taken before we
        stop climbing.

        Args:
            direction: Direction to climb in.
            rev_direction: The opposite direction, used for backtracking.
        """
        # NOTE(review): ``two_step_tolerance`` is not defined in this class;
        # it is expected to be a module-level flag.
        while not self.camera.will_hit_edge(direction):
            prev_fmeasure = self.camera.last_fmeasure()
            self.camera.move_fine(direction)
            if self.camera.last_fmeasure() < prev_fmeasure:
                if (two_step_tolerance and
                        not self.camera.will_hit_edge(direction)):
                    # We've seen a decrease. Consider moving one extra step.
                    prev_fmeasure = self.camera.last_fmeasure()
                    self.camera.move_fine(direction)
                    if self.camera.last_fmeasure() < prev_fmeasure:
                        # Seen a decrease again, backtrack and stop.
                        self.camera.move_fine(rev_direction, 2)
                        break
                else:
                    # Backtrack and stop.
                    self.camera.move_fine(rev_direction)
                    break

    def _go_to_max(self):
        """Return to the location of the largest focus value seen so far
        and perform a local search to find the exact location of the peak.

        Sets ``status`` to "foundmax".
        """
        current_pos = self.camera.last_position()
        maximum_pos = max(self.camera.visited_positions,
                          key=(lambda pos: self.camera.get_fvalue(pos)))

        if maximum_pos < current_pos:
            direction = Direction("left")
        elif maximum_pos > current_pos:
            direction = Direction("right")
        elif current_pos < self.camera.visited_positions[-2]:
            # Already at the maximum: keep going the way we last moved.
            direction = Direction("left")
        else:
            direction = Direction("right")
        rev_direction = direction.reverse()

        # Take as many coarse steps as needed to go back to the maximum
        # without going over it. Floor division keeps the step count a
        # whole number on both Python 2 and Python 3.
        distance = abs(current_pos - maximum_pos)
        coarse_steps = distance // self._COARSE_STEP_SIZE
        self.camera.move_coarse(direction, coarse_steps)

        # Keep going in fine steps to see if we can find a higher position.
        start_pos = self.camera.last_position()
        self._do_local_search(direction, rev_direction)

        # If we didn't move further, we might want to look in the other
        # direction too.
        if start_pos == self.camera.last_position():
            self._do_local_search(rev_direction, direction)

        self.status = "foundmax"

    def _get_first_direction(self):
        """Direction in which we should start sweeping initially."""
        first, second, third = self.camera.get_fvalues(
            self.camera.visited_positions[-3:])
        norm_lens_pos = float(self.initial_pos) / (self.scene.step_count - 1)

        evaluator = featuresfirststep.firststep_feature_evaluator(
            first, second, third, norm_lens_pos)
        return Direction(evaluatetree.evaluate_tree(
            self.params.left_right_tree, evaluator))

    def _sweep(self, direction):
        """Sweep the lens in one direction.

        Returns:
            A tuple (classification, number of coarse steps taken), where
            the classification is "turn_peak" or "backtrack".
        """
        initial_position = self.camera.last_position()
        sweep_fvalues = [self.camera.last_fmeasure()]

        while not self.camera.will_hit_edge(direction):
            # Move the lens forward.
            self.camera.move_coarse(direction)
            sweep_fvalues.append(self.camera.last_fmeasure())

            # Take at least two steps before we allow turning back.
            if len(sweep_fvalues) < 3:
                continue

            if self.perfect_classification is None:
                # Obtain the ML classification at the new lens position.
                evaluator = featuresturn.action_feature_evaluator(
                    sweep_fvalues, self.scene.step_count)
                classification = evaluatetree.evaluate_tree(
                    self.params.action_tree, evaluator)
            else:
                key = featuresturn.make_key(str(direction), initial_position,
                                            self.camera.last_position())
                classification = self.perfect_classification[key]

            if classification != "continue":
                assert (classification == "turn_peak" or
                        classification == "backtrack")
                return classification, len(sweep_fvalues) - 1

        # We've reached an edge, but the decision tree still does not want
        # to turn back. A manual condition is used here: it is a bit ad-hoc,
        # but this case must be handled robustly, as there are a lot of
        # cases (i.e., landscape shots) where peaks will be at the edge.
        min_val = min(self.camera.get_fvalues(self.camera.visited_positions))
        max_val = max(self.camera.get_fvalues(self.camera.visited_positions))
        if float(min_val) / max_val > 0.8:
            # The visited focus values are nearly flat: look the other way.
            return "backtrack", len(sweep_fvalues) - 1
        else:
            return "turn_peak", len(sweep_fvalues) - 1

    def _backtrack(self, previous_direction, step_count):
        """From the current lens position, go back to the lens position we
        were at before and look on the other side.

        Args:
            previous_direction: Direction of the sweep being undone.
            step_count: Number of coarse steps that sweep took.
        """
        new_direction = previous_direction.reverse()

        # Go back to where we started.
        self.camera.move_coarse(new_direction, step_count)

        # Sweep again the other way.
        result, step_count = self._sweep(new_direction)
        if result == "turn_peak":
            self._go_to_max()
        elif result == "backtrack":
            # If we need to backtrack a second time, we failed.
            self.status = "failed"
        else:
            assert False

    def evaluate(self):
        """Run the focus search for this scene and starting position.

        The outcome is left in ``status`` and in the camera's record of
        visited positions; nothing is returned.
        """
        # Take the first two steps, as to get three focus measures with
        # which to decide which direction to sweep.
        self.camera.move_fine(Direction("right"), 2)

        # Decide initial direction in which to look.
        direction = self._get_first_direction()

        # Search in that direction.
        result, step_count = self._sweep(direction)
        if result == "turn_peak":
            self._go_to_max()
        elif result == "backtrack":
            self._backtrack(direction, step_count)
        else:
            assert False

    def is_true_positive(self):
        """Whether a peak was found and the peak is close to a real peak."""
        return (self.status == "foundmax" and
                self.scene.distance_to_closest_peak(
                    self.camera.last_position()) <= 1)

    def is_false_positive(self):
        """Whether a peak was found and the peak not close to a real peak."""
        return (self.status == "foundmax" and
                self.scene.distance_to_closest_peak(
                    self.camera.last_position()) > 1)

    def is_true_negative(self):
        """Whether we failed to find a peak and we didn't come close to a
        real peak."""
        return (self.status == "failed" and
                all(self.scene.distance_to_closest_peak(pos) > 1
                    for pos in self.camera.visited_positions))

    def is_false_negative(self):
        """Whether we failed to find a peak but we did come close to a
        real peak."""
        return (self.status == "failed" and
                any(self.scene.distance_to_closest_peak(pos) <= 1
                    for pos in self.camera.visited_positions))

    def get_evaluation(self):
        """Return whether the simulation gave a true/false
        positive/negative.

        Returns:
            One of "true positive", "false positive", "true negative" or
            "false negative"; ``None`` when none of them applies (for
            example while ``status`` is still "none").
        """
        if self.is_true_positive():
            return "true positive"
        if self.is_false_positive():
            return "false positive"
        if self.is_true_negative():
            return "true negative"
        if self.is_false_negative():
            return "false negative"
        return None
class CameraModelEstimator:
    """Estimate camera parameters from 2D/3D point correspondences.

    ``estimate`` first searches for a linear projection matrix by repeated
    random sampling, then refines focal length, principal point, rotation,
    translation and distortion coefficients with a non-linear least-squares
    fit on the inliers of that linear model.

    NOTE(review): the linear stage relies on ``np.matrix`` semantics
    (2-D column slices), so the point arrays are presumably expected to be
    ``np.matrix`` instances with one homogeneous point per row -- confirm
    against the callers.
    """

    # Settings of the random-sampling search in ``_guess_transformation``.
    _SAMPLING_ITERATIONS = 3000
    _SAMPLE_SIZE = 10
    # A point counts as an inlier below this reprojection error.
    _INLIER_THRESHOLD = 9
    # Candidates whose normalized skew entry exceeds this are rejected.
    _MAX_SKEW = 10

    def __init__(self, resolution, points2d, points3d, logger=None):
        """Store the point correspondences and set up the estimator state.

        Args:
            resolution: Passed unchanged to ``CameraModel``.
            points2d: Observed 2D points.
            points3d: 3D points corresponding to ``points2d``.
            logger: Logger used for debug output. The root logger is used
                when omitted.
        """
        self._points2d = points2d
        self._points3d = points3d
        self._points2d_inliers = []
        self._points3d_inliers = []
        self._inliers = []
        self._resolution = resolution
        self._reduce_dist_param = False
        self._cm = CameraModel(resolution)
        self._max_iter = 5000
        # Identity check: ``== None`` would invoke a custom ``__eq__``.
        self.logger = logging.getLogger() if logger is None else logger

    def _loss_full(self, x):
        """Residuals between the projected and the observed inlier points.

        Args:
            x: Parameter vector ``[fx, fy, cx, cy, thetax, thetay, thetaz,
                tx, ty, tz, k1, k2, k3, p1, p2]``.

        Returns:
            Flattened differences ``projected - observed``.
        """
        (fx, fy, cx, cy, thetax, thetay, thetaz, tx, ty, tz,
         k1, k2, k3, p1, p2) = x[:15]
        self._cm.set_c([cx, cy])
        self._cm.set_f([fx, fy])
        self._cm.create_extrinsic([thetax, thetay, thetaz], [tx, ty, tz])
        self._cm.add_distortion([k1, k2, k3], [p1, p2])
        self._cm.update_point_cloud(self._points3d_inliers)
        points2d_est = self._cm.points2d
        dists = points2d_est - self._points2d_inliers
        return dists.flatten()

    def _rq(self, M):
        """RQ-decompose ``M`` into upper-triangular ``R`` and orthogonal ``Q``.

        The diagonal of ``R`` is made positive, so ``R @ Q == M`` holds with
        a uniquely determined pair for an invertible ``M``.

        Returns:
            The tuple ``(R, Q)``.
        """
        # Uses the algorithm for RQ from QR decomposition from
        # https://math.stackexchange.com/questions/1640695/rq-decomposition
        P = np.fliplr(np.diag(np.ones(len(M))))
        Mstar = np.matmul(P, M)
        Qstar, Rstar = np.linalg.qr(np.transpose(Mstar))
        Q = np.matmul(P, np.transpose(Qstar))
        R = np.matmul(P, np.matmul(np.transpose(Rstar), P))
        # Now make the diagonal of R positive. This can be done, because
        # we know that f and c are always positive.
        T = np.diag(np.sign(np.diag(R)))
        # M = R Q = (R T)(T Q) because T T = I, so T goes on the right of R
        # and on the LEFT of Q. Multiplying Q from the right would break
        # R Q = M whenever the signs are mixed.
        R = np.matmul(R, T)
        Q = np.matmul(T, Q)
        return R, Q

    def _convert_for_equation(self, XYZ, uv):
        """Build the linear system ``A p = B`` for the projection matrix.

        Args:
            XYZ: 3D points, one per row.
            uv: 2D points, one per row.

        Returns:
            ``(A, B)`` with ``A`` of shape ``(2n, 11)`` and ``B`` an
            ``np.matrix`` column of the interleaved ``u, v`` values.
        """
        n = XYZ.shape[0]
        # XYZ and uv must be in rows not in columns!
        A = np.zeros((2 * n, 11))
        # ``np.mat`` was removed in NumPy 2.0; ``np.asmatrix`` is the same
        # function under its remaining name.
        uv = np.asmatrix(uv)
        # Each point contributes two rows:
        # [[X, Y, Z, 1, 0, 0, 0, 0, -u*X, -u*Y, -u*Z],
        #  [0, 0, 0, 0, X, Y, Z, 1, -v*X, -v*Y, -v*Z]]
        A[0::2] = np.concatenate(
            (XYZ, np.ones((n, 1)), np.zeros((n, 4)),
             -np.multiply(np.multiply(np.ones((n, 3)), uv[:, 0]), XYZ)),
            axis=1)
        A[1::2] = np.concatenate(
            (np.zeros((n, 4)), XYZ, np.ones((n, 1)),
             -np.multiply(np.multiply(np.ones((n, 3)), uv[:, 1]), XYZ)),
            axis=1)
        # This is simple, we need all uvs flattened to a column vector
        B = np.reshape(uv, (2 * n, 1))
        return A, B

    def _select_points(self, points3d, points2d, n=4):
        """Pick ``n`` distinct random correspondences.

        The chosen row indices are also kept in ``self.sel``.
        """
        # Random sample gives back unique values
        sel = random.sample(range(0, len(points3d)), n)
        sel3d = points3d[sel, :]
        sel2d = points2d[sel, :]
        self.sel = sel
        return sel3d, sel2d

    def _guess_transformation(self, points3d, points2d):
        """Find a linear projection matrix by repeated random sampling.

        The best accepted candidate is stored in ``self._P``,
        ``self._intrinsic``, ``self._extrinsic`` and ``self._inliers``.

        Returns:
            The projections of ``points3d`` under the best candidate.

        Raises:
            RuntimeError: If no candidate was accepted.
        """
        max_matches = 0
        points2d_est = None
        # Try to find the best match within n tries
        for i in range(0, self._SAMPLING_ITERATIONS):
            sel3d, sel2d = self._select_points(points3d, points2d,
                                               self._SAMPLE_SIZE)
            # We can't do a direct linear least square, we first need to
            # create the linear equation matrix, see robotics and control
            # 332 and camcald.m from the corresponding Matlab toolbox
            A, B = self._convert_for_equation(sel3d[:, 0:3], sel2d[:, 0:2])
            # least square should solve the problem for us
            res = np.linalg.lstsq(A, B, rcond=None)
            # Now we have all unknown parameters and we have to bring it to
            # the normal 3x4 matrix. The last parameter C34 is 1!
            P = np.reshape(np.concatenate((res[0], [[1]])), (3, 4))
            # Correct P34 to its actual value
            scale = np.linalg.norm(P[2, 0:3])
            P = P / scale

            points2d_transformed = np.transpose(
                np.matmul(P, np.transpose(points3d)))
            points2d_transformed = (
                points2d_transformed / points2d_transformed[:, 2])
            points2d_diff = points2d - points2d_transformed
            reprojection_error = [
                np.linalg.norm(diff) for diff in points2d_diff
            ]
            inliers = [
                index for index, err in enumerate(reprojection_error)
                if err < self._INLIER_THRESHOLD
            ]
            matches = len(inliers)
            if matches <= max_matches:
                continue

            intrinsic, extrinsic = self._rq(P[0:3, 0:3])
            intrinsic = np.multiply(intrinsic, 1 / intrinsic[2, 2])
            if math.fabs(intrinsic[0, 1]) > self._MAX_SKEW:
                continue

            self._P = P
            self._intrinsic = np.asarray(intrinsic)
            t = np.linalg.lstsq(intrinsic, P[:, 3], rcond=None)[0]
            self._extrinsic = np.asarray(
                np.concatenate((extrinsic, t), axis=1))
            max_matches = matches
            self._inliers = inliers
            points2d_est = points2d_transformed
            self.logger.debug(matches)
            self.logger.debug(sorted(inliers))
            self.logger.debug("i: " + str(i))

        if points2d_est is None:
            raise RuntimeError(
                "no acceptable linear camera model found in {} tries".format(
                    self._SAMPLING_ITERATIONS))

        self.logger.debug("Max matches: " + str(max_matches))
        self.logger.debug("Match percentage: " +
                          str((max_matches / len(points2d)) * 100))
        # Report the stored best model, not whatever the last try produced.
        self.logger.debug("Found transformation matrix: {}".format(self._P))
        self.logger.debug("Intrinsic: {}\nExtrinsic: {}".format(
            self._intrinsic, self._extrinsic))
        return points2d_est

    def _guess_distortion_lin(self, points2d_are, points2d_est, f, c):
        """Linear least-squares guess of the distortion coefficients.

        Args:
            points2d_are: Observed 2D points.
            points2d_est: Projected 2D points.
            f: Focal lengths ``(fx, fy)``.
            c: Principal point ``(cx, cy)``.

        Returns:
            The full result tuple of ``np.linalg.lstsq``; element 0 holds
            the coefficients in the order ``k1, k2, k3, p1, p2``.
        """
        # uv = [u, v]
        uv = np.asarray((points2d_est[:, 0:2]) - c)
        xy = (uv) / f
        radius = np.linalg.norm(xy, axis=(1))
        x = xy[:, 0]
        y = xy[:, 1]
        # Because uv[:,0] is a onedimensional array we need to transpose M1
        # so that we have column vectors
        M1 = np.transpose(
            np.array([
                x * radius**2, x * radius**4, x * radius**6, 2 * x * y,
                radius**2 + 2 * x**2
            ]))
        M2 = np.transpose(
            np.array([
                y * radius**2, y * radius**4, y * radius**6,
                radius**2 + 2 * y**2, 2 * y * x
            ]))
        M = np.zeros((M1.shape[0] * 2, M1.shape[1]))
        M[0::2] = M1 * f[0]
        M[1::2] = M2 * f[1]
        delta = points2d_are[:, 0:2] - points2d_est[:, 0:2]
        delta = delta.reshape(delta.shape[0] * 2, 1)
        distortion = np.linalg.lstsq(M, delta, rcond=-1)
        self.logger.debug("Distortion: {}".format(distortion[0]))
        return distortion

    def estimate(self):
        """Estimate the camera parameters from the stored correspondences.

        Returns:
            A tuple ``(cameraparams, res)``: a dict with the keys ``f``,
            ``c``, ``theta``, ``t``, ``k`` and ``p``, and the result object
            returned by ``opt.least_squares``.

        Raises:
            RuntimeError: If the linear stage finds no acceptable model.
        """
        self._points2d_inliers = self._points2d
        self._points3d_inliers = self._points3d
        # Guess initial intrinsic and extrinsic mat with linear least squares
        points2d_est = self._guess_transformation(self._points3d,
                                                  self._points2d)
        self._points2d_inliers = self._points2d[self._inliers]
        self._points3d_inliers = self._points3d[self._inliers]
        # If we don't use the full intrinsic and extrinsic matrix,
        # we can save a lot of parameters to optimize! We pay that with
        # the overhead of calculating sin/cos/tan
        fx = self._intrinsic[0, 0]
        fy = self._intrinsic[1, 1]
        cx = self._intrinsic[0, 2]
        cy = self._intrinsic[1, 2]
        points2d_est_inliers = points2d_est[self._inliers]
        # Calculate angles
        rot_x = np.arctan2(self._extrinsic[1, 2], self._extrinsic[2, 2])
        rot_y = np.arctan2(
            self._extrinsic[0, 2],
            np.sqrt(self._extrinsic[1, 2]**2 + self._extrinsic[2, 2]**2))
        rot_z = np.arctan2(self._extrinsic[0, 1], self._extrinsic[0, 0])
        tx = self._extrinsic[0, 3]
        ty = self._extrinsic[1, 3]
        tz = self._extrinsic[2, 3]
        # Create an array from the intrinsic, extrinsic and k0-k2, p0-p1
        x0 = [
            fx, fy, cx, cy, rot_x, rot_y, rot_z, tx, ty, tz, 0.0, 0.0, 0.0,
            0.0, 0.0
        ]
        self.logger.debug("x0: {}".format(x0))
        res = opt.least_squares(self._loss_full,
                                x0,
                                method='lm',
                                max_nfev=self._max_iter)
        # If fy is < 0 our thetaz points in the wrong direction
        # NOTE(review): the comment names thetaz, but index 5 is thetay
        # (thetaz is index 6) -- confirm which angle is meant.
        if res.x[1] < 0:
            self.logger.debug("Fix fy")
            res.x[1] = res.x[1] * -1
            res.x[5] = res.x[5] - math.pi
        cameraparams = {}
        cameraparams['f'] = [res.x[0], res.x[1]]
        cameraparams['c'] = [res.x[2], res.x[3]]
        cameraparams['theta'] = [res.x[4], res.x[5], res.x[6]]
        cameraparams['t'] = [res.x[7], res.x[8], res.x[9]]
        cameraparams['k'] = [res.x[10], res.x[11], res.x[12]]
        cameraparams['p'] = [res.x[13], res.x[14]]
        return cameraparams, res
def _run_standard_search(scene, initial_position):
    """Run one coarse-then-fine hill-climbing search; return the camera."""
    # NOTE(review): ``simulate_backlash`` and ``simulate_noise`` are not
    # defined here; they are expected to be module-level settings.
    camera = CameraModel(scene, initial_position,
                         simulate_backlash=simulate_backlash,
                         simulate_noise=simulate_noise)

    first_measure = camera.last_fmeasure()
    camera.move_coarse(Direction("right"))

    # Determine whether to start moving left or right.
    if camera.last_fmeasure() < first_measure:
        direction = Direction("left")
    else:
        direction = Direction("right")

    # If the first step decreases focus value, switch direction.
    # This is a simple backtracking, basically.
    first_measure = camera.last_fmeasure()
    camera.move_coarse(direction)
    if camera.last_fmeasure() < first_measure:
        direction = direction.reverse()

    # Sweep
    max_value = camera.last_fmeasure()
    while not camera.will_hit_edge(direction):
        camera.move_coarse(direction)
        max_value = max(max_value, camera.last_fmeasure())

        # Have we found a peak?
        if camera.last_fmeasure() < max_value * 0.9:
            # Stop searching
            break

    # Hillclimb until we're back at the peak.
    backwards = direction.reverse()
    while not camera.will_hit_edge(backwards):
        prev_measure = camera.last_fmeasure()
        camera.move_fine(backwards)
        if prev_measure > camera.last_fmeasure():
            # One step too far: undo it and stop.
            camera.move_fine(direction)
            break

    return camera


def search_standard(scenes, scene_to_print):
    """Evaluate the standard hill-climbing search on every scene.

    For each scene the search is simulated from every initial position in
    ``range(0, scene.step_count - 8)``. While the simulations run,
    ``sys.stdout`` is redirected to ``comparison.R`` so that the script
    printed for ``scene_to_print`` ends up in that file. Afterwards a table
    of per-scene success rate and mean step count is printed, followed by
    the average success rate.

    Args:
        scenes: Scenes to simulate.
        scene_to_print: Filename of the scene whose steps should be printed
            as a script.

    Raises:
        ZeroDivisionError: If ``scenes`` is empty or a scene has no initial
            positions to start from.
    """
    print("Perform a standard hill-climbing search, where coarse steps are\n"
          "taken until some stopping condition occurs, at which point the\n"
          "movement is reversed, at which point fine steps are taken to\n"
          "maximize the focus value. This is the method described in\n"
          "[He2003] and [Li2005].\n\n"
          "To visualize the steps taken for simulation of a specific scene,\n"
          "use the command-line argument --scene-to-print=something.txt")

    step_size = 8

    data_rows = [("filename", "success %", "steps")]
    total_success = 0

    # Redirect stdout to a file for printing R script. The try/finally and
    # the with-block guarantee stdout is restored and the file is closed
    # even when a simulation raises.
    orig_stdout = sys.stdout
    with open("comparison.R", "w+") as file_to_print:
        sys.stdout = file_to_print
        try:
            for scene in scenes:
                success_count = 0
                total_step_count = 0
                initial_positions = range(0, scene.step_count - step_size)

                for initial_position in initial_positions:
                    camera = _run_standard_search(scene, initial_position)

                    # Record if we succeeded.
                    if scene.distance_to_closest_peak(
                            camera.last_position()) <= 1:
                        success_count += 1
                        evaluation = "succeeded"
                    else:
                        evaluation = "failed"

                    if scene.filename == scene_to_print:
                        camera.print_script(evaluation)

                    total_step_count += camera.steps_taken

                success = (float(success_count) /
                           len(initial_positions) * 100)
                line = (scene.name,
                        "%.1f" % success,
                        "%.1f" % (float(total_step_count) /
                                  len(initial_positions)))
                data_rows.append(line)
                total_success += success
        finally:
            # Restore original stdout
            sys.stdout = orig_stdout

    print_aligned_data_rows(data_rows)
    # Single parenthesised argument: valid on both Python 2 and Python 3.
    print("average success : %.1f" % (total_success / len(scenes)))