def _train(self, train_img, deform_param_gen):
    img_gray = cv2.cvtColor(train_img, cv2.COLOR_BGR2GRAY)
    H, W = np.shape(img_gray)[:2]
    self.logger.debug("Training image size (w, h) = ({}, {})".format(W, H))

    corners = list(get_stable_corners(img_gray, self._max_train_corners))

    draw_points(train_img, corners, title="Stable corners")
    cv2.waitKey(10)

    self._classes_count = len(corners)
    self.logger.debug(
        "Allocating probability matrix: ferns x classes x K = {} x {} x {}"
        .format(len(self._ferns), self._classes_count, self._K))
    self._fern_p = np.zeros(
        (len(self._ferns), self._classes_count, self._K))

    self.key_points = []
    skipped = 0
    train_patches = 0
    title = "Training {} classes".format(self._classes_count)
    for R, _, img in iter_timer(
            generate_deformations(img_gray, deform_param_gen), title, False):
        # Deform the corner coordinates with the same affine matrix R that
        # produced the deformed image.
        new_corners = flip_points(corners)
        t = [[1]] * len(new_corners)
        new_corners = np.transpose(np.hstack((new_corners, t)))
        deformed_corners = flip_points(
            np.asarray(np.transpose(np.dot(R, new_corners))))

        for class_idx, (corner, deformed_corner) in enumerate(
                zip(corners, deformed_corners)):
            self.key_points.append(corner)

            cy, cx = deformed_corner
            # Skip corners that the deformation moved outside the image.
            if not (0 <= cy < H and 0 <= cx < W):
                skipped += 1
                continue
            train_patches += 1

            patch = generate_patch(img, deformed_corner, self._patch_size)
            for fern_idx, fern in enumerate(self._ferns):
                k = fern.calculate(patch)
                assert 0 <= k < self._K, "fern output out of range"
                self._fern_p[fern_idx, class_idx, k] += 1

    self.logger.debug("skipped {} / {} deformations".format(
        skipped, train_patches))

    # Convert counts to log-probabilities with additive (Laplace) smoothing.
    Nr = 1
    for fern_idx in iter_timer(range(len(self._ferns)),
                               title="Calculating probs"):
        for cls_idx in range(self._classes_count):
            Nc = np.sum(self._fern_p[fern_idx, cls_idx, :])
            self._fern_p[fern_idx, cls_idx, :] += Nr
            self._fern_p[fern_idx, cls_idx, :] /= Nc + self._K * Nr

    self._fern_p = np.log(self._fern_p)
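# A minimal sketch of the Fern class assumed above. In Ozuysal-style random
# ferns each fern is a fixed set of S pixel-pair comparisons on a patch, and
# calculate() packs the S binary outcomes into an index k in [0, 2**S), so
# K = 2**S. The pair layout below is an illustrative assumption, not the
# repository's actual implementation.
import numpy as np

class Fern(object):
    def __init__(self, patch_size, num_tests, rng=np.random):
        h, w = patch_size
        # Random pixel pairs, fixed once at construction time.
        self._pairs = [((rng.randint(h), rng.randint(w)),
                        (rng.randint(h), rng.randint(w)))
                       for _ in range(num_tests)]

    def calculate(self, patch):
        """Pack the binary pixel tests into a single fern index k."""
        k = 0
        for (y1, x1), (y2, x2) in self._pairs:
            k = (k << 1) | (1 if patch[y1, x1] < patch[y2, x2] else 0)
        return k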
def solve(world_points_in, image_points_in, pixel_scale, annotate_image=None):
    """
    Find a camera's orientation given a set of world coordinates and a
    corresponding set of camera coordinates.

    world_points: Dict mapping point names to triples corresponding with
                  world x, y, z coordinates.
    image_points: Dict mapping point names to pairs corresponding with camera
                  x, y coordinates. Coordinates are translated such that 0, 0
                  corresponds with the centre of the image.

    Return: 4x4 matrix representing the camera's orientation.
    """
    assert set(world_points_in.keys()) >= set(image_points_in.keys())
    keys = sorted(list(image_points_in.keys()))
    assert len(keys) >= 4

    world_points = hstack([matrix(list(world_points_in[k])).T for k in keys])
    image_points = hstack([matrix(list(image_points_in[k]) + [pixel_scale]).T
                           for k in keys])
    image_points = image_points / pixel_scale

    control_indices = choose_control_points(world_points)
    C = make_coeff_matrix(world_points, control_indices)
    M = make_M(image_points, C)

    eig_vals, eig_vecs = numpy.linalg.eig(M.T * M)
    # Order the eigenvectors by ascending eigenvalue; the null-space
    # candidates are then the leading columns of V.
    V = eig_vecs.T[eig_vals.argsort()].T

    world_ctrl_points = util.col_slice(world_points, control_indices)

    b1 = calc_beta_case_1(V[:, :1], world_ctrl_points)
    b2 = calc_beta_case_2(V[:, :2], world_ctrl_points)
    b3 = calc_beta_case_3(V[:, :3], world_ctrl_points)

    outs = []
    errs = []
    for b in [b1, b2, b3]:
        x = V[:, :b.shape[1]] * b.T
        x = x.reshape((4, 3))
        R, offs = util.orientation_from_correspondences(world_ctrl_points,
                                                        x.T)
        outs.append((R, offs))
        e = calc_reprojection_error(R, offs, world_points, image_points)
        print "Reprojection error = %f" % e
        errs.append(e)

    # Keep the candidate pose with the smallest reprojection error.
    R, offs = outs[array(errs).argmin()]

    if annotate_image:
        P = hstack([R, offs])
        all_keys = list(world_points_in.keys())
        world_points_mat = hstack([matrix(list(world_points_in[k]) + [1.0]).T
                                   for k in all_keys])
        image_points_mat = P * world_points_mat
        image_points_mat = matrix([[r[0, 0] / r[0, 2], r[0, 1] / r[0, 2]]
                                   for r in image_points_mat.T]).T
        util.draw_points(annotate_image,
                         dict(zip(all_keys, list(image_points_mat.T))))

    return R, offs
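# calc_reprojection_error is referenced but not defined above. A minimal
# sketch consistent with how it is called (R, offs from the fitted pose,
# 3xN world points, image points already normalised by pixel_scale):
from numpy import hstack, matrix

def calc_reprojection_error(R, offs, world_points, image_points):
    """Mean squared distance between projected and observed image points."""
    n = world_points.shape[1]
    projected = R * world_points + hstack([offs] * n)
    err = 0.0
    for i in xrange(n):
        p = projected[:, i] / projected[2, i]
        d = p[:2, 0] - image_points[:2, i]
        err += (d.T * d)[0, 0]
    return err / n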
def solve(world_points, image_points, annotate_image=None):
    """
    Find a camera's orientation and intrinsic parameters given a set of world
    coordinates and a corresponding set of camera coordinates.

    world_points: Dict mapping point names to triples corresponding with
                  world x, y, z coordinates.
    image_points: Dict mapping point names to pairs corresponding with camera
                  x, y coordinates. Coordinates are translated such that 0, 0
                  corresponds with the centre of the image.

    Return: the camera matrix K, and a 3x4 matrix [R|t] representing the
            camera's orientation.
    """
    assert set(world_points.keys()) >= set(image_points.keys())
    keys = list(image_points.keys())

    # Need at least 6 points to solve for the 3x4 projection matrix: it has
    # 12 entries (11 DOF up to scale), and each point gives 2 equations.
    assert len(keys) >= 6

    M = vstack([correspondence_matrix(world_points[key], image_points[key])
                for key in keys])
    eig_vals, eig_vecs = numpy.linalg.eig(M.T * M)
    # The projection matrix is the eigenvector with the smallest eigenvalue.
    P = (eig_vecs.T[eig_vals.argmin()]).T
    P = P.reshape((3, 4))

    # Split P into intrinsic and extrinsic parts via RQ decomposition.
    K, R = map(matrix, scipy.linalg.rq(P[:, :3]))
    t = K.I * P[:, 3:]
    R = hstack((R, t))
    K = K / K[2, 2]
    #K[0, 1] = 0.0
    #K[0, 2] = 0.0
    #K[1, 2] = 0.0
    P = K * R

    if annotate_image:
        all_keys = list(world_points.keys())
        world_points_mat = hstack([matrix(list(world_points[k]) + [1.0]).T
                                   for k in all_keys])
        image_points_mat = P * world_points_mat
        image_points_mat = matrix([[r[0, 0] / r[0, 2], r[0, 1] / r[0, 2]]
                                   for r in image_points_mat.T]).T
        util.draw_points(annotate_image,
                         dict(zip(all_keys, list(image_points_mat.T))))

    return K, R
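# correspondence_matrix is not shown. In the standard DLT formulation each
# world/image correspondence contributes two rows to the homogeneous system
# M * p = 0, where p is the flattened 3x4 projection matrix; a sketch under
# that assumption:
from numpy import matrix

def correspondence_matrix(world_point, image_point):
    x, y, z = world_point
    u, v = image_point[:2]
    X = [x, y, z, 1.0]
    # [ X^T   0^T   -u * X^T ]
    # [ 0^T   X^T   -v * X^T ]
    return matrix([X + [0.0] * 4 + [-u * c for c in X],
                   [0.0] * 4 + X + [-v * c for c in X]])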
def test_ori(h_crop, lane_agent, ori_image, test_images, ratio_w, ratio_h,
             draw_type, thresh=p.threshold_point):  # p.threshold_point: 0.81
    result = lane_agent.predict_lanes_test(test_images)
    confidences, offsets, instances = result[-1]

    num_batch = len(test_images)

    out_x = []
    out_y = []
    out_images = []

    for i in range(num_batch):
        # test on test data set
        image = deepcopy(test_images[i])
        image = np.rollaxis(image, axis=2, start=0)
        image = np.rollaxis(image, axis=2, start=0) * 255.0
        image = image.astype(np.uint8).copy()

        confidence = confidences[i].view(p.grid_y, p.grid_x).cpu().data.numpy()

        offset = offsets[i].cpu().data.numpy()
        offset = np.rollaxis(offset, axis=2, start=0)
        offset = np.rollaxis(offset, axis=2, start=0)

        instance = instances[i].cpu().data.numpy()
        instance = np.rollaxis(instance, axis=2, start=0)
        instance = np.rollaxis(instance, axis=2, start=0)

        raw_x, raw_y = generate_result(confidence, offset, instance, thresh)
        in_x, in_y = eliminate_fewer_points(raw_x, raw_y)
        in_x, in_y = util.sort_along_y(in_x, in_y)

        # Draw the final, post-processed points onto the original image.
        if draw_type == 'line':
            result_image = util.draw_lines_ori(in_x, in_y, ori_image,
                                               ratio_w, ratio_h)
        elif draw_type == 'point':
            result_image = util.draw_point_ori(in_x, in_y, ori_image,
                                               ratio_w, ratio_h, h_crop)
        else:
            result_image = util.draw_points(in_x, in_y, ori_image,
                                            ratio_w, ratio_h)

        out_x.append(in_x)
        out_y.append(in_y)
        out_images.append(result_image)

    return out_x, out_y, out_images
def test(lane_agent, test_images, thresh=p.threshold_point, color=None):
    result = lane_agent.predict_lanes_test(test_images)
    confidences, offsets, instances = result[-1]

    num_batch = len(test_images)

    out_x = []
    out_y = []
    out_images = []

    for i in range(num_batch):
        # test on test data set
        # image = deepcopy(test_images[i])
        image = test_images[i]
        image = np.rollaxis(image, axis=2, start=0)
        image = np.rollaxis(image, axis=2, start=0) * 255.0
        image = image.astype(np.uint8).copy()

        confidence = confidences[i].view(p.grid_y, p.grid_x).cpu().data.numpy()

        offset = offsets[i].cpu().data.numpy()
        offset = np.rollaxis(offset, axis=2, start=0)
        offset = np.rollaxis(offset, axis=2, start=0)

        instance = instances[i].cpu().data.numpy()
        instance = np.rollaxis(instance, axis=2, start=0)
        instance = np.rollaxis(instance, axis=2, start=0)

        # generate points and cluster them into lane instances
        raw_x, raw_y = generate_result(confidence, offset, instance, thresh)

        # eliminate lanes with too few points
        in_x, in_y = eliminate_fewer_points(raw_x, raw_y)

        # sort points along y
        in_x, in_y = util.sort_along_y(in_x, in_y)
        in_x, in_y = eliminate_out(in_x, in_y, confidence)  # , deepcopy(image))
        in_x, in_y = util.sort_along_y(in_x, in_y)
        in_x, in_y = eliminate_fewer_points(in_x, in_y)

        result_image = util.draw_points(in_x, in_y, image, color)  # deepcopy(image))

        out_x.append(in_x)
        out_y.append(in_y)
        out_images.append(result_image)

    return out_x, out_y, out_images
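# eliminate_fewer_points is used by both test functions above but not shown.
# From its use it drops candidate lanes with too few points to fit reliably;
# a minimal sketch (the threshold of 3 points is an assumption):
def eliminate_fewer_points(x, y):
    out_x, out_y = [], []
    for lane_x, lane_y in zip(x, y):
        if len(lane_x) > 3:
            out_x.append(lane_x)
            out_y.append(lane_y)
    return out_x, out_y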
def solve(world_points_in, image_points_in, pixel_scale, annotate_image=None):
    """
    Find a camera's orientation and pixel scale given a set of world
    coordinates and a corresponding set of camera coordinates.

    world_points: Dict mapping point names to triples corresponding with
                  world x, y, z coordinates.
    image_points: Dict mapping point names to pairs corresponding with camera
                  x, y coordinates. Coordinates are translated such that 0, 0
                  corresponds with the centre of the image.
    annotate_image: Optional image to annotate with the fitted points.

    Return: 3x4 matrix [R|t] representing the camera's orientation.
    """
    assert set(world_points_in.keys()) >= set(image_points_in.keys())
    keys = sorted(list(image_points_in.keys()))
    assert len(keys) >= 4

    world_points = hstack([matrix(list(world_points_in[k])).T for k in keys])

    # Choose a "good" set of 4 basis indices: the first point, the point
    # furthest from the origin, the point furthest from the line through the
    # first two, and the point furthest from the plane through the first
    # three.
    basis_indices = [0]
    basis_indices += [argmax([numpy.linalg.norm(world_points[:, i])
                              for i, k in enumerate(keys)])]

    def dist_from_line(idx):
        v = world_points[:, idx] - world_points[:, basis_indices[0]]
        d = world_points[:, basis_indices[1]] - world_points[:, basis_indices[0]]
        d = d / numpy.linalg.norm(d)
        v -= d * (d.T * v)[0, 0]
        return numpy.linalg.norm(v)
    basis_indices += [argmax([dist_from_line(i) for i, k in enumerate(keys)])]

    def dist_from_plane(idx):
        v = world_points[:, idx] - world_points[:, basis_indices[0]]
        a = world_points[:, basis_indices[1]] - world_points[:, basis_indices[0]]
        b = world_points[:, basis_indices[2]] - world_points[:, basis_indices[0]]
        d = matrix(cross(a.T, b.T).T)
        d = d / numpy.linalg.norm(d)
        return abs((d.T * v)[0, 0])
    basis_indices += [argmax([dist_from_plane(i) for i, k in enumerate(keys)])]

    # Hard-coded override of the computed basis (debugging aid).
    basis_indices = map(keys.index, ['12a', '11a', '9a', '12b'])

    basis = hstack([world_points[:, i] for i in basis_indices])

    image_points = hstack([matrix(list(image_points_in[k]) + [pixel_scale]).T
                           for k in keys])
    image_points = image_points / pixel_scale

    print "Basis = %s" % [keys[i] for i in basis_indices]

    # Choose coeffs such that basis * coeffs = P,
    # where P is world_points relative to the first basis vector.
    def sub_origin(M):
        return M - hstack([basis[:, :1]] * M.shape[1])
    coeffs = sub_origin(basis[:, 1:]).I * sub_origin(world_points)

    # Compute a matrix M such that M * [z0, z1, z2, ... zN] = 0,
    # where zi are proportional to the Z-value of the i'th image point.
    def M_for_image_point(idx):
        assert idx not in basis_indices
        out = matrix(zeros((3, len(keys))))
        # Set d, e, f st:
        #   d * (b[1] - b[0]) + e * (b[2] - b[0]) + f * (b[3] - b[0]) =
        #       world_points[idx] - b[0]
        d, e, f = [coeffs[i, idx] for i in [0, 1, 2]]
        out[:, basis_indices[0]:basis_indices[0] + 1] = \
            (1 - d - e - f) * image_points[:, basis_indices[0]:basis_indices[0] + 1]
        out[:, basis_indices[1]:basis_indices[1] + 1] = \
            d * image_points[:, basis_indices[1]:basis_indices[1] + 1]
        out[:, basis_indices[2]:basis_indices[2] + 1] = \
            e * image_points[:, basis_indices[2]:basis_indices[2] + 1]
        out[:, basis_indices[3]:basis_indices[3] + 1] = \
            f * image_points[:, basis_indices[3]:basis_indices[3] + 1]
        out[:, idx:idx + 1] = -image_points[:, idx:idx + 1]
        return out
    M = vstack([M_for_image_point(key_idx)
                for key_idx in xrange(len(keys))
                if key_idx not in basis_indices])

    # Solve for Z by taking the eigenvector corresponding with the smallest
    # eigenvalue.
    eig_vals, eig_vecs = numpy.linalg.eig(M.T * M)
    Z = (eig_vecs.T[eig_vals.argmin()]).T
    print "Eig vecs: %s" % repr(eig_vecs)
    print "Eig vals: %s" % repr(eig_vals)
    print "Min idx: %d" % eig_vals.argmin()
    print "Z = %s" % repr(Z)
    print "M * Z = %s" % repr(M * Z)

    # Project points. The scale of the projected points will be wrong, and
    # the orientation is still unknown.
    camera_points = matrix(array(image_points) * array(vstack([Z.T] * 3)))
    print "Coeffs: %s" % repr(coeffs)
    print "Projected basis: %s" % repr(
        util.col_slice(camera_points, basis_indices + range(len(keys))))
    print "World basis: %s" % repr(
        util.col_slice(world_points, basis_indices + range(len(keys))))

    if annotate_image:
        image_points_mat = matrix([[r[0, 0] / r[0, 2], r[0, 1] / r[0, 2]]
                                   for r in camera_points.T]).T
        image_points_mat *= pixel_scale
        util.draw_points(annotate_image,
                         dict(zip(["%f" % Z[i, 0] for i in xrange(Z.shape[0])],
                                  list(image_points_mat.T))))

    # Compute the rotation and scale from world space to camera space.
    def sub_first(M):
        return M - hstack([M[:, basis_indices[0]:basis_indices[0] + 1]]
                          * M.shape[1])
    P = sub_first(camera_points) * util.right_inverse(sub_first(world_points))

    K, R = map(matrix, scipy.linalg.rq(P))
    # Flip signs so that the scale factors on K's diagonal are positive.
    for i in xrange(3):
        if K[i, i] < 0:
            R[i:(i + 1), :] = -R[i:(i + 1), :]
            K[:, i:(i + 1)] = -K[:, i:(i + 1)]

    print "P = %s" % repr(P)
    print "K = %s" % repr(K)
    print "R = %s" % repr(R)

    scale = 3.0 / sum(K[i, i] for i in xrange(3))
    t = scale * camera_points[:, basis_indices[0]:basis_indices[0] + 1] - \
        R * world_points[:, basis_indices[0]:basis_indices[0] + 1]
    P = hstack((R, t))

    # Annotate the image, if we've been asked to do so (currently disabled).
    if False and annotate_image:
        all_keys = list(world_points_in.keys())
        world_points_mat = hstack([matrix(list(world_points_in[k]) + [1.0]).T
                                   for k in all_keys])
        image_points_mat = P * world_points_mat
        image_points_mat = matrix([[r[0, 0] / r[0, 2], r[0, 1] / r[0, 2]]
                                   for r in image_points_mat.T]).T
        image_points_mat *= pixel_scale
        util.draw_points(annotate_image,
                         dict(zip(all_keys, list(image_points_mat.T))))

    return P
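# util.right_inverse is not shown. For a wide matrix A with full row rank the
# right inverse is A.T * (A * A.T)^-1, which satisfies
# A * right_inverse(A) = I; a sketch under that assumption:
def right_inverse(A):
    return A.T * (A * A.T).I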
dino1_score[3] = score(3)
dino1_score[5] = score(5)
print("score computed")

try:
    dino1_score_mask
except NameError:
    dino1_score_mask = [None] * 6
    dino1_score_mask[1] = np.zeros_like(dino1)
    dino1_score_mask[3] = np.zeros_like(dino1)
    dino1_score_mask[5] = np.zeros_like(dino1)
    # Mark local maxima: a pixel keeps its score only if it is the maximum of
    # its 3x3 neighbourhood.
    for i in range(1, len(dino1) - 1):
        for j in range(1, len(dino1[0]) - 1):
            for sigma in (1, 3, 5):
                if dino1_score[sigma][i, j] >= np.amax(
                        dino1_score[sigma][i - 1:i + 2, j - 1:j + 2].flatten()):
                    dino1_score_mask[sigma][i, j] = 1
        if not i % 100:
            print(i / len(dino1))

try:
    pts
except NameError:
    pts = list()
    for i in range(len(dino1_score_mask[5])):
        for j in range(len(dino1_score_mask[5][0])):
            if dino1_score_mask[5][i, j]:
                pts.append([dino1_score[5][i, j], (j, i)])
    pts.sort(key=lambda x: x[0])

# Draw the 50 strongest corners.
util.draw_points(dino1_color, [sublist[1] for sublist in pts[-50:]])
def solve(world_points_in, image_points, annotate_images=None,
          initial_matrices=None, initial_bd=0., initial_ps=3000.,
          change_ps=False, change_bd=False, change_pos=True):
    """
    Find a camera's orientation and pixel scale given a set of world
    coordinates and a corresponding set of camera coordinates.

    world_points: Map of point names to triples corresponding with world
                  (x, y, z) coordinates.
    image_points: Iterable of dicts mapping point names to pairs
                  corresponding with camera x, y coordinates. Coordinates
                  should be translated such that 0, 0 corresponds with the
                  centre of the image, and Y coordinates increase going top
                  to bottom. One element per source image.
    annotate_images: Optional iterable of images to annotate with the fitted
                     points.
    initial_matrices: Optional iterable of initial rotation matrices.
    initial_bd: Optional initial barrel distortion to use.
    initial_ps: Optional initial pixel scale to use.
    change_ps: If True, allow the pixel scale (zoom) to be varied. The
               algorithm can be unstable if the initial guess is inaccurate.
    change_bd: If True, allow the barrel distortion to be varied. The
               algorithm can be unstable if the initial guess is inaccurate.
    change_pos: If True, allow the camera position to be varied.

    Return: list of 4x4 matrices representing each camera's orientation, the
            fitted pixel scale, and the fitted barrel distortion.
    """
    assert all(set(world_points_in.keys()) >= set(p.keys())
               for p in image_points)
    keys = [list(p.keys()) for p in image_points]
    world_points = [hstack([matrix(list(world_points_in[k]) + [1.0]).T
                            for k in sub_keys])
                    for sub_keys in keys]
    image_points = hstack([hstack([matrix(p[k]).T for k in sub_keys])
                           for p, sub_keys in zip(image_points, keys)])
    print image_points

    if initial_matrices:
        current_mat = [m for m in initial_matrices]
    else:
        current_mat = [util.matrix_trans(0.0, 0.0, 500.0)] * len(keys)
    current_ps = initial_ps
    current_bd = initial_bd

    def camera_to_image(m, ps, bd):
        def map_point(c):
            px, py = calculate_barrel_distortion(bd,
                                                 ps * c[0, 0] / c[0, 2],
                                                 ps * c[0, 1] / c[0, 2])
            return [px, py]
        return matrix([map_point(c) for c in m.T]).T

    last_err_float = None
    while True:
        # Calculate the Jacobian.
        camera_points = hstack([m * p
                                for m, p in zip(current_mat, world_points)])
        err = image_points - camera_to_image(camera_points, current_ps,
                                             current_bd)
        J = make_jacobian(camera_points.T[:, :3], keys, current_ps,
                          current_bd)
        if not change_ps:
            J = hstack([J[:, :-2], J[:, -1:]])
        if not change_bd:
            J = J[:, :-1]
        if not change_pos:
            for i in xrange(len(keys)):
                J[:, (6 * i):(6 * i + 3)] = zeros((J.shape[0], 3))

        # Invert the Jacobian and calculate the change in parameters.
        # Limit angle changes to avoid chaotic behaviour.
        err = err.T.reshape(2 * sum(len(sub_keys) for sub_keys in keys), 1)
        param_delta = numpy.linalg.pinv(J) * (STEP_FRACTION * err)

        # Calculate the error (as a sum of squares), and abort if the error
        # has stopped decreasing.
        err_float = (err.T * err)[0, 0]
        print "Error: %f" % err_float
        print err
        if last_err_float is not None and \
                abs(err_float - last_err_float) < ERROR_CUTOFF:
            break
        last_err_float = err_float

        # Apply the parameter delta.
        for i in xrange(len(keys)):
            if change_pos:
                current_mat[i] = util.matrix_trans(
                    param_delta[6 * i + 0, 0],
                    param_delta[6 * i + 1, 0],
                    param_delta[6 * i + 2, 0]) * current_mat[i]
            current_mat[i] = util.matrix_rotate_x(
                param_delta[6 * i + 3, 0]) * current_mat[i]
            current_mat[i] = util.matrix_rotate_y(
                param_delta[6 * i + 4, 0]) * current_mat[i]
            current_mat[i] = util.matrix_rotate_z(
                param_delta[6 * i + 5, 0]) * current_mat[i]
            matrix_normalize(current_mat[i])
        if change_ps:
            current_ps += param_delta[6 * len(keys), 0]
        if change_bd:
            current_bd += param_delta[6 * len(keys) + 1, 0]

    if annotate_images:
        all_keys = list(world_points_in.keys())
        all_world_points = hstack([matrix(list(world_points_in[k]) + [1.0]).T
                                   for k in all_keys])
        for i, annotate_image in enumerate(annotate_images):
            all_camera_points = current_mat[i] * all_world_points
            util.draw_points(annotate_image,
                             dict(zip(all_keys,
                                      camera_to_image(all_camera_points,
                                                      current_ps,
                                                      current_bd).T)))

    return current_mat, current_ps, current_bd
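# matrix_normalize is referenced but not shown. Composing many incremental
# rotations lets the rotation block drift away from orthonormality, so it
# presumably re-orthonormalises the top-left 3x3 block; a sketch using the
# SVD (the in-place update is an assumption):
import numpy
from numpy import matrix

def matrix_normalize(m):
    """Snap the rotation block of a 4x4 transform back to orthonormal."""
    u, _, vt = numpy.linalg.svd(m[:3, :3])
    m[:3, :3] = u * vt  # nearest orthonormal matrix in the Frobenius norm
    m[3, :] = matrix([0.0, 0.0, 0.0, 1.0])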
def draw_reprojected(R, T, pixel_scale, world_points, annotate_image,
                     color=(255, 0, 0)):
    keys = list(world_points.keys())
    reprojected = project_points(R, T, pixel_scale, world_points, keys)
    util.draw_points(annotate_image,
                     dict(zip(keys, list(reprojected.T))),
                     color=color)
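# project_points is not shown. Given how draw_reprojected uses it, it
# presumably applies the rigid transform and the pinhole projection to each
# named point; a sketch (the 2xN column layout is an assumption):
from numpy import matrix

def project_points(R, T, pixel_scale, world_points, keys):
    out = []
    for k in keys:
        p = R * matrix(list(world_points[k])).T + T
        out.append([pixel_scale * p[0, 0] / p[2, 0],
                    pixel_scale * p[1, 0] / p[2, 0]])
    return matrix(out).T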
def search_lines(self, b_img):
    # Histogram of the bottom half of the binary image.
    histogram = np.sum(b_img[int(b_img.shape[0] / 2):, :], axis=0)
    monitor = np.dstack((b_img, b_img, b_img))
    midpoint = int(histogram.shape[0] / 2)

    # Weight the histogram so that peaks near the expected lane positions are
    # preferred. The second segment is sized so each weight vector spans
    # exactly `midpoint` columns.
    left_sk = np.linspace(0.3, 1, int(0.85 * midpoint))
    left_sk = np.concatenate(
        [left_sk, np.linspace(1, 0, midpoint - len(left_sk))])
    right_sk = np.linspace(0, 1, int(0.15 * midpoint))
    right_sk = np.concatenate(
        [right_sk, np.linspace(1, 0.3, midpoint - len(right_sk))])

    left_x_max = np.argmax(left_sk * histogram[:midpoint])
    right_x_max = np.argmax(right_sk * histogram[midpoint:]) + midpoint

    window_height = int(b_img.shape[0] / self.n_windows)

    current_left = left_x_max
    current_right = right_x_max

    left_lane_x = []
    right_lane_x = []
    left_lane_y = []
    right_lane_y = []

    for window in range(self.n_windows):
        win_y_low = b_img.shape[0] - (window + 1) * window_height
        win_y_high = win_y_low + window_height
        left_x_low = current_left - self.windows_width
        left_x_high = current_left + self.windows_width
        right_x_low = current_right - self.windows_width
        right_x_high = current_right + self.windows_width

        cv2.rectangle(monitor, (left_x_low, win_y_low),
                      (left_x_high, win_y_high), (0, 255, 0), 2)
        cv2.rectangle(monitor, (right_x_low, win_y_low),
                      (right_x_high, win_y_high), (0, 255, 255), 2)

        left_x = np.array(b_img[win_y_low:win_y_high,
                                left_x_low:left_x_high].nonzero()[1]) + left_x_low
        left_y = np.array(b_img[win_y_low:win_y_high,
                                left_x_low:left_x_high].nonzero()[0]) + win_y_low
        right_x = b_img[win_y_low:win_y_high,
                        right_x_low:right_x_high].nonzero()[1] + right_x_low
        right_y = b_img[win_y_low:win_y_high,
                        right_x_low:right_x_high].nonzero()[0] + win_y_low

        # If we found more than min_pixel_num pixels, recenter the next
        # window on their mean position.
        if len(left_x) > self.min_pixel_num:
            current_left = int(np.mean(left_x))
        if len(right_x) > self.min_pixel_num:
            current_right = int(np.mean(right_x))

        left_lane_x.append(left_x)
        right_lane_x.append(right_x)
        left_lane_y.append(left_y)
        right_lane_y.append(right_y)

    lx = np.concatenate(left_lane_x)
    ly = np.concatenate(left_lane_y)
    rx = np.concatenate(right_lane_x)
    ry = np.concatenate(right_lane_y)

    self.left.current_line = [lx, ly]
    self.right.current_line = [rx, ry]

    ploty = np.linspace(0, b_img.shape[0] - 1, b_img.shape[0])
    left_fit = np.polyfit(ly, lx, self.poly_order)
    right_fit = np.polyfit(ry, rx, self.poly_order)
    line_left = np.poly1d(left_fit)
    line_right = np.poly1d(right_fit)
    y1 = line_left(ploty)
    y2 = line_right(ploty)

    # Only accept the fit if both lanes have enough supporting pixels.
    if (len(rx) > 2000) & (len(lx) > 2000):
        self.left.prevx.append(y1)
        self.right.prevx.append(y2)
        self.left.detect = True
        self.right.detect = True
    else:
        self.left.detect = False
        self.right.detect = False

    # Smooth each fit over the last `num` frames.
    num = 5
    if len(self.left.prevx) > num:
        self.left.prevx.pop(0)
        left_avg_line = smoothing(self.left.prevx, num, self.display)
        left_avg_fit = np.polyfit(ploty, left_avg_line, self.poly_order)
        l = np.poly1d(left_avg_fit)
        left_fit_plotx = l(ploty)
        self.left.current_fit = left_avg_fit
        self.left.allx, self.left.ally = left_fit_plotx, ploty
    else:
        self.left.current_fit = left_fit
        self.left.allx, self.left.ally = y1, ploty

    if len(self.right.prevx) > num:
        self.right.prevx.pop(0)
        right_avg_line = smoothing(self.right.prevx, num, self.display)
        right_avg_fit = np.polyfit(ploty, right_avg_line, self.poly_order)
        r = np.poly1d(right_avg_fit)
        right_fit_plotx = r(ploty)
        self.right.current_fit = right_avg_fit
        self.right.allx, self.right.ally = right_fit_plotx, ploty
    else:
        self.right.current_fit = right_fit
        self.right.allx, self.right.ally = y2, ploty

    draw_points(monitor, self.left.allx, ploty, (0, 0, 255), 3)
    draw_points(monitor, self.right.allx, ploty, (0, 255, 0), 3)
    cv2.imshow("ss", monitor)

    return monitor
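# smoothing is not shown. From its use it averages the stored per-frame line
# fits over the last `num` frames; a minimal sketch (ignoring the display
# argument, which is an assumption):
import numpy as np

def smoothing(lines, num, display=None):
    lines = np.array(lines)
    return np.mean(lines[-num:], axis=0)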