class SavedModelFormatTestCase(unittest.TestCase):
    logger = LoggerAdaptor("tests/models/test_sfe.SavedModelFormatTestCase",
                           _logger)

    @classmethod
    def setUpClass(cls):
        pass
class StaticGraphFormatTestCase(unittest.TestCase):
    logger = LoggerAdaptor("tests/models/test_sfe.StaticGraphFormatTestCase",
                           _logger)

    @classmethod
    def setUpClass(cls):
        pass

    # @todo : TODO(change base model)
class OpticalFlowKPntPredictor(OpticalFlowBBoxPredictor):
    logger = LoggerAdaptor("OpticalFlowKPntPredictor", _logger)

    def __init__(self):
        super().__init__()

    def predict(self, kps, observed_img=None):
        assert self._cur_img is not None
        flow = self._flow
        if flow is None:
            assert observed_img is not None
            params = {
                'pyr_scale': 0.8,
                'levels': 3,
                'winsize': 51,
                'iterations': 5,
                'poly_n': 7,
                'poly_sigma': 1.2,
                'flags': 0
            }
            self._pre_img = self._cur_img
            self._flow = self._impl(self._pre_img, observed_img, None,
                                    params['pyr_scale'], params['levels'],
                                    params['winsize'], params['iterations'],
                                    params['poly_n'], params['poly_sigma'],
                                    params['flags'])
            self._cur_img = observed_img
            flow = self._flow

        # adapted from opencv4_source_code/samples/python/opt_flow.py;
        # see the helper lib for how to use the `flow` object
        dx, dy = flow[:, :, 0], flow[:, :, 1]
        l = len(kps)
        new_pixels = np.zeros((l, 2))
        # @todo : TODO impl
        try:
            for i, kp in enumerate(kps):
                x1, y1 = kp
                x2, y2 = x1 + dx[int(y1), int(x1)], y1 + dy[int(y1), int(x1)]
                new_pixels[i, 0] = x2
                new_pixels[i, 1] = y2
        except Exception as e:
            print(e)
            print("kp", kp)
            raise
        return new_pixels
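
# Illustrative sketch (not part of the pipeline): propagating a handful of
# tracked keypoints with OpticalFlowKPntPredictor. `prev_grey` and `next_grey`
# stand for two consecutive greyscale frames supplied by the caller.
def _demo_optical_flow_keypoints(prev_grey, next_grey):
    predictor = OpticalFlowKPntPredictor().Init()
    predictor.set_FromImg(prev_grey)
    kps = [(120.0, 80.0), (300.5, 211.0)]  # (x, y) pixel positions
    # returns an (N, 2) array of the keypoints moved by the dense flow
    return predictor.predict(kps, observed_img=next_grey)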
class OpticalFlowBBoxPredictor(object):
    logger = LoggerAdaptor("OpticalFlowBBoxPredictor", _logger)

    def __init__(self):
        self._impl = None
        self._cur_img = None
        self._pre_img = None
        self.IMAGE_SHAPE = (None, None)
        # dense optical flow for the image
        self._flow = None

    def Init(self):
        # I am also considering using the createOptFlow_DualTVL1 algorithm ...
        # The Farneback algorithm has some problems in numeric computing:
        # https://stackoverflow.com/questions/46521885/why-does-cv2-calcopticalflowfarneback-fail-on-simple-synthetic-examples
        self._impl = cv2.calcOpticalFlowFarneback
        return self

    def set_FromImg(self, img):
        self._cur_img = img
        self.IMAGE_SHAPE = img.shape[0:2]
        return self

    def get_flow(self):
        return self._flow

    def set_flow(self, flow):
        self._flow = flow
        return self

    def predict(self, states, observed_img=None):
        assert self._cur_img is not None
        flow = self._flow
        if flow is None:
            assert observed_img is not None
            params = {
                'pyr_scale': 0.8,
                'levels': 3,
                'winsize': 31,
                'iterations': 5,
                'poly_n': 5,
                'poly_sigma': 1.2,
                'flags': 0
            }
            self._pre_img = self._cur_img
            self._flow = self._impl(self._pre_img, observed_img, None,
                                    params['pyr_scale'], params['levels'],
                                    params['winsize'], params['iterations'],
                                    params['poly_n'], params['poly_sigma'],
                                    params['flags'])
            self._cur_img = observed_img
            flow = self._flow

        dx, dy = flow[:, :, 0], flow[:, :, 1]
        x1, y1, w, h = states
        x2, y2 = x1 + w, y1 + h
        H, W = self.IMAGE_SHAPE
        if x2 >= W:
            x2 = W - 1
        if y2 >= H:
            y2 = H - 1
        ret = np.array([
            x1 + dx[int(y1), int(x1)],
            y1 + dy[int(y1), int(x1)],
            w + dx[int(y2), int(x2)] - dx[int(y1), int(x1)],
            h + dy[int(y2), int(x2)] - dy[int(y1), int(x1)]
        ])
        return ret

    def update(self, measures):
        import sys
        func_name = sys._getframe().f_code.co_name
        raise NotImplementedError("the method %s is not implemented!" %
                                  func_name)
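
# Illustrative sketch (not part of the pipeline): feeding two consecutive
# greyscale frames through OpticalFlowBBoxPredictor and propagating a bbox.
# The bbox values are made up; `prev_grey`/`next_grey` come from the caller.
def _demo_optical_flow_bbox(prev_grey, next_grey):
    predictor = OpticalFlowBBoxPredictor().Init()
    predictor.set_FromImg(prev_grey)
    bbox = (120., 80., 40., 30.)  # (x, y, w, h) in prev_grey
    new_bbox = predictor.predict(bbox, observed_img=next_grey)
    flow = predictor.get_flow()  # dense (H, W, 2) flow field, reusable
    return new_bbox, flow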
class BBoxKalmanFilter(ExtendedKalmanFilter):
    # specify states to observe
    # bounding box : (x, y, w, h) with shape equal to (4,1)
    observation_dim = 4
    states_dim = observation_dim * 3

    logger = LoggerAdaptor("BBoxKalmanFilter", _logger)

    def __init__(self):
        # we don't know the velocity and acceleration of frame changes
        super().__init__(BBoxKalmanFilter.states_dim,
                         BBoxKalmanFilter.observation_dim)
        self.bbox = np.zeros((4, 1))
        self._states = np.zeros((12, 1))
        self._Init()

    def _Init(self):
        # set up the covariance matrices; parameters are borrowed directly from
        # "Simple Online and Realtime Tracking", since tuning is extremely
        # hacky across different tasks
        self.setup_P()
        self.setup_Q()
        # initialize H and R
        self.H = self.populate_measures_constrain([0, 1, 2, 3])
        self.R = self.populate_priors_variance_constrain([0, 1, 2, 3])

    def setup_P(self, factors=None):
        if factors is None:
            transition_weight = 1. / 20
            velocity_weight = 1. / 160
            # freshly added
            acceleration_weight = 1. / 256
            factors_raw = np.array([
                transition_weight * 2, velocity_weight * 10,
                acceleration_weight * 10
            ])
            factors = np.repeat(factors_raw, 4)  # x x x x y y y y z z z z
        self.P = np.diag(np.square(factors))

    def setup_Q(self, factors=None):
        if factors is None:
            transition_weight = 1. / 20
            velocity_weight = 1. / 160
            # freshly added
            acceleration_weight = 1. / 256
            factors_raw = np.array(
                [transition_weight, velocity_weight, acceleration_weight])
            factors = np.repeat(factors_raw, 4)  # x x x x y y y y z z z z
        # alternatively, you can use the `populate_states_variance_constrain`
        # method I provided to populate the variance matrix
        self.Q = np.diag(np.square(factors))

    def predict(self, bbox):
        self.bbox = bbox
        self._states[:4, 0] = bbox
        ret = super().predict(self._states)
        ret = ret.reshape(ret.shape[0])
        return ret

    def update(self, observed_states):
        return super().update(
            np.array(observed_states).reshape(
                (BBoxKalmanFilter.observation_dim, 1)))
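
# A minimal predict/update cycle for BBoxKalmanFilter, assuming one detection
# per frame; the bbox numbers are made up for illustration.
def _demo_bbox_kalman_cycle():
    kf = BBoxKalmanFilter()
    predicted = kf.predict(np.array([100., 50., 30., 60.]))  # (x, y, w, h)
    # ... run the detector on the new frame, then correct the estimate ...
    estimated = kf.update([103., 52., 31., 59.])
    return predicted, estimated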
class ExtendedKalmanFilter(object):
    logger = LoggerAdaptor("ExtendedKalmanFilter", _logger)

    def __init__(self, states_dim, measures_dim):
        """
        @param states_dim: int
        @param measures_dim: int
        """
        self.states_dim = states_dim
        self.measures_dim = measures_dim

        # states to inspect, mapped to the symbol `x` during computation;
        # the time series of states is maintained by a tracker with an array
        # of observations
        self._states = np.zeros((states_dim, 1))

        # usually I use 1/f, where f estimates the frequency of the video or
        # observation sequence
        self.dt = 1.0 / 24

        # Constant Acceleration Physics Model; as long as dt is small enough,
        # this equation holds
        self._A = self.populate_physics_constrain(self.dt)
        self._B = None
        self._u = None

        # See usage examples from https://github.com/balzer82/Kalman. However,
        # pay attention that my implementation is generalized to arbitrary
        # one-dimensional observations.

        # state estimate covariance matrix
        self.P = np.eye(states_dim)
        # process noise covariance matrix
        self.Q = self.populate_states_variance_constrain()
        # measurement matrix
        self.H = self.populate_measures_constrain()
        # measurement noise covariance matrix
        self.R = self.populate_priors_variance_constrain()

    def Init(self):
        pass

    ## helper func to initiate Kalman Filter computing routines.
    # The routines are also helpful when we implement a multi-sensor fusion
    # strategy: since the frequencies vary dramatically, we can apply different
    # measures when observation data from the sensors arrive.
    def populate_physics_constrain(self, dt):
        """
        @return A : np.array with shape of (states_dim, states_dim)
        """
        # for each observation x, we have
        # x_{k+1} = x_{k} + v_{k} * dt + 1/2 * a_{k} * dt^2
        states_dim = self.states_dim
        A = np.eye(states_dim)
        factors = [dt, 0.5 * dt * dt]
        assert self.states_dim % 3 == 0
        first_order_observation_dim = self.states_dim // 3
        for i in range(states_dim):
            for k in range(1, len(factors) + 1):
                j = i + first_order_observation_dim * k
                if j >= states_dim:
                    break
                A[i, j] = factors[k - 1]
        return A

    # @todo : TODO impl
    def populate_measures_constrain(self, measures_indice=None):
        """
        @return H : np.array with shape of (measures_dim, states_dim)
        """
        H = np.zeros((self.measures_dim, self.states_dim))
        if measures_indice is not None and isinstance(measures_indice,
                                                      (list, tuple)):
            assert len(measures_indice) == self.measures_dim
            # update H
            for i, idx in enumerate(measures_indice):
                H[i][idx] = 1
        else:
            if measures_indice is None:
                logging.warning(
                    "The measures indice is none. Please set it using "
                    "'KalmanFilter.populate_measures_constrain(self, indice)' later."
                )
            else:
                raise Exception(
                    "expect list or tuple, but encountered %s for measures_indice"
                    % str(type(measures_indice)))
        return H

    def populate_priors_variance_constrain(self,
                                           measures_indice=None,
                                           measures_noises=None):
        """
        @return R : np.array with shape of (measures_dim, measures_dim)
        """
        R = np.eye(self.measures_dim)
        if measures_indice is not None and isinstance(measures_indice,
                                                      (list, tuple)):
            if measures_noises is None:
                measures_noises = np.ones((self.measures_dim, 1))
            elif hasattr(measures_noises, "__len__"):
                # implements the list interface
                pass
            else:
                raise Exception(
                    "expect `measures_noises` to be an array-like object, but encountered %s"
                    % str(type(measures_noises)))
            assert len(measures_indice) == self.measures_dim
            # update R (flatten first so np.diag builds a diagonal matrix
            # instead of extracting a diagonal from the (m, 1) column vector)
            R = np.diag(np.asarray(measures_noises).reshape(-1))
        else:
            if measures_indice is None:
                logging.warning(
                    "The measures indice is none. Please set it using "
                    "'KalmanFilter.populate_measures_constrain(self, indice)' later."
                )
            else:
                raise Exception(
                    "expect list or tuple, but encountered %s for measures_indice"
                    % str(type(measures_indice)))
        return R

    def populate_states_variance_constrain(self, factors=None):
        """
        @return Q : np.array with shape of (states_dim, states_dim)
        """
        if factors is None:
            factors = np.zeros((self.states_dim, 1))
            factors[0, 0] = 1.0
        assert factors.shape == (self.states_dim, 1)
        Q = factors * factors.T
        return Q

    ## Interfaces to modify the public attributes

    def set_dt(self, dt):
        self.dt = dt
        return self

    def set_P(self, P):
        self.P = P
        return self

    def set_Q(self, Q):
        self.Q = Q
        return self

    def predict(self, states):
        P = self.P
        A = self._A
        Q = self.Q

        # perform state prediction using the pure physics model.
        # I am going to embed opticalFlow control here, where magic happens:
        # suppose our movement equation does not obey `rigid body movement`
        # but is estimated from opticalFlow? That's it!
        self._states = A.dot(states)
        self.P = A.dot(P.dot(A.T)) + Q
        return self._states

    # Note we can change measures dynamically for multi-sensor fusion
    def update(self, observed_states):
        states = self._states
        P = self.P
        H = self.H
        R = self.R
        z = observed_states
        I = np.eye(self.states_dim)

        def validate_states(states):
            # check whether states is an object implementing the python
            # array-like protocol
            if not hasattr(states, "__len__"):
                raise TypeError("states should be an array like object!")
            assert len(states) == self.measures_dim

        validate_states(z)

        # perform the states update
        # Kalman Gain from standard EKF theory
        K = (P.dot(H.T)).dot(np.linalg.pinv(H.dot(P.dot(H.T)) + R))
        # Update estimates
        self._states = states + K.dot(z - H.dot(states))
        # Update states error covariance
        self.P = (I - K.dot(H)).dot(P)
        return self._states
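
# For states_dim = 12 (4 positions, 4 velocities, 4 accelerations), the
# transition matrix built by populate_physics_constrain has the block form
#
#   A = | I  dt*I  0.5*dt^2*I |
#       | 0   I        dt*I   |
#       | 0   0         I     |
#
# i.e. each state integrates its first and second derivatives over one step.
# A quick sanity check (illustrative):
def _demo_transition_matrix():
    ekf = ExtendedKalmanFilter(12, 4)
    A = ekf.populate_physics_constrain(1.0 / 24)
    assert A[0, 4] == 1.0 / 24
    assert abs(A[0, 8] - 0.5 * (1.0 / 24)**2) < 1e-12
    return A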
class PCViewer:
    logger = LoggerAdaptor("PCViewer", _logger)

    # these mocked classes will be improved later
    class Pixel2D:
        def __init__(self):
            self.x = None
            self.y = None

    class Window:
        def __init__(self):
            self.width = None
            self.height = None
            self.aspect = None
            self.fd = None
            self.pos = None
            self.name = None
            self.focused = False

    class Mouse:
        def __init__(self):
            self.press_x = None
            self.press_y = None
            self.state = None

    def __init__(self):
        self._map = None
        self._tracker = None

        # attributes queue
        # self.attributes = JoinableQueue()
        self.manager = multiprocessing.Manager()
        self.attributes = self.manager.Queue()

        self.is_3dmode = True

        self.main_window = self.Window()
        self.main_window.name = "PCViewer"
        self.main_window.width = 1024
        self.main_window.height = 768

        # gl camera to update the MVP matrix stack, see OpenGL 2 and
        # OpenGL SE for details of the implementation
        self.camera = None
        self.control = None
        self.layout_root = UICom()

        # implement event loop interface
        self._state_locker = Lock()
        # close event
        self._stopped = False
        # stop event
        self._stopping = False

        # to obtain true parallel compute ability:
        # async gl renderer
        self._gl_renderer_executor = Process(target=self.glMainLoop)
        self._gl_renderer_executor.daemon = True

    def Start(self):
        self._gl_renderer_executor.start()

    def set_FromTracker(self, tracker):
        self._tracker = tracker
        return self

    def set_FromMap(self, map):
        self._map = map
        return self

    def set_stopping(self):
        pass

    def isStopping(self):
        return self._stopping

    def Init(self):
        self.InitWindow()
        self.Clear()
        if self.is_3dmode:
            # set up a 3D perspective camera
            self.logger.info("Setting up a 3d camera")
            self.Setup3DCamera()
        else:
            # set up a 2D camera
            raise Exception("2D Orthogonal Camera is not implemented yet!")
        self.logger.info("Creating viewports ...")
        self.camera.create_viewport(self.camera.width, self.camera.height)
        self.InitControl()
        self.SetLights()
        # Register polling events
        self.InitPangolin()
        # self.Start()

    def InitWindow(self):
        # set display mode
        # set window size and position
        # create window
        # self.main_window.fd = pangolin.CreateWindowAndBind(
        #     self.main_window.name,
        #     self.main_window.width,
        #     self.main_window.height)
        # self.logger.info("Main Window file descriptor (fd) : %s" % self.main_window.fd)
        self.SetDisplayMode()
        self.SetWindowLayout()
        return self

    def SetWindowLayout(self):
        panel = UICom.Create(pangolin.CreatePanel("menu"))
        self.layout_root.attrs["menu"] = panel
        # @todo : TODO add other UI control components to construct the UI
        # components tree
        return self

    def SetDisplayMode(self):
        if self.is_3dmode:
            gl.glEnable(gl.GL_DEPTH_TEST)
            gl.glEnable(gl.GL_BLEND)
            gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)
        else:
            raise Exception("Not Implemented Yet!")
        return self

    def Clear(self):
        gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
        gl.glClearColor(1.0, 1.0, 1.0, 1.0)
        return self

    def Setup3DCamera(self):
        self.camera = PerspectiveCamera(None, None, 0.2, 200)
        self.camera.set_position(np.array([-2., 2, -2.]))
        self.camera.LookAt(np.array([0, 0, 0]))
        return self

    def InitControl(self):
        # init FirstPersonController
        self.control = FirstPersonController(self.camera)
        return self

    # @todo : TODO
    def SetLights(self):
        pass

    def InitPangolin(self):
        pass

    # see the TUM rgbd_benchmark toolkit
    def drawDepthImage(self, frame):
        # dest = os.path.join(TUM_DATA_DIR, frame.depth_img)
        img = frame.img
        # depth_img = cv2.imread(dest, 0)
        depth_img = frame.depth_img
        H, W = img.shape[0:2]
        SCALING_FACTOR = 5  # 5000.0
        R_PRECISION = 1e-4

        datum = Datum()
        datum.attrs['points-redundant'] = []
        datum.attrs['colors-redundant'] = []
        K = frame.camera.K

        def check_eq(retrieved, right):
            left = retrieved.data
            dist = np.linalg.norm(left - right)
            return dist < R_PRECISION

        for y in range(H):
            for x in range(W):
                # set RGB color for rendering
                color = frame.img[int(y), int(x)]
                Z = depth_img[int(y), int(x)]
                Z /= SCALING_FACTOR
                if Z == 0:
                    continue
                X = (x - K[0, 2]) * Z / K[0, 0]
                Y = (y - K[1, 2]) * Z / K[1, 1]

                # get pixel
                idx = int(y * W + x)
                px = frame.pixels.get(idx, None)
                p = None
                if True:  # px is None:
                    p = Point3D(X, Y, Z)
                else:
                    p_vec = np.array([X, Y, Z])
                    dist = None
                    best = None
                    for cam_pt in px.cam_pt_array:
                        w = cam_pt
                        if w is None:
                            continue
                        if best is None:
                            best = w
                            dist = np.linalg.norm(best.data - p_vec)
                        else:
                            dist1 = np.linalg.norm(w.data - p_vec)
                            if dist1 < dist:
                                best = w
                                dist = dist1
                    if best is None:
                        p = Point3D(X, Y, Z)
                    else:
                        p = best
                        print(
                            "[PCViewer.DrawDepthImage] Updating %s to (%f, %f, %f)"
                            % (p, X, Y, Z))
                        p.x = X
                        p.y = Y
                        p.z = Z
                # p.set_color(color)

                # camera to world
                v = p.data.reshape((3, 1))
                # Twc
                v = frame.R0.dot(v) + frame.t0
                v = v.reshape((3, ))
                p.update(*v)

                datum.attrs['points-redundant'].append(p)
                datum.attrs['colors-redundant'].append(color)

        datum.attrs['pose'] = self._tracker.cur.get_pose_mat()
        self.logger.info("[Main Thread] put datum to queue")
        self.attributes.put(datum)

    # @todo : TODO update
    def Update(self, cur=None, last=None, flow_mask=None, active_frames=None):
        datum = Datum()
        cur = cur or self._tracker.cur
        last_frame = last or self._tracker.last_frame
        if flow_mask is None:
            flow_mask = self._tracker.flow_mask

        if last_frame and hasattr(
                last_frame,
                "rendered_img") and last_frame.rendered_img is not None:
            out_frame = cv2.add(last_frame.rendered_img, flow_mask)
            datum.attrs['img'] = out_frame
            print("put Frame#%d rendered image to datum" % last_frame.seq)
        else:
            datum.attrs['img'] = cur.img
            print("put Frame#%d source image to datum" % cur.seq)

        datum.attrs['pose'] = cur.get_pose_mat()
        print("put Frame#%d's pose\n%s\n to datum" %
              (cur.seq, datum.attrs['pose']))

        # add current structure
        datum.attrs['points'] = []
        pointCloud = cur.get_points()
        frame = cur
        for point in pointCloud:
            try:
                pos = point.frames.get(frame.seq, None)
            except AttributeError:
                pos = point.observations.get(frame.seq, None)
            # the g2o module might change the ground truth to strange values!
            if pos is None:
                # raise Exception("Wrong Value!")
                print("Wrong Value!")
                continue
            px = frame.pixels[pos]
            x, y = px.data

            # if frame.depth_img is not None:
            #     depth_img = frame.depth_img
            #
            #     SCALING_FACTOR = 5
            #
            #     K = frame.camera.K
            #
            #     Z = depth_img[int(y), int(x)]
            #     Z /= SCALING_FACTOR
            #     if Z == 0:
            #         point.setBadFlag()
            #         continue
            #     X = (x - K[0, 2]) * Z / K[0, 0]
            #     Y = (y - K[1, 2]) * Z / K[1, 1]
            #
            #     point.update(X, Y, Z)
            #
            #     # Twc
            #     v = point.data.reshape((3, 1))
            #     v = frame.R0.dot(v) + frame.t0
            #     v = v.reshape((3, ))
            #
            #     # print("[PCViewer.Update] Updating from %s to (%f, %f, %f)" % (
            #     #     point, v[0], v[1], v[2]))
            #     point.update(*v)

            if px.roi.label == "chair":
                pass  # print("chair point: ", point)
            if px.roi.label == "book":
                pass  # print("book point: ", point)

            # check whether the computed projection of that point is inside the bbox
            cam_pt = frame.camera.viewWorldPoint(point)
            projection = frame.camera.view(cam_pt)
            if not projection or not px.roi.isIn(projection):
                # remove the point (it might have been optimized out) outside of the roi
                y1, x1, y2, x2 = px.roi.bbox
                print(
                    "[PCViewer.Update] %s is out of %s's bbox (%f, %f, %f, %f)"
                    % (projection, px.roi, x1, y1, x2, y2))
                px.roi.findParent().remove(point)
                continue

            # print("put KeyPoint %s to datum" % point)
            datum.attrs['points'].append(point)
        print("put Frame#%d's structure(%d points) to datum" %
              (cur.seq, len(pointCloud)))

        # add lines
        datum.attrs['lines'] = []
        frames = active_frames or self._map.get_active_frames()
        l = len(frames)
        for i in range(1, l):
            datum.attrs['lines'].append(
                (frames[i - 1].t0.reshape(3, ), frames[i].t0.reshape(3, )))

        # add landmarks
        datum.attrs['landmarks'] = []
        landmarks = cur.get_landmarks()
        for landmark in landmarks:
            # a landmark cannot be pickled, see
            # docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled
            # datum.attrs['landmarks'].append(landmark)
            picklable = LandmarkDatum(landmark)
            if landmark.label not in ("keyboard", "book", "chair"):
                pass
            if landmark.label in (
                    "table",
                    "dining table",
            ):
                continue
            if picklable.abox is not None:
                print(
                    "Putting landmark <Landmark#%d, Label %s> to datum, aabox: \n%s"
                    % (landmark.seq, landmark.label, picklable.abox.toArray()))
                datum.attrs['landmarks'].append(picklable)

        print("put Frame#%d's structure(%d landmarks) to datum" %
              (cur.seq, len(datum.attrs['landmarks'])))
        self.logger.info("[Main Thread] put datum to queue")
        self.attributes.put(datum)

    def setStop(self):
        with self._state_locker:
            self._stopping = True

    # @todo : TODO stop
    def Stop(self):
        # self.attributes.join()
        self.attributes.close()
        print("Set glMainLoop stopping")
        self.setStop()
        # self._gl_renderer_executor.join()

    # @todo : TODO
    def Render(self):
        pass

    # @todo : TODO
    def glMainLoop(self):
        self.logger.info("Running gl viewer ...")
        flag = False

        pangolin.CreateWindowAndBind('Main', 640, 480)
        gl.glEnable(gl.GL_DEPTH_TEST)
        gl.glEnable(gl.GL_BLEND)
        gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)

        panel = pangolin.CreatePanel('menu')
        panel.SetBounds(1.0, 1.0, 0.0, 100 / 640.)
        # Does not work with the initialization of the window
        # self.Init()

        camera_pose = pangolin.OpenGlMatrix()

        # create scene graph
        # scene = pangolin.Renderable()
        # # x : R
        # # y : G
        # # z : B
        # scene.Add(pangolin.Axis())

        rendered_cam = pangolin.OpenGlRenderState(
            pangolin.ProjectionMatrix(640, 480, 420, 420, 320, 240, 0.01,
                                      2000),
            pangolin.ModelViewLookAt(1, 0, 1, 0, 0, 0, 0, 0, 1))
        handler = pangolin.Handler3D(rendered_cam)
        # handler = pangolin.SceneHandler(scene, rendered_cam)

        # add drawing callback
        viewport = pangolin.CreateDisplay()
        viewport.SetBounds(0.0, 1.0, 0.0, 1.0, -640.0 / 480.0)
        viewport.SetHandler(handler)

        image_viewport = pangolin.Display('image')
        w = 160
        h = 120
        image_viewport.SetBounds(0, h / 480., 0.0, w / 640., 640. / 480.)
        image_viewport.SetLock(pangolin.Lock.LockLeft, pangolin.Lock.LockTop)
        texture = pangolin.GlTexture(w, h, gl.GL_RGB, False, 0, gl.GL_RGB,
                                     gl.GL_UNSIGNED_BYTE)

        # geometries
        self.lines = []
        self.points = {}
        self.colors = {}
        self.poses = []
        self.redundant_points = []
        self.redundant_colors = []
        self.landmarks = {}

        # default image shown before the first datum arrives
        img = np.ones((h, w, 3), 'uint8')

        while not pangolin.ShouldQuit():
            datum = None
            if not self.attributes.empty():
                self.logger.info(
                    "[GL Process] attributes is not empty, fetching datum ...")
                datum = self.attributes.get()
                self.logger.info(
                    "[GL Process] get a datum from the main thread of the parent process"
                )

            if datum is not None:
                # dispatch instructions
                if datum.attrs.get('pose', None) is not None:
                    pose = datum.attrs['pose']
                    # self.camera.pose.m = pose
                    # set Twc
                    camera_pose.m = pose  # np.linalg.inv(pose)
                    rendered_cam.Follow(camera_pose, True)
                    self.logger.info(
                        "[GL Process] update camera pose matrix got from datum: \n%s"
                        % pose)

            # self.Clear()
            # self.control.Update(self.camera.pose)
            gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT)
            gl.glClearColor(1.0, 1.0, 1.0, 1.0)

            viewport.Activate(rendered_cam)
            # scene.Render()

            # Just for test
            # pangolin.glDrawColouredCube(0.1)

            # render graph
            if datum is not None:
                # dispatch data
                # update graph
                if datum.attrs.get('lines', None) is not None:
                    lines = datum.attrs['lines']
                    self.lines.extend(lines)
                    self.logger.info("[GL Process] drawing %d lines ..." %
                                     len(lines))

                if datum.attrs.get('pose', None) is not None:
                    pose = datum.attrs['pose']
                    self.poses.append(pose)
                    self.logger.info(
                        "[GL Process] drawing a camera with new pose matrix ..."
                    )

                # update image
                if datum.attrs.get('img', None) is not None:
                    img = datum.attrs['img']
                    # see pangolin issue #180:
                    # change cv BGR channels to the normal order
                    # img = img[::-1, :, ::-1].astype(np.uint8)
                    img = img.astype(np.uint8)
                    img = cv2.resize(img, (w, h))
                    self.logger.info(
                        "[GL Process] drawing image to image viewport ...")

                # show mappoints
                if datum.attrs.get('points', None) is not None:
                    points = datum.attrs['points']
                    if len(points) > 0:
                        # self.points.extend(points)
                        for point in points:
                            self.points[point.seq] = point
                        # colors = np.array([p.color if p.color is not None else np.array([1.0, 0.0, 0.0]) for p in points]).astype(np.float64)
                        colors = [
                            p.color / 255. if p.color is not None else
                            np.array([1.0, 1.0, 0.0]) for p in points
                        ]
                        # print("colors: \n%s" % np.array(colors))
                        # colors = [[1., 1., 0.] for p in points]
                        # self.colors.extend(colors)
                        for i, color in enumerate(colors):
                            point = points[i]
                            self.colors[point.seq] = color
                        self.logger.info("[GL Process] drawing %d points" %
                                         len(points))
                        # print("new mappoints: \n%s" % np.array([p.data for p in points]).astype(np.float64))
                        # print("new colors (default): \n%s" % np.array(colors))
                    else:
                        self.logger.info("[GL Process] no points to be drawn.")

                # redundant points
                if datum.attrs.get('points-redundant', None) is not None:
                    points = datum.attrs['points-redundant']
                    colors = datum.attrs['colors-redundant']
                    for i, p in enumerate(points):
                        self.redundant_points.append(p)
                        self.redundant_colors.append(colors[i] / 255.)

                # show landmarks
                if datum.attrs.get('landmarks', None) is not None:
                    landmarks = datum.attrs['landmarks']
                    for landmark in landmarks:
                        self.landmarks[landmark.seq] = landmark
                    self.logger.info("[GL Process] drawing %d landmarks" %
                                     len(landmarks))

                self.attributes.task_done()
                self.logger.info("[GL Process] datum has been processed.")

            ############
            # draw graph
            ############
            line_geometries = np.array([[*line[0], *line[1]]
                                        for line in self.lines])
            if len(line_geometries) > 0:
                gl.glLineWidth(1)
                gl.glColor3f(0.0, 1.0, 0.0)
                pangolin.DrawLines(line_geometries, 3)

            # pose = self.camera.pose
            pose = camera_pose
            # GL 2.0 API
            gl.glPointSize(4)
            gl.glColor3f(1.0, 0.0, 0.0)
            gl.glBegin(gl.GL_POINTS)
            # draw the camera position, i.e. the translation column of Twc
            gl.glVertex3d(pose[0, 3], pose[1, 3], pose[2, 3])
            gl.glEnd()

            ############
            # draw poses
            ############
            if len(self.poses) > 0:
                gl.glLineWidth(1)
                gl.glColor3f(0.0, 0.0, 1.0)
                # poses: numpy.ndarray[float64], w: float=1.0,
                # h_ratio: float=0.75, z_ratio: float=0.6
                pangolin.DrawCameras(np.array(self.poses))

                gl.glPointSize(4)
                gl.glColor3f(0.0, 0.0, 1.0)
                gl.glBegin(gl.GL_POINTS)
                for pose in self.poses:
                    gl.glVertex3d(pose[0, 3], pose[1, 3], pose[2, 3])
                gl.glEnd()

            ################
            # draw mappoints
            ################
            if len(self.points) > 0:
                # points_geometries = np.array([p.data for p in self.points]).astype(np.float64)
                points_geometries = []
                colors = []
                for point_key, p in self.points.items():
                    points_geometries.append(p.data)
                    colors.append(self.colors[point_key])
                points_geometries = np.array(points_geometries).astype(
                    np.float64)
                colors = np.array(colors).astype(np.float64)
                gl.glPointSize(6)
                # pangolin.DrawPoints(points_geometries, np.array(self.colors).astype(np.float64))
                pangolin.DrawPoints(points_geometries, colors)

                # gl.glPointSize(4)
                # gl.glColor3f(1.0, 0.0, 0.0)
                #
                # gl.glBegin(gl.GL_POINTS)
                # for point in points_geometries:
                #     gl.glVertex3d(point[0], point[1], point[2])
                # gl.glEnd()

            ####################
            # redundant points #
            ####################
            if len(self.redundant_points) > 0:
                points_geometries = []
                colors = []
                for i, p in enumerate(self.redundant_points):
                    points_geometries.append(p.data)
                    colors.append(self.redundant_colors[i])
                points_geometries = np.array(points_geometries).astype(
                    np.float64)
                colors = np.array(colors).astype(np.float64)
                gl.glPointSize(3)
                pangolin.DrawPoints(points_geometries, colors)

            ################
            # draw landmarks
            ################
            for key, landmarkDatum in self.landmarks.items():
                # abox = landmark.computeAABB()
                # drawABox(abox.toArray())
                drawABox(landmarkDatum.abox.toArray(),
                         color=landmarkDatum.color)

            #############
            # draw images
            #############
            # self.camera.RenderImg(img)
            texture.Upload(img, gl.GL_RGB, gl.GL_UNSIGNED_BYTE)
            image_viewport.Activate()
            gl.glColor3f(1.0, 1.0, 1.0)
            texture.RenderToViewport()

            pangolin.FinishFrame()

        print("gl program loop stopped")
        with self._state_locker:
            self._stopped = True
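
# Rough driver showing the producer/consumer split (illustrative; `tracker`
# and `world_map` are placeholders for the real tracking and map objects):
# glMainLoop runs in a child process and drains the shared queue, while the
# tracking side only calls Update() to enqueue Datum objects.
def _demo_viewer_loop(tracker, world_map):
    viewer = PCViewer()
    viewer.set_FromTracker(tracker).set_FromMap(world_map)
    viewer.Start()  # spawns the GL process
    while tracker.step():  # hypothetical per-frame tracking step
        viewer.Update()  # packs pose/points/landmarks into a Datum
    viewer.Stop()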
class Frame:
    Seq = AtomicCounter()
    logger = LoggerAdaptor("Frame", _logger)

    def __init__(self):
        self.identity = Identity(Frame.Seq())
        self.isKeyFrame = False

        ## Content

        # used to align with ground truth
        self.timestamp = None
        # might be an image path or url read by an asynchronous reader
        self._img_src = None
        # color image constructed from img: cv::Mat or std::vector<byte>
        self.img = None
        # computed grey img, or grey img collected directly from a camera
        self._img_grey = None
        # a camera instance to perform MVP or other image related computation
        self.camera = None
        # group of map tiles, where we store 3d points, measurements and source images
        # Note: this should be a weak reference to the original data representation
        self.runtimeBlock = None

        ## Rigid object movements
        # later we will move these attributes to Object3D, as is common practice
        # in the game development area, i.e. class Frame -> class Frame: public Object3D

        # rotation and translation relative to the origin.
        # Rwc. Note the author of ORB_SLAM2 uses the opposite notation (Rcw) to
        # represent the camera pose, see discussion #226
        self.R0 = np.eye(3)
        # twc
        self.t0 = np.zeros((3, 1))

        # rotation and translation relative to the last frame, updated in each frame
        self.R1 = np.eye(3)
        self.t1 = np.zeros((3, 1))

        # used to verify the reprojection error in the initialization process
        self.Rcw = np.eye(3)  # inv(R0)
        self.tcw = np.zeros((3, 1))  # -inv(R0).dot(t0)

        # GT set by the tracker when timestamp and ground truth are all available
        self.assoc_timestamp = None
        self.R_gt = None
        self.t_gt = None

        ## Covisibility Graph Topology

        # previous frame
        self.pre = None
        self.pixels = {}

        ## Features Expression Layer

        # extracted features
        # opencv key points
        self.kps = None
        # opencv key point features
        self.kps_feats = None
        # extracted roi keypoints (defaults to ORB keypoints)
        self.roi_kps = None
        # extracted roi features
        self.roi_feats = None
        # meta data
        self._detections = {}
        # median scene depth
        self.medianSceneDepth = -1.

        self.extractors = {}
        self.is_First = False

        ### Main Logics Executor ###
        self.predictors = {
            'OpticalFlow': OpticalFlowBBoxPredictor(),
            'OpticalFlowKPnt': OpticalFlowKPntPredictor()
        }
        self.matchers = {}
        self.USE_IMAGE_LEVEL_ORB = False
        self.SHOW_ROI = False
        self.sample_size = -1

    @property
    def seq(self):
        return self.identity.seq

    @seq.setter
    def seq(self, val):
        self.identity.seq = val

    def set_camera(self, camera):
        self.camera = camera
        return self

    def set_timestamp(self, timestamp):
        self.timestamp = timestamp
        return self

    # @todo : TODO
    def set_FromImg(self, img):
        self.img = img
        self.predictors['OpticalFlow'].set_FromImg(self.img_grey()).Init()
        self.predictors['OpticalFlowKPnt'].set_FromImg(self.img_grey()).Init()
        return self

    # getter of self._img_grey
    def img_grey(self):
        img_grey = self._img_grey
        if img_grey is None:
            # suppose the image is undistorted
            img_grey = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
            self._img_grey = img_grey
        return img_grey

    # @todo : TODO
    def extract(self):
        orb_kp, orb_desc = (None, None)
        if self.USE_IMAGE_LEVEL_ORB:
            # @todo : REFACTOR these tasks should be running in parallel
            # self.logger.info("%s, Extracting Image ORB features ..." % self)
            print("%s, Extracting Image ORB features ..." % self)
            orb_kp, orb_desc = self.ExtractORB()
            # @todo : TODO
            # add to frame
            # self.logger.info("Type of image orb key points : %s, size %d" % (type(orb_kp), len(orb_kp)))
            print("Type of image orb key points : %s, size %d" %
                  (type(orb_kp), len(orb_kp)))
            # self.logger.info("Type of image orb descriptors : %s, shape %s" % (type(orb_desc), orb_desc.shape))
            print("Type of image orb descriptors : %s, shape %s" %
                  (type(orb_desc), orb_desc.shape))

        # extract deep features for ROI
        # self.logger.info("%s, Extracting ROI features ..." % self)
        print("%s, Extracting ROI features ..." % self)
        roi_kp, roi_features = self.ExtractROI()

        kps = []
        kps_feats = []
        if self.USE_IMAGE_LEVEL_ORB:
            kps.extend(orb_kp)
            kps_feats.extend(orb_desc)

        # concatenate orb keypoints and features, see the opencv docs for the
        # definition of the returned key points and descriptors
        if self.extractors['sfe'].USE_ROI_LEVEL_ORB:
            for i, roi_feat_per_box in enumerate(roi_features):
                desc_per_box, kps_per_box = roi_feat_per_box['roi_orb']
                if len(kps_per_box) == 0:
                    label = roi_feat_per_box['label']
                    print("extract 0 points for detection#%d(%s)." %
                          (i, label))
                    # raise ValueError("'kps_per_box' should not be an empty list!")
                kps.extend(kps_per_box)
                kps_feats.extend(desc_per_box)

        self.kps = kps
        self.kps_feats = kps_feats
        self.roi_kp = roi_kp
        self.roi_features = roi_features
        return (kps, kps_feats, roi_kp, roi_features)

    def ExtractORB(self, bbox=None, mask=None, label=None):
        # using the opencv ORB extractor
        orb = self.extractors.get('orb', None)
        if orb is None:
            orb = cv2.ORB_create(edgeThreshold=5,
                                 patchSize=31,
                                 nlevels=8,
                                 fastThreshold=20,
                                 scaleFactor=1.2,
                                 WTA_K=4,
                                 scoreType=cv2.ORB_HARRIS_SCORE,
                                 firstLevel=0,
                                 nfeatures=1000)
            self.extractors['orb'] = orb

        img_grey = self.img_grey()
        shp = img_grey.shape

        if bbox is not None:
            y1, x1, y2, x2 = bbox

            ## adjust parameters
            h = y2 - y1
            w = x2 - x1
            # print("label %s, h:%d, w:%d" % (label, h, w))
            if np.min([w, h]) < 50:
                orb.setEdgeThreshold(0)
                orb.setPatchSize(15)
                orb.setScaleFactor(1.4)  # 1.4**8 ~ 8
                orb.setWTA_K(2)
            else:
                # restore
                orb.setEdgeThreshold(5)
                orb.setPatchSize(31)
                orb.setScaleFactor(1.2)
                orb.setWTA_K(4)
            ##

            # crop image
            new_img_grey = np.zeros(shp)
            new_img_grey[y1:y2, x1:x2] = img_grey[y1:y2, x1:x2]
            if self.SHOW_ROI:
                display(new_img_grey)
            img_grey = img_grey[y1:y2, x1:x2]
            # note cv2.resize expects (width, height)
            img_grey = cv2.resize(img_grey, (shp[1], shp[0]),
                                  interpolation=cv2.INTER_CUBIC)
            # img_grey = cv2.cvtColor(new_img_grey.astype('uint8'), cv2.COLOR_GRAY2BGR)

        # compute the key points vector
        kp = orb.detect(img_grey, None)
        # compute the descriptors with ORB
        kp, des = orb.compute(img_grey, kp)

        if bbox is not None:
            y1, x1, y2, x2 = bbox
            h = y2 - y1
            w = x2 - x1
            shp0 = img_grey.shape

            def _mapping(keypoint):
                # map key points detected on the resized crop back into the bbox
                x = keypoint.pt[0] * w / shp0[1] + x1
                y = keypoint.pt[1] * h / shp0[0] + y1
                keypoint.pt = (x, y)
                return keypoint

            kp = list(map(lambda p: _mapping(p), kp))
            # kp = list(map(lambda idx: cv2.KeyPoint(kp[idx].x + x1, kp[idx].y + y1), indice))

        if bbox is not None and self.sample_size != -1 and len(
                kp) > self.sample_size:
            indice = np.random.choice(len(kp), self.sample_size)
            kp = list(map(lambda idx: kp[idx], indice))
            des = list(map(lambda idx: des[idx], indice))

        # filter out kp, des with mask
        # @todo : TODO

        # assert(len(kp) > 0)
        if len(kp) == 0:
            return [], []
        return kp, des

    def ExtractROI(self):
        # using our semantic features extractor
        sfe = self.extractors.get('sfe', None)
        if sfe is None:
            sfe = SemanticFeatureExtractor()
            self.extractors['sfe'] = sfe
        sfe.attach_to(self)

        # defaults to the opencv channel-last format
        img = self.img
        detections = sfe.detect(img)
        self._detections = detections
        # compute the descriptors with our SemanticFeatureExtractor.encodeDeepFeatures
        kp, des = sfe.compute(img, detections)
        return kp, des

    def mark_as_first(self):
        self.is_First = True
        return self

    def find_px(self, x, y):
        H, W = self.img.shape[0:2]
        idx = int(y * W + x)
        return self.pixels.get(idx, None)

    def copy_pose_from(self, other_frame):
        self.R0 = other_frame.R0
        self.t0 = other_frame.t0
        camera = self.camera
        if camera is None:
            camera = Camera.clone(other_frame.camera)
            self.camera = camera
        return self

    def update_pose(self, pose):
        # see g2opy/python/types/slam3d/se3quat.h for details of the interface
        # print("R before update shape: %s, data: \n%s\n" % (self.R0.shape, self.R0))
        self.R0 = pose.orientation().matrix()
        # print("R after update shape: %s, data: \n%s\n" % (self.R0.shape, self.R0))
        # print("t before update shape: %s, data: \n%s\n" % (self.t0.shape, self.t0))
        self.t0 = pose.position().reshape((3, 1))
        # print("t after update shape: %s, data: \n%s\n" % (self.t0.shape, self.t0))

        # update R1, t1
        if self.pre is not None:
            self.R1 = self.R0.dot(np.linalg.inv(self.pre.R0))
            self.t1 = self.t0 - self.R1.dot(self.pre.t0)

            # update camera
            self.camera.R0 = self.R0
            self.camera.t0 = self.t0
            self.camera.R1 = self.R1
            self.camera.t1 = self.t1
        else:
            pass
        return self

    def get_pose_mat(self):
        if False:  # self.camera is not None:
            # print("camera.t0 shape: %s, data: \n%s\n" % (self.camera.t0.shape, self.camera.t0))
            # print("frame.t0 shape: %s, data: \n%s\n" % (self.t0.shape, self.t0))
            pose = g2o.SE3Quat(self.camera.R0, self.camera.t0.reshape(3, ))
        else:
            pose = g2o.SE3Quat(self.R0, self.t0.reshape(3, ))
        return pose.matrix()

    # retrieve the currently filled structure
    def get_points(self):
        pixels, cam_pts, points = [], [], []
        # @todo : TODO return current frame 3d structure
        for _, pixel in self.pixels.items():
            for cam_pt in pixel.sources:
                world = cam_pt.world
                if world is None:
                    continue
                points.append(world)
        return points

    def get_measurements(self):
        pixels, cam_pts, points = [], [], []
        # @todo : TODO return current frame 3d structure
        for _, pixel in self.pixels.items():
            for cam_pt in pixel.sources:
                world = cam_pt.world
                if world is None:
                    continue
                points.append(world)
                pixels.append(pixel)
        return points, pixels

    def get_landmarks(self):
        landmarks = set()
        for _, pixel in self.pixels.items():
            if pixel.roi is not None:
                landmarks.add(pixel.roi.findParent())
        return list(landmarks)
    def match(self, reference_frame, kps, kps_feat):
        frame_key = reference_frame.seq
        img_shp = self.img.shape[0:2]
        H, W = self.img.shape[0:2]
        matches = []
        unmatched_detections = []
        THR = 0.75
        THR2 = 0.75

        def _hamming_distance(x, y):
            from scipy.spatial import distance
            return distance.hamming(x, y)

        def _get_neighbors(R, row, col, feat_map, img_shp):
            H, W = img_shp[0:2]
            x1, y1 = (col - R, row - R)
            x2, y2 = (col + R, row + R)

            if x1 < 0:
                x1 = 0
            if y1 < 0:
                y1 = 0

            # freshly added (!important)
            if x2 < 0:
                x2 = 0
            if y2 < 0:
                y2 = 0

            if x2 >= W:
                x2 = W - 1
            if y2 >= H:
                y2 = H - 1

            indice = feat_map[y1:y2, x1:x2] != -1
            return feat_map[y1:y2, x1:x2][indice]

        inv_kp = {}
        for i, kp in enumerate(kps):
            x, y = kp.pt
            idx = int(y * W + x)
            inv_kp[idx] = i

        # init feat_map
        feat_map = np.full(img_shp, -1)
        for i, kp in enumerate(kps):
            x, y = kp.pt
            if int(y) >= img_shp[0] or int(y) < 0 or \
               int(x) >= img_shp[1] or int(x) < 0:
                continue
            feat_map[int(y), int(x)] = i

        cur_kp, cur_kp_features = self.kps, self.kps_feats

        lost_pixels = 0
        skipped_in_parent_searching = 0
        skipped_in_neighbors_searching = 0
        dist_larger_than_thr = 0

        print("reference frame #%d key points: %d" %
              (reference_frame.seq, len(kps)))
        print("cur frame #%d key points: %d" % (self.seq, len(cur_kp)))

        dx, dy = 0, 0
        cnt = 0
        if frame_key != self.seq:
            for i, kp in enumerate(cur_kp):
                x, y = kp.pt
                idx = int(y * W + x)
                px = self.pixels.get(idx, None)
                if px is None:
                    # print("The pixel (%f,%f) is none!" % (x, y))
                    lost_pixels += 1
                    continue

                parent = px.parent

                if parent is None:
                    # deactivate the following searching method
                    # print("The parent of the pixel is none!")
                    unmatched_detections.append((i, kp, px))
                    continue

                feat_l = cur_kp_features[i]

                # print("=====================")
                # print("searching starts from %s(Frame#%d) 's parent %s(Frame#%d)" % (px, px.frame.seq, parent, parent.frame.seq))
                flag = True
                while parent is not None and flag:
                    if parent.frame.seq == frame_key:
                        # print("find a matched pixel %s(Frame#%d) for Frame#%d" % (parent, parent.frame.seq, frame_key))
                        flag = False
                    else:
                        parent = parent.parent
                        if parent is not None:
                            # print("trace back to pixel %s(Frame#%d)" % (parent, parent.frame.seq))
                            pass

                if flag:
                    skipped_in_parent_searching += 1
                    unmatched_detections.append((i, kp, px))
                    continue

                # print("\n")
                # print("find a match for pixel %s" % px)

                # find a match
                # jdx = int(parent.r * W + parent.c)
                # j = inv_kp[jdx]

                # feat_r = kps_feat[j]
                # feat_r = parent.feature
                # dist = _hamming_distance(feat_l, feat_r)

                dx += parent.c - px.c
                dy += parent.r - px.r
                cnt += 1

                # perform local search
                indice = _get_neighbors(10, int(parent.r), int(parent.c),
                                        feat_map, img_shp)
                if len(indice) == 0:
                    skipped_in_neighbors_searching += 1
                    continue

                # KNN search
                feat_l = cur_kp_features[i]

                dist = None
                min_dist, min_ind = np.inf, None

                for ind in indice:
                    feat_r = kps_feat[ind]
                    dist = _hamming_distance(feat_l, feat_r)
                    if min_dist > dist:
                        min_dist = dist
                        min_ind = ind

                if min_dist >= THR:
                    dist_larger_than_thr += 1
                    unmatched_detections.append((i, kp, px))
                    continue

                # matches.append(cv2.DMatch(i, j, dist))
                matches.append(cv2.DMatch(i, min_ind, min_dist))
        else:
            # see Tracker._match
            raise Exception("Not Implemented Yet")

        if cnt != 0:
            dx /= cnt
            dy /= cnt
        print("Average dx:", dx)
        print("Average dy:", dy)

        # optical_flow = OpticalFlowKPntPredictor()
        # optical_flow.Init()
        # optical_flow.set_FromImg(self.img_grey())
        # # compute accumulative flows
        # pre = self.pre
        # flow = pre.predictors['OpticalFlowKPnt'].get_flow()
        # assert flow.shape[:2] == img_shp
        # while pre.seq is not frame_key:
        #     pre = pre.pre
        #     flow1 = pre.predictors['OpticalFlowKPnt'].get_flow()
        #     assert flow.shape == flow1.shape
        #     flow += flow1
        # # predict coord
        # kps_coor_r = list(map(lambda kp: kp.pt, kps))
        # # init feat_map
        # feat_map = np.full(img_shp, -1)
        # for i, kp in enumerate(kps_coor_r):
        #     x, y = kp
        #     shift_x = flow[int(y), int(x), 0]
        #     shift_y = flow[int(y), int(x), 1]
        #     x += shift_x
        #     y += shift_y
        #     if int(y) >= img_shp[0] or int(y) < 0 or \
        #        int(x) >= img_shp[1] or int(x) < 0:
        #         continue
        #     feat_map[int(y), int(x)] = i

        for i, kp, px in unmatched_detections:
            # if cnt > 10:
            #     target = [px.x+dx, px.y+dy]  # slightly bad
            # else:
            #     target = optical_flow.predict([px.data], reference_frame.img_grey())[0]  # not stable
            s = px.roi.map_keypoint_to_box(px.data)
            try:
                ref_roi = px.roi.findParent().findRoI(frame_key)  # very good
            except Exception as e:
                # print(e)
                continue
            if ref_roi.check_box_sim(px.roi):
                target = ref_roi.map_keypoint_from_box(s)
                # update dx, dy
                dx = (dx * cnt + target[0] - px.c) / (cnt + 1)
                dy = (dy * cnt + target[1] - px.r) / (cnt + 1)
                cnt += 1
            else:
                # target = [px.x+dx, px.y+dy]
                continue
            # target = [px.x, px.y]  # very bad

            indice = _get_neighbors(12, int(target[1]), int(target[0]),
                                    feat_map, img_shp)
            # indice = _get_neighbors(7, int(px.y), int(px.x), feat_map, img_shp)  # bad and not stable
            if len(indice) == 0:
                continue

            # KNN search
            feat_l = cur_kp_features[i]

            dist = None
            min_dist, min_ind = np.inf, None

            for ind in indice:
                feat_r = kps_feat[ind]
                dist = _hamming_distance(feat_l, feat_r)
                if min_dist > dist:
                    min_dist = dist
                    min_ind = ind

            if cnt > 10:
                if min_dist >= THR:
                    continue
            else:
                if min_dist >= THR2:
                    continue

            kp_in_ref = kps[min_ind]
            x, y = kp_in_ref.pt
            jdx = int(y * W + x)
            px_in_ref = reference_frame.pixels.get(jdx, None)
            if px_in_ref is not None and px.parent is None:
                px.parent = px_in_ref
            matches.append(cv2.DMatch(i, min_ind, min_dist))

        matches = sorted(matches, key=lambda mtch: mtch.distance)
        import pandas as pd

        print("lost pixels : %d" % lost_pixels)
        print("unmatched detections : %d" % len(unmatched_detections))
        print("skipped when searching parent : %d" %
              skipped_in_parent_searching)
        print(
            "skipped when searching neighbors of parents (8PX neighbors) : %d"
            % skipped_in_neighbors_searching)
        print("skipped when min dist is larger than %f : %d" %
              (THR, dist_larger_than_thr))
        if len(matches) < 10:
            raise Exception("Unwanted matching results!")

        distances = [mtch.distance for mtch in matches]
        df = pd.DataFrame({"Dist": distances})
        print(df)

        l = len(self.kps)
        mask = np.ones((l, 1))

        print("Found %d matches using Union Find algorithm" % len(matches))
        return matches, mask.tolist()

    def __repr__(self):
        return "<Frame %d>" % self.identity.seq

    def __str__(self):
        return "<Frame %d>" % self.identity.seq
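
# Typical per-frame flow through the Frame API (sketch; `camera`, `img`, `ts`
# and `reference_frame` are supplied by the tracker):
def _demo_frame_pipeline(camera, img, ts, reference_frame):
    frame = Frame()
    frame.set_camera(camera).set_timestamp(ts).set_FromImg(img)
    kps, kps_feats, roi_kp, roi_features = frame.extract()
    matches, mask = frame.match(reference_frame, reference_frame.kps,
                                reference_frame.kps_feats)
    return frame, matches, mask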
class SemanticFeatureExtractor:
    """
    Author: LEI WANG ([email protected])
    Date: March 1, 2020
    """
    logger = LoggerAdaptor("SemanticFeatureExtractor", _logger)

    # shared encoder among all SemanticFeatureExtractor instances
    label_encoder = None
    _base_model = None
    _model = None

    def __init__(self, base_model=None, config=None):
        self._config = config or sfe_config
        self._base_model = base_model or self.get_base_model()

        # input tensor to the model
        self.inp = None
        # output feature tensor
        self.features = None
        # FPN feature maps
        self.feature_maps = None
        # pool_size
        self.POOL_SIZE = self._base_model.config.POOL_SIZE
        # weak ref to the frame attached to
        self._frame = None

        self.dataset = CocoDataset()
        # dataset helpers
        self.LABELS_SET = self.dataset.class_names
        # pool channel
        self.POOL_CHANNEL = None

        # key points control
        # according to our experiment report, we will no longer use the bbox as keypoints
        self.USE_BBOX_AS_KEY_POINTS = False
        # key points type
        self.USE_ROI_LEVEL_ORB = True
        # score threshold to exclude detection results
        self.thr = 0.85
        # chosen subset of detection labels
        self.TRACK_LABELS_SET = [
            'BG', 'person',
            # 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
            'cat', 'dog', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
            'sports ball',
            # 'kite',
            'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
            'banana', 'apple', 'sandwich', 'hot dog', 'pizza', 'donut', 'cake',
            'orange', 'broccoli', 'carrot', 'chair', 'couch', 'potted plant',
            'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
            'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
            'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
            'hair drier', 'toothbrush'
        ]

        self._model = self.get_model()

    def attach_to(self, frame):
        self._frame = frame
        return self

    def get_base_model(self):
        if SemanticFeatureExtractor._base_model is None:
            config = self._config
            if not os.path.exists(config.COCO_MODEL_PATH):
                utils.download_trained_weights(config.COCO_MODEL_PATH)

            # build the mrcnn model
            class InferenceConfig(coco.CocoConfig):
                # Set batch size to 1 since we'll be running inference on
                # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
                GPU_COUNT = 1
                IMAGES_PER_GPU = 1

            mrcnn_config = InferenceConfig()
            # config_cpu_device()

            # Create the model object in inference mode.
            base_model = Model.MaskRCNN(mode="inference",
                                        model_dir=config.MODEL_PATH,
                                        config=mrcnn_config)
            # Load weights trained on MS-COCO
            base_model.load_weights(config.COCO_MODEL_PATH, by_name=True)
            SemanticFeatureExtractor._base_model = base_model

            def initKerasModel(model):
                for layer in model.layers:
                    # layer.trainable = False
                    pass
                return model

            self._base_model.keras_model = initKerasModel(
                self._base_model.keras_model)
        return self._base_model

    def get_model(self):
        model = SemanticFeatureExtractor._model
        if model is None:
            # restructure the model
            base_model = self.get_base_model()
            # see the Keras implementation of MaskRCNN for inference mode;
            # MaskRCNN accepts the input image plus the generated image meta
            # and anchors
            self.inp = base_model.keras_model.inputs

            # @todo : TODO important!
            # RPN computes (dx, dy, log(dh), log(dw)) and the ProposalLayer
            # generates filtered bboxes of ROIs with topK and
            # Non-Maximal-Suppression algorithms. Then the ROIAlign layer
            # aligns ROIs with the Pyramid Network Features (generated in the
            # last step).
            #
            #   x = PyramidROIAlign([pool_size, pool_size], name="{Task_Name}")([rois, image_meta] + fpn_feature_maps)
            #
            # Feature vector shape: (batch, number of different ratios, vectorized
            # image cropped by bbox (using an interpolation algorithm), ...)
            # Note MaskRCNN only implements the resized 224x224 F: RoI -> FPN
            # mapping (sampling pixels to the level of the feature map), and the
            # roi level for the Pyramid Network head is computed as
            #
            #   RoI_level = round(4 + log2(sqrt(w * h) / (224 / sqrt(IMAGE_WIDTH * IMAGE_HEIGHT))))
            #
            # where w and h are the size of the RoI. Note most 'famous'
            # implementations just "crop and resize by bilinear interpolation".
            # You don't know how a "statement" is implemented until you see it
            # (feel sad).
            from mrcnn.model import PyramidROIAlign, norm_boxes_graph
            config = self._base_model.config
            inputs = self.inp
            input_image = inputs[0]
            image_meta = inputs[1]
            rois_inp = KL.Input(shape=[None, 4], name="rois_inp")
            rois_inp1 = KL.Lambda(lambda x: norm_boxes_graph(
                x, K.shape(input_image)[1:3]))(rois_inp)

            feature_maps = self.feature_maps
            if feature_maps is None:
                P2 = self._base_model.keras_model.get_layer('fpn_p2').output
                P3 = self._base_model.keras_model.get_layer('fpn_p3').output
                P4 = self._base_model.keras_model.get_layer('fpn_p4').output
                P5 = self._base_model.keras_model.get_layer('fpn_p5').output
                feature_maps = [P2, P3, P4, P5]
                self.feature_maps = feature_maps

            x = PyramidROIAlign(
                (config.POOL_SIZE, config.POOL_SIZE),
                name="features_extractor")([rois_inp1, image_meta] +
                                           feature_maps)
            self.features = x

            self.logger.info("Constructing deep feature extraction model ...")

            class ModelWrapper:
                def __init__(self, keras_model, base_model):
                    self._keras_model = keras_model
                    self._base_model = base_model

                def detect(self, img, bboxes):
                    # mold images
                    molded_images, image_metas, windows = self._base_model.mold_inputs(
                        [img])
                    # get anchors
                    config = self._base_model.config
                    anchors = self._base_model.get_anchors(
                        molded_images[0].shape)
                    anchors = np.broadcast_to(anchors,
                                              (config.BATCH_SIZE, ) +
                                              anchors.shape)
                    # reshape bbox
                    bboxes = np.broadcast_to(bboxes, (config.BATCH_SIZE, ) +
                                             bboxes.shape)
                    features = self._keras_model.predict(
                        [bboxes, molded_images, image_metas, anchors],
                        verbose=0)
                    return features

            model = ModelWrapper(
                KM.Model(inputs=[
                    rois_inp,
                ] + inputs,
                         outputs=self.features,
                         name="SemanticFeatureExtractor"), self._base_model)
            SemanticFeatureExtractor._model = model
            self.logger.info(
                "Construction of deep feature extraction model complete.")
        return model

    def encodeDeepFeatures(self, boxes, masks, roi_features, class_ids,
                           scores):
        keypoints = []
        features = []
        n_instances = boxes.shape[0]
        for i in range(n_instances):
            keypoints_per_box = []
            feature = {}
            if not np.any(boxes[i]):
                # Skip this instance. It has no bbox; likely lost in image cropping.
                continue
            y1, x1, y2, x2 = boxes[i]
            class_id = class_ids[i]
            score = scores[i]
            label = self.LABELS_SET[class_id]
            # our landmark idx starts from 1
            print("#%d type(%s), score:%f, bbox:" % (i + 1, label, score),
                  (x1, y1, x2, y2))
            if label not in self.TRACK_LABELS_SET:
                # self.logger.info("Found unexpected label %s for track, ignore ..." % label)
                print("Found unexpected label %s for track, ignore ..." %
                      label)
                continue
            if score < self.thr:
                print("detected %s score is less than %f, ignore ..." %
                      (label, self.thr))
                continue

            if self.USE_BBOX_AS_KEY_POINTS:
                # use the two opposite bbox corners as key points
                keypoints_per_box.append(
                    Pixel2D(y1, x1).set_frame(self._frame))
                keypoints_per_box.append(
                    Pixel2D(y2, x2).set_frame(self._frame))

            if self.USE_ROI_LEVEL_ORB:
                kp, des = self._frame.ExtractORB(bbox=boxes[i], label=label)
                for j, p in enumerate(kp):
                    # kp coordinates are float numbers
                    # assert (p.pt[1] - int(p.pt[1])) != 0
                    # assert (p.pt[0] - int(p.pt[0])) != 0
                    keypoints_per_box.append(
                        Pixel2D(p.pt[1], p.pt[0]).set_frame(
                            self._frame).set_kp(p).set_feature(des[j]))
                # keep a reference to the key points associated with the descriptor
                feature['roi_orb'] = (des, kp)
                self.logger.info(
                    "extracting orb key points and features for detection")

            feature['box'] = boxes[i]
            feature['mask'] = masks[:, :, i]

            # for a vocabulary database of large size, please use one-hot
            # encoding + embedding instead.
            # encode the label to a categorical value vector
            if SemanticFeatureExtractor.label_encoder is None:
                from sklearn.preprocessing import LabelEncoder
                from sklearn.preprocessing import OneHotEncoder
                label_encoder = LabelEncoder()
                indice = label_encoder.fit_transform(self.TRACK_LABELS_SET)
                categorical_features_encoder = OneHotEncoder(
                    handle_unknown='ignore')
                inp = list(zip(self.TRACK_LABELS_SET, indice))
                print("categorical_features shp:", np.array(inp).shape)
                import pandas as pd
                df = pd.DataFrame({
                    'LABEL': self.TRACK_LABELS_SET,
                    'int': indice
                })
                print(df.head(10))
                categorical_features_encoder.fit(inp)
                encoded_features = categorical_features_encoder.transform(
                    inp).toarray()

                def encoder(label):
                    new_class_id = self.TRACK_LABELS_SET.index(label)
                    return encoded_features[new_class_id, :]

                SemanticFeatureExtractor.label_encoder = encoder

            feature['roi_feature'] = roi_features[i]
            feature['class_id'] = SemanticFeatureExtractor.label_encoder(label)
            feature['score'] = score
            # feature['keypoints_per_box'] = keypoints_per_box

            # used for the construction of the observation
            feature['label'] = label

            # add to features list
            features.append(feature)
            keypoints.append(keypoints_per_box)
        return (keypoints, features)

    def detect(self, img):
        base_model = self.get_base_model()
        ret = base_model.detect([img], verbose=1)[0]
        return ret

    def compute(self, img, detection):
        # get keras model
        model = self.get_model()
        # BATCH_SIZE is set to 1
        roi_features = model.detect(img, detection['rois'])[0]
        print("roi_features shape: ", roi_features.shape)
        rois, masks = detection['rois'], detection['masks']
        assert len(rois) == roi_features.shape[0]
        assert (self.POOL_SIZE == roi_features.shape[1] ==
                roi_features.shape[2])
        self.POOL_CHANNEL = roi_features.shape[3]
        shp = roi_features.shape
        roi_features = np.reshape(roi_features,
                                  (shp[0], shp[1] * shp[2] * shp[3]))
        print("=== Detection Results ===")
        keypoints, features = self.encodeDeepFeatures(rois, masks,
                                                      roi_features,
                                                      detection['class_ids'],
                                                      detection['scores'])
        return (keypoints, features)
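
# Worked example of the RoI-level rule quoted in get_model (w and h are the
# RoI size in normalized image coordinates, as used by mrcnn's
# PyramidROIAlign): for a 224x224 RoI in a 1024x1024 image,
#
#   level = 4 + round(log2((224/1024) / (224/1024))) = 4 + 0 = 4   ->  P4
#
# while a 112x112 RoI gives 4 + log2(0.5) = 3, i.e. P3; mrcnn clamps the
# level to [2, 5]. A direct transcription of the rule (illustrative):
def _demo_roi_level(w, h, image_width, image_height):
    import math
    # w, h in normalized coordinates (box size / image size)
    level = 4 + round(
        math.log2(
            math.sqrt(w * h) /
            (224.0 / math.sqrt(image_width * image_height))))
    return min(5, max(2, level))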
class RuntimeBlock:
    logger = LoggerAdaptor("RuntimeBlock", _logger)

    def __init__(self):
        self._frames = []
        self.device = Device()
        # detected landmarks from sfe
        self.landmarks = {}
        # key points
        self.keypointCloud = {}
        # active frames selected from a dynamic sliding window
        self.slidingWindow = 7
        # active frames stack: see discussion https://github.com/raulmur/ORB_SLAM2/issues/872
        self.active_frames = []

        # ====
        # lock shared between the main thread (producer) and one of the
        # LocalMapping threads or Relocalization threads (consumers) that
        # could update the map
        self.modifying_mapblock_locker = threading.Lock()
        # atomic boolean variable, guarded by self.modifying_mapblock_locker
        self.complete_modifying = True  # default state
        self.modifying_completion_condition = threading.Condition(
            self.modifying_mapblock_locker)

        # =====
        # lock shared between the thread sending requests to draw the mapblock
        # and the thread updating it
        self.completion_locker = threading.Lock()
        # atomic boolean variable, guarded by self.completion_locker
        self.complete_updating = False
        self.update_map_condition = threading.Condition(self.completion_locker)

    def set_complete_updating(self):
        with self.completion_locker:
            # the lock should be acquired even though the guarded variable is atomic
            self.complete_updating = True
            self.update_map_condition.notify()

    def set_complete_modifying(self):
        with self.modifying_mapblock_locker:
            self.complete_modifying = True
            self.modifying_completion_condition.notify()

    def set_device(self, device):
        self.device.fx = device.fx
        self.device.fy = device.fy
        self.device.cx = device.cx
        self.device.cy = device.cy
        self.device.distortion = device.distortion
        return self

    def load_device(self, device_path):
        def parseCalibratedDevice(fn_yaml):
            import yaml
            with open(fn_yaml) as f:
                # skip the first line
                f.readline()
                content = f.read()
                parsed = yaml.load(content, Loader=yaml.FullLoader)
            return parsed

        parsed = parseCalibratedDevice(device_path)
        parsed_camera = parsed["Camera"]
        self.device.fx = parsed_camera["fx"]
        self.device.fy = parsed_camera["fy"]
        self.device.cx = parsed_camera["cx"]
        self.device.cy = parsed_camera["cy"]
        self.device.distortion = parsed_camera["distortion"]
        return self

    def add(self, entity):
        if isinstance(entity, Frame):
            if len(self._frames) == 0:
                entity.mark_as_first()
            # self.add_new_key_frame(entity)
            self._frames.append(entity)
        else:
            raise ValueError("expect entity to be type of %s but found %s" %
                             (str(self.__class__), str(type(entity))))

    def get_frames(self):
        return self._frames

    def findFrame(self, frame_key):
        try:
            # return self._frames[frame_key - 1]
            for frame in self._frames:
                if frame.seq == frame_key:
                    return frame
            raise Exception("Not found!")
        except Exception as e:
            print("map frames:", self._frames)
            print("query frame key:", frame_key)
            raise

    # @todo : TODO
    def get_active_frames(self):
        # @todo : TODO modify active frames list
        return self.active_frames

    # @todo : TODO
    def add_new_key_frame(self, entity):
        if isinstance(entity, Frame):
            push(self.active_frames, entity)
        else:
            raise TypeError("expect type of entity to be Frame, but found %s"
                            % type(entity))

    # landmarks registration
    def register(self, detection):
        self.landmarks[detection.seq] = detection

    # keypoints registration
    def registerKeyPoints(self, cur_frame, kps1, last_frame, kps2,
                          cur_cam_pts3, last_cam_pts3, points, mtched, mask):
        H, W = cur_frame.img.shape[0:2]
        newly_add = []
        numOfMatched = 0
        for i, mtch in enumerate(mtched):
            left_idx = mtch.queryIdx
            right_idx = mtch.trainIdx
            # kp observed by left_px using triangulation
            kp = points[i]
            if kp.isBad():
                continue
            # x - columns
    def add(self, entity):
        if isinstance(entity, Frame):
            if len(self._frames) == 0:
                entity.mark_as_first()
            # self.add_new_key_frame(entity)
            self._frames.append(entity)
        else:
            raise ValueError("expect entity to be type of Frame but found %s" %
                             str(type(entity)))

    def get_frames(self):
        return self._frames

    def findFrame(self, frame_key):
        try:
            # return self._frames[frame_key - 1]
            for frame in self._frames:
                if frame.seq == frame_key:
                    return frame
            raise Exception("Not found!")
        except Exception as e:
            print("map frames:", self._frames)
            print("query frame key:", frame_key)
            raise e

    # @todo : TODO
    def get_active_frames(self):
        # @todo : TODO modify active frames list
        return self.active_frames

    # @todo : TODO
    def add_new_key_frame(self, entity):
        if isinstance(entity, Frame):
            # `push` was undefined in the original; append keeps the
            # stack-of-active-frames semantics
            self.active_frames.append(entity)
        else:
            raise TypeError("expect type of entity to be Frame, but found %s" %
                            type(entity))

    # landmarks registration
    def register(self, detection):
        self.landmarks[detection.seq] = detection

    # keypoints registration
    def registerKeyPoints(self, cur_frame, kps1, last_frame, kps2,
                          cur_cam_pts3, last_cam_pts3, points, mtched, mask):
        H, W = cur_frame.img.shape[0:2]
        newly_add = []
        numOfMatched = 0
        for i, mtch in enumerate(mtched):
            left_idx = mtch.queryIdx
            right_idx = mtch.trainIdx

            # kp observed by left_px using triangulation
            kp = points[i]
            if kp.isBad():
                continue

            # x - columns, y - rows
            (x1, y1) = kps1[left_idx].pt
            (x2, y2) = kps2[right_idx].pt

            left_px_idx = int(y1 * W + x1)
            right_px_idx = int(y2 * W + x2)

            # retrieve stored key points
            left_px = cur_frame.pixels[left_px_idx]
            right_px = last_frame.pixels[right_px_idx]

            # check whether kp is already observed by right_px
            mappoint = right_px.findObservationOf(kp)
            if mappoint is not None:
                numOfMatched += 1
                try:
                    left_px.add_camera_point(cur_cam_pts3[i])
                except AttributeError:
                    left_px.add(cur_cam_pts3[i])
                print("point %s is already observed by mappoint %s" %
                      (kp, mappoint))
                try:
                    mappoint.associate_with(cur_frame, left_px_idx)
                except Exception as e:
                    print(e)
                    # exit(-1)

                # processing ROI
                landmark = left_px.roi.findParent()
                try:
                    landmark.associate_with(mappoint, cur_frame, left_px_idx)
                except Exception as e:
                    print(e)
                    # exit(-1)
                try:
                    landmark.associate_with(mappoint, last_frame, right_px_idx)
                except Exception as e:
                    # the original used a bare `except` and referenced an
                    # unbound `e`; bind the exception explicitly
                    print(e)
                    # exit(-1)
            else:
                try:
                    left_px.add_camera_point(cur_cam_pts3[i])
                except AttributeError:
                    left_px.add(cur_cam_pts3[i])
                cur_cam_pts3[i].px = left_px
                cur_cam_pts3[i].world = kp

                try:
                    right_px.add_camera_point(last_cam_pts3[i])
                except AttributeError:
                    right_px.add(last_cam_pts3[i])
                last_cam_pts3[i].px = right_px
                last_cam_pts3[i].world = kp

                # upgrade from camera point to map key point
                try:
                    kp.associate_with(cur_frame, left_px_idx)
                    # print("Associate %s with #F%d.%s at <%d>" % (kp, cur_frame.seq, left_px, left_px_idx))
                    kp.associate_with(last_frame, right_px_idx)
                    # print("Associate %s with #F%d.%s at <%d>" % (kp, last_frame.seq, right_px, right_px_idx))
                except Exception as e:
                    print(e)
                    raise e

                # compute a uuid key for registration.
                # This varies from one program to another. For example, if a
                # program retrieves it from tiles, a key is computed by
                # uint64(tile_id) << 32 | seq_id
                key = self.get_point3d_key(cur_frame.seq, i)
                kp.key = key

                # set RGB color for rendering
                # color = cur_frame.img[int(y1), int(x1)]
                # kp.set_color(color)

                # store it in the map block instance
                # @todo : TODO octree check
                # Store the point into a concurrent hash map. Note that since a
                # python dict is guarded by the Global Interpreter Lock (GIL),
                # only one thread mutates it at a time; we can use it directly
                # but may suffer from performance issues.
                self.keypointCloud[key] = kp
                newly_add.append(kp)

                # processing ROI
                landmark = left_px.roi.findParent()
                landmark.associate_with(kp, cur_frame, left_px_idx)
                landmark.associate_with(kp, last_frame, right_px_idx)
                # print("Associate KeyPoint %s with RoI %s" % (kp, landmark))
                landmark.add(kp)
                kp.set_color(np.array(landmark.color or [1., 1., 0.]) * 255.)

        if DEBUG:
            print(
                "Newly added world points: %d; Points matched to point cloud: %d"
                % (len(newly_add), numOfMatched))

    def get_point3d_key(self, tile_id, seq_id):
        if tile_id is None:
            # the original assigned the float 0., which would break the shift
            # below; the default tile id must be an int
            tile_id = 0
        return tile_id << 32 | seq_id
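
    # A minimal sketch (not part of the original API): inverse of
    # get_point3d_key, assuming keys are packed as tile_id << 32 | seq_id with
    # both halves fitting in 32 bits.
    @staticmethod
    def decode_point3d_key(key):
        tile_id = key >> 32
        seq_id = key & 0xFFFFFFFF
        return tile_id, seq_id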
    # @todo : TODO
    def track(self, frame, detected_objects):
        if frame.is_First:
            self.logger.info(
                "Add %d detected objects to initialize landmarks" %
                len(detected_objects))
            for ob in detected_objects:
                self.landmarks[ob.seq] = ob
            return tuple()
        else:
            trackList = self.trackList()
            for landmark in trackList:
                landmark.predict(frame)

            matcher = frame.matchers.get('ROIMatcher', None)
            if matcher is None:
                # the original never assigned the cached matcher back to the
                # local variable, leaving `matcher` as None on cache hits;
                # create and cache it here
                matcher = ROIMatcher()
                frame.matchers['ROIMatcher'] = matcher

            N = len(trackList)
            M = len(detected_objects)

            # solving the N*M assignment matrix using the KM algorithm
            mtched_indice, unmtched_landmarks_indice, unmtched_detections_indice = matcher.mtch(
                trackList, detected_objects)
            print("%d mtches, %d unmtched landmarks, %d unmtched detections" %
                  (len(mtched_indice), len(unmtched_landmarks_indice),
                   len(unmtched_detections_indice)))

            mtched, unmtched_landmarks, unmtched_detections = [], [], []

            # trigger relocalization
            if len(mtched_indice) == 0:
                # not very stable
                # raise RelocalizationErr()
                pass

            # mtched_indice: List[Tuple] of (row, col, weights[row, col], distance[row, col])
            for match in mtched_indice:
                landmark = trackList[match[0]]
                detection = detected_objects[match[1]]
                detection.parent = landmark
                landmark.records.append((detection, detection.frame.seq,
                                         landmark.predicted_states))
                landmark.update(detection)
                mtched.append((landmark, detection))

            # mark unmtched_landmarks
            # @todo : TODO
            for idx in unmtched_landmarks_indice:
                landmark = trackList[idx]
                unmtched_landmarks.append(landmark)

            # add unmtched_detections to landmarks
            l = len(unmtched_detections_indice)
            if l > 0:
                self.logger.info("Adding %d new landmarks" % l)
                for j in unmtched_detections_indice:
                    detection = detected_objects[j]
                    if self.landmarks.get(detection.seq, None) is not None:
                        raise ValueError(
                            "The detection has already been registered")
                    self.landmarks[detection.seq] = detection
                    unmtched_detections.append(detection)
            else:
                self.logger.info("No new landmarks found.")

            # do something with the mtched
            if DEBUG:
                # rendering the matches
                pass

            return mtched, unmtched_landmarks, unmtched_detections

    def trackKeyPoints(self, cur_frame, kps, kps_features, last_frame=None,
                       matches1to2=None):
        if cur_frame.is_First:
            self.logger.info(
                "Add %d extracted keypoints to initialize key points cloud" %
                len(kps))
            H, W = cur_frame.img.shape[0:2]
            for i, kp in enumerate(kps):
                # create a world point, though we don't know depth information
                # and camera poses yet; just for demo purposes
                world = WorldPoint3D(-1, -1, -1)
                world.type = "world"

                # see cv2::KeyPoint for details
                x, y = kp.pt
                px_idx = int(y * W + x)
                px = cur_frame.pixels[px_idx]
                world.associate_with(cur_frame, px_idx)

                local = CameraPoint3D(-1, -1, -1)
                local.world = world
                local.px = px
                try:
                    px.add_camera_point(local)
                except AttributeError:
                    px.add(local)
        else:
            # @todo : TODO compare with map points directly
            raise Exception("Not Implemented Yet!")

    def trackList(self):
        _trackList = []
        for key, landmark in self.landmarks.items():
            if landmark.is_active() or landmark.viewable():
                _trackList.append(landmark)
        self.logger.info("Retrieve %d active and viewable landmarks" %
                         len(_trackList))
        print(self.landmarks)
        return _trackList

    # @todo : TODO
    def trackPointsList(self):
        return self.keypointCloud

    def Culling(self):
        trackList = self.trackList()
        for landmark in trackList:
            landmark.culling()
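
    # A minimal consumer-side sketch (not in the original code): a rendering
    # thread can block on update_map_condition until the map finishes
    # updating, pairing with set_complete_updating() above.
    def wait_until_map_updated(self):
        with self.completion_locker:
            while not self.complete_updating:
                self.update_map_condition.wait()
            # reset the flag so the next round of updates can be awaited
            self.complete_updating = False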
    def UpdateMap(self, R, t):
        print("Updating matrix: \nR:\n%s\nt:\n%s\n" % (R, t))

        # updating point coordinates: p -> R.p + t
        pointCloud = self.keypointCloud
        for k, p in pointCloud.items():
            v = p.data.reshape((3, 1))
            v = R.dot(v) + t
            v = v.reshape((3, ))
            # self.logger.info("Updating points from %s to %s" % (p.data, v))
            p.update(*v)

        # updating poses: (R0, t0) -> (R.R0, R.t0 + t)
        frames = self.get_active_frames()
        for frame in frames:
            R0 = R.dot(frame.R0)
            t0 = R.dot(frame.t0) + t
            print("Updating pose: \nR:\n%s\nt:\n%s\n" % (R0, t0))
            pose = g2o.SE3Quat(R0, t0.reshape((3, )))
            frame.update_pose(pose)

    # @todo : TODO
    def Merge(self, other_map_block, matched_rois, unmtched_rois,
              matched_points, unmatched_points):
        other = other_map_block

        # add unseen points
        for k, p in unmatched_points.items():
            if self.keypointCloud.get(k, None) is not None:
                raise ValueError("The point has already been registered!")
            self.keypointCloud[k] = p

        # add unseen landmarks
        for roi in unmtched_rois:
            if self.landmarks.get(roi.seq, None) is not None:
                raise ValueError("The roi has already been registered!")
            self.landmarks[roi.seq] = roi

        return self
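
# A usage sketch (hypothetical, not part of the original tests): applying a
# global correction to a RuntimeBlock, e.g. after a loop-closure optimization,
# then waking any thread waiting to redraw the map.
def _example_apply_global_correction(block):
    R = np.eye(3)           # identity rotation: a no-op correction
    t = np.zeros((3, 1))    # zero translation
    block.UpdateMap(R, t)   # every point p -> R.p + t; every pose -> (R.R0, R.t0 + t)
    block.set_complete_updating()  # notify update_map_condition waiters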