def _get_gauss_response(self, img, gt):
    """Build a 2-D Gaussian response map centred on the selected target.

    The map has the same shape as ``img`` and peaks at the centre of the
    box ``gt = [x, y, width, height]``. A general 2-D Gaussian has one
    spread per axis; a single value (``self.args.sigma``) is used for both
    here, so the iso-contours are circles rather than ellipses and every
    pixel at the same distance from the centre gets the same weight.

    Returns:
        The map after it has been passed through ``linear_mapping``
        (described by the original author as ``(x - min) / (max - min)``).
    """
    rows, cols = img.shape

    # Per-pixel column (x) and row (y) coordinates.
    col_idx, row_idx = np.meshgrid(np.arange(cols), np.arange(rows))

    # Centre of the selected target region.
    center_x = gt[0] + 0.5 * gt[2]
    center_y = gt[1] + 0.5 * gt[3]

    # Squared distance to the centre, scaled by 2 * sigma.
    sq_dist = np.square(col_idx - center_x) + np.square(row_idx - center_y)
    scaled = sq_dist / (2 * self.args.sigma)

    return linear_mapping(np.exp(-scaled))
def track(self, current_frame):
    """Locate the target in ``current_frame`` and update the filter online.

    Reads ``self.Ai`` / ``self.Bi`` (filter numerator and denominator),
    ``self.G``, ``self.learning_rate`` and ``self.pos``
    (``[x, y, width, height]``). ``self.pos`` is shifted in place by the
    offset of the response peak from the window centre, and ``self.Ai`` /
    ``self.Bi`` are blended with the patch at the new position.

    Args:
        current_frame: BGR image (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        ``self.pos`` after the update (the same object, mutated).
    """
    frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
    frame_gray = frame_gray.astype(np.float32)

    # Apply the current filter to the window at the previous position.
    Hi = self.Ai / self.Bi
    fi = frame_gray[self.pos[1]:self.pos[1] + self.pos[3],
                    self.pos[0]:self.pos[0] + self.pos[2]]
    fi = pre_process(fi)
    gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

    # Shift the box by the displacement of the response peak from the centre.
    max_pos = np.unravel_index(np.argmax(gi, axis=None), gi.shape)
    self.pos[1] += max_pos[0] - gi.shape[0] // 2
    self.pos[0] += max_pos[1] - gi.shape[1] // 2

    # Re-crop at the new position for the online update.
    # NOTE(review): the window is not clipped to the frame here - confirm
    # callers keep the box inside the image.
    fi = frame_gray[self.pos[1]:self.pos[1] + self.pos[3],
                    self.pos[0]:self.pos[0] + self.pos[2]]
    fi = pre_process(fi)

    # The patch spectrum is needed three times; compute it once.
    Fi = np.fft.fft2(fi)
    Fi_conj = np.conjugate(Fi)
    lr = self.learning_rate
    self.Ai = lr * (self.G * Fi_conj) + (1 - lr) * self.Ai
    self.Bi = lr * (Fi * Fi_conj) + (1 - lr) * self.Bi
    return self.pos
def _get_gauss_response(self, img, gt):
    """Return a Gaussian response map, shaped like ``img``, peaking at the box centre.

    ``gt`` is ``[x, y, width, height]``; the spread is taken from
    ``self.args.sigma`` and the result is passed through ``linear_mapping``.
    """
    n_rows, n_cols = img.shape

    # Open index grids: (n_rows, 1) row indices and (1, n_cols) column indices.
    row_idx, col_idx = np.ogrid[:n_rows, :n_cols]

    # Centre of the object.
    center_x = gt[0] + 0.5 * gt[2]
    center_y = gt[1] + 0.5 * gt[3]

    # Broadcasting the two grids yields the full (n_rows, n_cols) distance map.
    dist = np.square(col_idx - center_x) + np.square(row_idx - center_y)
    dist = dist / (2 * self.args.sigma)

    response = np.exp(-dist)
    return linear_mapping(response)
def _get_gauss_response(self, img, gt):
    """Compute the Gaussian-shaped target response for a bounding box.

    Args:
        img: 2-D image; only its ``(height, width)`` shape is used.
        gt: bounding box as ``[x, y, width, height]``.

    Returns:
        ``linear_mapping(exp(-d))`` where ``d`` is, for every pixel, the
        squared distance to the box centre divided by ``2 * self.args.sigma``.
    """
    height, width = img.shape

    # Row (y) and column (x) index of every pixel.
    yy, xx = np.indices((height, width))

    # Centre of the rectangle.
    center_x = gt[0] + 0.5 * gt[2]
    center_y = gt[1] + 0.5 * gt[3]

    # Squared distance of each pixel to the centre, divided by 2 * sigma.
    offset_x = xx - center_x
    offset_y = yy - center_y
    dist = (np.square(offset_x) + np.square(offset_y)) / (2 * self.args.sigma)

    # e ** -dist, then normalise.
    return linear_mapping(np.exp(-dist))
def start_tracking(self):
    """Track a user-selected target through ``self.frame_lists`` and time each frame.

    The target box is picked interactively on the first frame with
    ``cv2.selectROI``. For every frame the box is drawn, the frame is shown
    in the ``'demo11'`` window and written to ``goog/<index>.jpg``.

    Returns:
        list of per-frame processing times in seconds (``time.time()``
        differences), one entry per frame.
    """
    # Local import: only needed to guarantee the dump directory exists.
    import os

    time_list = []

    # First frame as float32 grayscale.
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Initial box chosen by the user: [x, y, width, height].
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    roi_size = (init_gt[2], init_gt[3])

    # Desired (Gaussian) response and the image patch, both cropped to the box.
    response_map = self._get_gauss_response(init_frame, init_gt)
    g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    G = np.fft.fft2(g)

    # Initial filter numerator / denominator.
    Ai, Bi = self._pre_training(fi, G)
    lr = self.args.lr

    # cv2.imwrite does not create directories; without this the frame dumps
    # below would be dropped silently when 'goog/' is missing.
    os.makedirs('goog', exist_ok=True)

    for idx, frame_file in enumerate(self.frame_lists):
        start = time.time()
        current_frame = cv2.imread(frame_file)
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)

        if idx == 0:
            Ai = lr * Ai
            Bi = lr * Bi
            pos = init_gt.copy()
            # Same box as [xmin, ymin, xmax, ymax].
            clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
        else:
            # Apply the filter learnt so far to the window at the last position.
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, roi_size))
            gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

            # Offset of the response peak (mean over ties) from the window centre.
            max_pos = np.where(gi == np.max(gi))
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

            # Only the top-left corner moves; width and height stay fixed.
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy

            # Clipped box [xmin, ymin, xmax, ymax].
            frame_h, frame_w = current_frame.shape[:2]
            clip_pos[0] = np.clip(pos[0], 0, frame_w)
            clip_pos[1] = np.clip(pos[1], 0, frame_h)
            clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame_w)
            clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame_h)
            clip_pos = clip_pos.astype(np.int64)

            # Re-crop at the new position; its spectrum is computed once and
            # reused for both running averages.
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            Fi = np.fft.fft2(pre_process(cv2.resize(fi, roi_size)))
            Fi_conj = np.conjugate(Fi)
            Ai = lr * (G * Fi_conj) + (1 - lr) * Ai
            Bi = lr * (Fi * Fi_conj) + (1 - lr) * Bi

        # Visualise and dump the annotated frame.
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0), 2)
        cv2.imshow('demo11', current_frame)
        cv2.imwrite('goog/' + str(idx) + '.jpg', current_frame)
        cv2.waitKey(10)

        time_list.append(time.time() - start)

    # NOTE(review): `out` is not defined in this function - presumably a
    # module-level cv2.VideoWriter; confirm, since nothing is written to it here.
    out.release()
    print('视频写入成功!')
    return time_list
(255, 255, 255), 2) # white # write down the bounding box location fm.write(str(lpos)) fm.write('\n') else: # mosse Hi = Ai / Bi # subWindow fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]] # keep win size unchanged fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3]))) Gi = Hi * np.fft.fft2(fi) gi = linear_mapping(np.fft.ifft2(Gi)) # find the max pos max_value = np.max(gi) max_pos = np.where(gi == max_value) dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2) dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2) # update the position pos[0] = pos[0] + dx pos[1] = pos[1] + dy # trying to get the clipped position [xmin, ymin, xmax, ymax] clip_pos[0] = np.clip(pos[0], 0, frame.shape[1]) clip_pos[1] = np.clip(pos[1], 0, frame.shape[0]) clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame.shape[1]) clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame.shape[0]) clip_pos = clip_pos.astype(np.int64) # get the next fi using the new bounding box
def start_tracking(self):
    """Track a user-selected target through ``self.frame_lists``.

    The target box is picked interactively on the first frame with
    ``cv2.selectROI``. Every frame is shown in the ``'demo'`` window with
    the current box drawn on it; when ``self.args.record`` is truthy the
    annotated frame is also saved under
    ``record_frames/<second component of self.img_path>/``.
    """
    # First frame as float32 grayscale.
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Initial box chosen by the user: [x, y, width, height].
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    roi_size = (init_gt[2], init_gt[3])

    # Desired (Gaussian) response and the image patch, both cropped to the box.
    response_map = self._get_gauss_response(init_frame, init_gt)
    g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    G = np.fft.fft2(g)

    # Initial filter numerator / denominator.
    Ai, Bi = self._pre_training(fi, G)
    lr = self.args.lr

    for idx, frame_file in enumerate(self.frame_lists):
        current_frame = cv2.imread(frame_file)
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)

        if idx == 0:
            Ai = lr * Ai
            Bi = lr * Bi
            pos = init_gt.copy()
            # Same box as [xmin, ymin, xmax, ymax].
            clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
        else:
            # Apply the filter learnt so far to the window at the last position.
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, roi_size))
            gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

            # Offset of the response peak (mean over ties) from the window centre.
            max_pos = np.where(gi == np.max(gi))
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

            # Only the top-left corner moves; width and height stay fixed.
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy

            # Clipped box [xmin, ymin, xmax, ymax].
            frame_h, frame_w = current_frame.shape[:2]
            clip_pos[0] = np.clip(pos[0], 0, frame_w)
            clip_pos[1] = np.clip(pos[1], 0, frame_h)
            clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame_w)
            clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame_h)
            clip_pos = clip_pos.astype(np.int64)

            # Re-crop at the new position; its spectrum is computed once and
            # reused for both running averages.
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            Fi = np.fft.fft2(pre_process(cv2.resize(fi, roi_size)))
            Fi_conj = np.conjugate(Fi)
            Ai = lr * (G * Fi_conj) + (1 - lr) * Ai
            Bi = lr * (Fi * Fi_conj) + (1 - lr) * Bi

        # Visualise the tracking process.
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0), 2)
        cv2.imshow('demo', current_frame)
        cv2.waitKey(100)

        # Optionally save the annotated frame.
        if self.args.record:
            frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            # makedirs also creates a missing 'record_frames/' parent, which
            # os.mkdir would fail on, and tolerates an existing directory.
            os.makedirs(frame_path, exist_ok=True)
            cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)
def start_tracking(self):
    """Track a user-selected target through ``self.frame_lists`` and report timing.

    The target box is picked interactively on the first frame with
    ``cv2.selectROI`` (no crosshair, box not drawn from its centre). Every
    frame is shown in the ``'demo'`` window with the current box drawn on
    it; when ``self.args.record`` is truthy the annotated frame is saved as
    a ``.jpg`` under ``record_frames/<second component of self.img_path>/``.

    After the last frame the list of per-frame times (milliseconds, from
    the OpenCV tick counter) and their mean are printed.
    """
    # First frame as float32 grayscale.
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Initial box chosen by the user: [x, y, width, height].
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    roi_size = (init_gt[2], init_gt[3])

    # Desired (Gaussian) response inside the box, and the image patch itself.
    response_map = self._get_gauss_response(init_frame, init_gt)
    g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    # Desired output template in the frequency domain.
    G = np.fft.fft2(g)

    # Filter numerator / denominator obtained from the first frame.
    Ai, Bi = self._pre_training(fi, G)
    lr = self.args.lr

    # Named so that it does not shadow the `time` module.
    frame_times_ms = []
    for idx, frame_file in enumerate(self.frame_lists):
        start = cv2.getTickCount()
        current_frame = cv2.imread(frame_file)
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)

        if idx == 0:
            # Weight the pre-trained terms by the learning rate.
            Ai = lr * Ai
            Bi = lr * Bi
            pos = init_gt.copy()
            # Same box as [xmin, ymin, xmax, ymax].
            clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
        else:
            # Filter from the previous frame, applied at the previous location.
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, roi_size))
            # Response in the frequency domain, brought back to the spatial domain.
            gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

            # Location of the strongest response (mean over ties), relative
            # to the window centre.
            max_pos = np.where(gi == np.max(gi))
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

            # New top-left corner of the box.
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy

            # Keep the crop rectangle [xmin, ymin, xmax, ymax] inside the image.
            frame_h, frame_w = current_frame.shape[:2]
            clip_pos[0] = np.clip(pos[0], 0, frame_w)
            clip_pos[1] = np.clip(pos[1], 0, frame_h)
            clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame_w)
            clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame_h)
            clip_pos = clip_pos.astype(np.int64)

            # Re-crop at the new position, resized back to the filter size;
            # the spectrum is computed once and reused for both averages.
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            Fi = np.fft.fft2(pre_process(cv2.resize(fi, roi_size)))
            Fi_conj = np.conjugate(Fi)
            Ai = lr * (G * Fi_conj) + (1 - lr) * Ai
            Bi = lr * (Fi * Fi_conj) + (1 - lr) * Bi

        # Draw the tracked box.
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0), 2)
        cv2.imshow('demo', current_frame)
        cv2.waitKey(100)

        # Optionally save the annotated frame.
        if self.args.record:
            frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            # makedirs also creates a missing 'record_frames/' parent, which
            # os.mkdir would fail on, and tolerates an existing directory.
            os.makedirs(frame_path, exist_ok=True)
            cv2.imwrite(frame_path + str(idx).zfill(5) + '.jpg', current_frame)

        end = cv2.getTickCount()
        frame_times_ms.append((end - start) / cv2.getTickFrequency() * 1000)

    # NOTE(review): assumed to run once after the loop - confirm against the
    # original indentation.
    print(frame_times_ms)
    print(np.mean(frame_times_ms))
def start_tracking(self):
    """Track a user-selected target through ``self.frame_lists``.

    The first frame is read and converted from BGR to grayscale, and the
    target box ``[x, y, width, height]`` (``x``, ``y`` being the top-left
    corner) is drawn by hand via ``cv2.selectROI``. Every frame is shown in
    the ``'demo'`` window with the current box drawn on it; when
    ``self.args.record`` is truthy the annotated frame is also saved under
    ``record_frames/<second component of self.img_path>/``.

    Only the top-left corner of the box is ever moved; its width and height
    are never changed, so changes in target size are not followed.
    """
    # First frame as float32 grayscale.
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Hand-selected target box: [x, y, width, height].
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    roi_size = (init_gt[2], init_gt[3])

    # Gaussian response over the whole image, peaking at the box centre.
    response_map = self._get_gauss_response(init_frame, init_gt)

    # Crop the response and the image to the ROI (both end up ROI-sized).
    g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    # FFT of the desired response inside the ROI.
    G = np.fft.fft2(g)

    # Pre-train the filter numerator / denominator.
    Ai, Bi = self._pre_training(fi, G)
    lr = self.args.lr

    for idx, frame_file in enumerate(self.frame_lists):
        current_frame = cv2.imread(frame_file)
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)

        if idx == 0:
            Ai = lr * Ai
            Bi = lr * Bi
            # pos is [left x, top y, roi width, roi height].
            pos = init_gt.copy()
            clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
        else:
            # Crop the current frame at the search region updated on the
            # previous frame (clip_pos), correlate it with the filter, take
            # the strongest response as the new target location, then update
            # the filter terms (Ai, Bi) and the search region (clip_pos).

            # Ai and Bi were updated on the previous frame; rebuild the filter.
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, roi_size))
            # Frequency-domain response, transformed back to the spatial domain.
            gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

            # Coordinates of the maximum response, relative to the window.
            max_pos = np.where(gi == np.max(gi))
            # The window centre stands for the previous target location, so
            # the differences are the target's shift along y and x.
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

            # Move the top-left corner only; the box size is left untouched.
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy

            # New target region [left x, top y, right x, bottom y], clipped
            # to the frame.
            frame_h, frame_w = current_frame.shape[:2]
            clip_pos[0] = np.clip(pos[0], 0, frame_w)
            clip_pos[1] = np.clip(pos[1], 0, frame_h)
            clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame_w)
            clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame_h)
            clip_pos = clip_pos.astype(np.int64)

            # Re-crop at the new position; its spectrum is computed once and
            # reused for both running averages.
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            Fi = np.fft.fft2(pre_process(cv2.resize(fi, roi_size)))
            Fi_conj = np.conjugate(Fi)

            # Online update: the learning rate weights recent frames more and
            # lets the contribution of older frames decay over time.
            Ai = lr * (G * Fi_conj) + (1 - lr) * Ai
            Bi = lr * (Fi * Fi_conj) + (1 - lr) * Bi

        # Visualise the tracking process.
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (0, 0, 255), 2)
        cv2.imshow('demo', current_frame)
        cv2.waitKey(100)

        # Optionally save the annotated frame.
        if self.args.record:
            frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            # makedirs also creates a missing 'record_frames/' parent, which
            # os.mkdir would fail on, and tolerates an existing directory.
            os.makedirs(frame_path, exist_ok=True)
            cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)
def start_tracking(self, coords, frame_num, end_frame_num, bounding_box, output_bbox, Video=None, show_prediction=True, show_timing=True):
    """Track a given box from ``frame_num`` up to (not including) ``end_frame_num``.

    Args:
        coords: initial box ``[x, y, width, height]`` on frame ``frame_num``.
        frame_num: index into ``self.frame_lists`` of the first frame.
        end_frame_num: exclusive end index; capped at ``len(self.frame_lists)``.
        bounding_box: list that receives one ``[x, y, width, height]`` entry
            per processed frame (mutated in place).
        output_bbox: not used by this method.
        Video: optional writer; when given, each frame is passed to
            ``Video.write``.
        show_prediction: draw the box and display the frame with matplotlib.
        show_timing: print the per-frame processing time.

    Returns:
        ``bounding_box`` (the same list that was passed in).
    """
    # First frame as float32 grayscale.
    init_img = cv2.imread(self.frame_lists[frame_num])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Initial box: [x, y, width, height].
    init_gt = np.array(coords).astype(np.int64)
    roi_size = (init_gt[2], init_gt[3])

    # Desired (Gaussian) response and the image patch, both cropped to the box.
    response_map = self._get_gauss_response(init_frame, init_gt)
    g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    G = np.fft.fft2(g)

    # Initial filter numerator / denominator.
    Ai, Bi = self._pre_training(fi, G)
    lr = self.args.lr

    # Never run past the last available frame.
    stop = min(end_frame_num, len(self.frame_lists))

    for idx in range(frame_num, stop):
        start = time.time()
        current_frame = cv2.imread(self.frame_lists[idx])
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)

        if idx == frame_num:
            # First processed frame: just initialise the state.
            Ai = lr * Ai
            Bi = lr * Bi
            pos = init_gt.copy()
            # Same box as [xmin, ymin, xmax, ymax].
            clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
        else:
            # Apply the filter learnt so far to the window at the last position.
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            fi = pre_process(cv2.resize(fi, roi_size))
            gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

            # Offset of the response peak (mean over ties) from the window centre.
            max_pos = np.where(gi == np.max(gi))
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

            # Only the top-left corner moves; width and height stay fixed.
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy

            # Clipped box [xmin, ymin, xmax, ymax].
            frame_h, frame_w = current_frame.shape[:2]
            clip_pos[0] = np.clip(pos[0], 0, frame_w)
            clip_pos[1] = np.clip(pos[1], 0, frame_h)
            clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame_w)
            clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame_h)
            clip_pos = clip_pos.astype(np.int64)

            # Re-crop at the new position; its spectrum is computed once and
            # reused for both running averages.
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            Fi = np.fft.fft2(pre_process(cv2.resize(fi, roi_size)))
            Fi_conj = np.conjugate(Fi)
            Ai = lr * (G * Fi_conj) + (1 - lr) * Ai
            Bi = lr * (Fi * Fi_conj) + (1 - lr) * Bi

        # NOTE(review): timing, video writing and box collection are assumed
        # to run for every frame (loop level) - confirm against the original
        # indentation.
        if show_timing:
            print('[INFO] MOSSE took {} seconds'.format(time.time() - start))

        # Visualise the tracking process.
        if show_prediction:
            cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0), 6)
            # Change the figure size here; the native aspect ratio is 16:9.
            plt.figure(figsize=(15, 4))
            plt.imshow(cv2.cvtColor(current_frame, cv2.COLOR_BGR2RGB))
            plt.show()

        if Video is not None:
            Video.write(current_frame)

        # Record this frame's box.
        pos_list = [pos[0], pos[1], pos[2], pos[3]]
        bounding_box.append(pos_list)

        # Optionally save the annotated frame.
        if self.args.record:
            frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            # makedirs also creates a missing 'record_frames/' parent, which
            # os.mkdir would fail on, and tolerates an existing directory.
            os.makedirs(frame_path, exist_ok=True)
            cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)

    return bounding_box
def start_tracking(self):
    """Track a user-selected target through ``self.frame_lists`` (debug build).

    The target box is picked interactively on the first frame with
    ``cv2.selectROI`` and printed. Every frame is shown in the ``'demo'``
    window with the current box drawn on it; when ``self.args.record`` is
    truthy the annotated frame is also saved under
    ``record_frames/<second component of self.img_path>/``.

    For the second frame (``idx == 1``) the intermediate values of the
    tracking step are printed for debugging.
    """
    # First frame as float32 grayscale.
    init_img = cv2.imread(self.frame_lists[0])
    init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
    init_frame = init_frame.astype(np.float32)

    # Initial box chosen by the user: [x, y, width, height].
    init_gt = cv2.selectROI('demo', init_img, False, False)
    init_gt = np.array(init_gt).astype(np.int64)
    print(init_gt)
    roi_size = (init_gt[2], init_gt[3])

    # Desired (Gaussian) response and the image patch, both cropped to the box.
    response_map = self._get_gauss_response(init_frame, init_gt)
    g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
    G = np.fft.fft2(g)

    # Initial filter numerator / denominator.
    Ai, Bi = self._pre_training(fi, G)
    lr = self.args.lr

    for idx, frame_file in enumerate(self.frame_lists):
        current_frame = cv2.imread(frame_file)
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)
        debug = idx == 1

        if idx == 0:
            Ai = lr * Ai
            Bi = lr * Bi
            pos = init_gt.copy()
            # Same box as [xmin, ymin, xmax, ymax].
            clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
        else:
            # Apply the filter learnt so far to the window at the last position.
            Hi = Ai / Bi
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            if debug:
                print("frame gray and fi before preprocessing")
                print(fi)
            fi = pre_process(cv2.resize(fi, roi_size))
            if debug:
                print("fi after preproc:")
                print(fi)
            gi = linear_mapping(np.fft.ifft2(Hi * np.fft.fft2(fi)))

            # Offset of the response peak (mean over ties) from the window centre.
            max_value = np.max(gi)
            max_pos = np.where(gi == max_value)
            dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
            dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
            if debug:
                print("gi = ")
                print(gi)
                print(max_value)
                print("max_pos = ")
                print(max_pos)
                print(np.mean(max_pos[0]))
                print(gi.shape[0])
                print(max_pos[1])
                # NOTE(review): gi.shape[0] is printed a second time here;
                # gi.shape[1] was probably intended for the x axis - confirm.
                print(gi.shape[0])
                print(dy)
                print(dx)

            # Only the top-left corner moves; width and height stay fixed.
            pos[0] = pos[0] + dx
            pos[1] = pos[1] + dy

            # Clipped box [xmin, ymin, xmax, ymax].
            frame_h, frame_w = current_frame.shape[:2]
            clip_pos[0] = np.clip(pos[0], 0, frame_w)
            clip_pos[1] = np.clip(pos[1], 0, frame_h)
            clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame_w)
            clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame_h)
            clip_pos = clip_pos.astype(np.int64)

            # Re-crop at the new position; its spectrum is computed once and
            # reused for both running averages.
            fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
            Fi = np.fft.fft2(pre_process(cv2.resize(fi, roi_size)))
            Fi_conj = np.conjugate(Fi)
            Ai = lr * (G * Fi_conj) + (1 - lr) * Ai
            Bi = lr * (Fi * Fi_conj) + (1 - lr) * Bi

        # Visualise the tracking process.
        cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0), 2)
        cv2.imshow('demo', current_frame)
        cv2.waitKey(100)

        # Optionally save the annotated frame.
        if self.args.record:
            frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            # makedirs also creates a missing 'record_frames/' parent, which
            # os.mkdir would fail on, and tolerates an existing directory.
            os.makedirs(frame_path, exist_ok=True)
            cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)