Code Example #1
    def _get_gauss_response(self, img, gt):
        # get the shape of the image..
        height, width = img.shape
        # get the mesh grid...
        xx, yy = np.meshgrid(np.arange(width), np.arange(height))

        # get the center of the object...
        # i.e., the center coordinates of the selected target region
        center_x = gt[0] + 0.5 * gt[2]
        center_y = gt[1] + 0.5 * gt[3]

        # calculate the distance...
        # Build a response matrix the same size as the input image img, following a
        # 2-D Gaussian centered on the selected target's midpoint.
        # A general 2-D Gaussian has two spread parameters, sigmaX for the x direction
        # and sigmaY for the y direction. Here the same value is used for both, so the
        # Gaussian's level sets are circles: points at equal distance from the target
        # center (x0, y0) receive equal weight. With different values the level sets
        # would be ellipses, and equidistant points would be weighted differently.
        # (Note the code divides by 2 * sigma rather than 2 * sigma**2, so args.sigma
        # effectively plays the role of sigma squared.)
        exponent = (np.square(xx - center_x) + np.square(yy - center_y)) / (2 * self.args.sigma)
        # get the response map...
        # the Gaussian response matrix
        response = np.exp(-exponent)

        # normalize...
        # normalize the response matrix: (x - min) / (max - min)
        response = linear_mapping(response)
        return response
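
The helper linear_mapping is imported from the projects' utility modules and is not shown in these snippets. Based on the normalization comment above, (x - min) / (max - min), a minimal sketch of it might look like this (an assumption, not the projects' verbatim helper):

    import numpy as np

    def linear_mapping(img):
        # rescale all values into [0, 1]: (x - min) / (max - min)
        return (img - img.min()) / (img.max() - img.min())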
Code Example #2
    def track(self, current_frame):
        frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
        frame_gray = frame_gray.astype(np.float32)

        # filter template from the accumulated numerator / denominator
        Hi = self.Ai / self.Bi
        fi = frame_gray[self.pos[1]:self.pos[1] + self.pos[3],
                        self.pos[0]:self.pos[0] + self.pos[2]]
        fi = pre_process(fi)
        Gi = Hi * np.fft.fft2(fi)
        gi = linear_mapping(np.fft.ifft2(Gi))

        # find the position of the maximum response...
        max_pos = np.unravel_index(np.argmax(gi, axis=None), gi.shape)

        # update the position...
        self.pos[1] += max_pos[0] - gi.shape[0] // 2
        self.pos[0] += max_pos[1] - gi.shape[1] // 2

        # get the current fi..
        fi = frame_gray[self.pos[1]:self.pos[1] + self.pos[3],
                        self.pos[0]:self.pos[0] + self.pos[2]]
        fi = pre_process(fi)

        # online update...
        self.Ai = self.learning_rate * (self.G * np.conjugate(
            np.fft.fft2(fi))) + (1 - self.learning_rate) * self.Ai
        self.Bi = self.learning_rate * (np.fft.fft2(fi) * np.conjugate(
            np.fft.fft2(fi))) + (1 - self.learning_rate) * self.Bi

        return self.pos
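
pre_process is another project helper that these snippets call but do not define. In the MOSSE paper the input patch is log-transformed, normalized, and multiplied by a cosine window to reduce edge effects; a sketch under that assumption (the exact helper in these projects may differ):

    import numpy as np

    def pre_process(img):
        # log transform to reduce the effect of lighting changes
        img = np.log(img + 1)
        # normalize to zero mean and (roughly) unit variance
        img = (img - np.mean(img)) / (np.std(img) + 1e-5)
        # 2-D Hanning (cosine) window to suppress boundary discontinuities
        height, width = img.shape
        window = np.outer(np.hanning(height), np.hanning(width))
        return img * window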
Code Example #3
File: mosse.py  Project: liuxianyi/goog_opencv
 def _get_gauss_response(self, img, gt):
     # get the shape of the image..
     height, width = img.shape
     # get the mesh grid...
     xx, yy = np.meshgrid(np.arange(width), np.arange(height))
     # get the center of the object...
     center_x = gt[0] + 0.5 * gt[2]
     center_y = gt[1] + 0.5 * gt[3]
     # calculate the distance...
     dist = (np.square(xx - center_x) + np.square(yy - center_y)) / (2 * self.args.sigma)
     # get the response map...
     response = np.exp(-dist)
     # normalize...
     response = linear_mapping(response)
     return response
Code Example #4
File: mosse.py  Project: sgdy3/MOSSE
 def _get_gauss_response(self, img, gt):
     # get the shape of the image..
     height, width = img.shape
     # get the mesh grid...
     xx, yy = np.meshgrid(np.arange(width), np.arange(height))     # coordinate grids: xx holds each column's x value, yy each row's y value
     # get the center of the object...
     center_x = gt[0] + 0.5 * gt[2]    # x coordinate of the box center
     center_y = gt[1] + 0.5 * gt[3]    # y coordinate of the box center
     # calculate the distance...
     dist = (np.square(xx - center_x) + np.square(yy - center_y)) / (2 * self.args.sigma)    # squared distance from each pixel to the center, divided by 2 * sigma
     # get the response map...
     response = np.exp(-dist)  # e^-dist
     # normalize...
     response = linear_mapping(response)
     return response
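
As a quick check of what _get_gauss_response computes, the following standalone snippet (hypothetical image size and box; a plain sigma variable plays the role of self.args.sigma) re-creates the response for a 100x100 image and verifies that the peak lands at the center of the box:

    import numpy as np

    height, width, sigma = 100, 100, 100
    gt = [30, 40, 20, 10]  # [x, y, width, height]
    xx, yy = np.meshgrid(np.arange(width), np.arange(height))
    center_x = gt[0] + 0.5 * gt[2]
    center_y = gt[1] + 0.5 * gt[3]
    dist = (np.square(xx - center_x) + np.square(yy - center_y)) / (2 * sigma)
    response = np.exp(-dist)
    peak_y, peak_x = np.unravel_index(np.argmax(response), response.shape)
    print(peak_x, peak_y)  # 40 45 -- the box center (30 + 10, 40 + 5)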
Code Example #5
File: mosse.py  Project: liuxianyi/goog_opencv
    def start_tracking(self):

        time_list = []

        # get the image of the first frame... (read as gray scale image...)
        init_img = cv2.imread(self.frame_lists[0])
        init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
        init_frame = init_frame.astype(np.float32)
        # get the init ground truth.. [x, y, width, height]
        init_gt = cv2.selectROI('demo', init_img, False, False)
        init_gt = np.array(init_gt).astype(np.int64)
        # start to draw the gaussian response...
        response_map = self._get_gauss_response(init_frame, init_gt)
        # start to create the training set ...
        # get the goal..
        g = response_map[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
        fi = init_frame[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
        G = np.fft.fft2(g)
        # start to do the pre-training...
        Ai, Bi = self._pre_training(fi, G)
        # start the tracking...
        i = 0  # counter for saved frames
        for idx in range(len(self.frame_lists)):
            start = time.time()
            current_frame = cv2.imread(self.frame_lists[idx])
            frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = frame_gray.astype(np.float32)
            if idx == 0:
                Ai = self.args.lr * Ai
                Bi = self.args.lr * Bi
                pos = init_gt.copy()
                clip_pos = np.array([pos[0], pos[1], pos[0]+pos[2], pos[1]+pos[3]]).astype(np.int64)
            else:
                Hi = Ai / Bi
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                Gi = Hi * np.fft.fft2(fi)
                gi = linear_mapping(np.fft.ifft2(Gi))
                # find the max pos...
                max_value = np.max(gi)
                max_pos = np.where(gi == max_value)
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
                
                # update the position...
                pos[0] = pos[0] + dx
                pos[1] = pos[1] + dy

                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
                clip_pos[2] = np.clip(pos[0]+pos[2], 0, current_frame.shape[1])
                clip_pos[3] = np.clip(pos[1]+pos[3], 0, current_frame.shape[0])
                clip_pos = clip_pos.astype(np.int64)

                # get the current fi..
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                # online update...
                Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Ai
                Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Bi
            
            # visualize the tracking process...
            cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0]+pos[2], pos[1]+pos[3]), (255, 0, 0), 2)

            #out.write(current_frame)

            cv2.imshow('demo11', current_frame)
            cv2.imwrite('goog/'+str(i)+'.jpg', current_frame)
            i += 1
            cv2.waitKey(10)
            # if record... save the frames..
            # if self.args.record:
            #     frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
            #     if not os.path.exists(frame_path):
            #         os.mkdir(frame_path)
            #     cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)
            #     #out.write(current_frame)

            end = time.time()
            time_list.append(end-start)
        # out.release()  # no VideoWriter is created in this variant, so these stay disabled
        # print('Video written successfully!')
        return time_list
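
_pre_training is called here and in the examples below but never shown. In MOSSE the initial numerator Ai and denominator Bi accumulate G * conj(F) and F * conj(F) over small random perturbations of the first frame's patch. A sketch, written as module-level functions for brevity and reusing the pre_process sketch above; random_warp and its small-rotation behavior are assumptions, not the projects' verbatim code:

    import numpy as np
    import cv2

    def random_warp(img):
        # hypothetical helper: apply a small random rotation to the patch
        angle = np.random.uniform(-10, 10)
        height, width = img.shape
        matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
        return cv2.warpAffine(img, matrix, (width, height))

    def pre_training(init_patch, G, num_pretrain=128):
        # accumulate Ai = sum G * conj(F) and Bi = sum F * conj(F) over warps
        fi = pre_process(init_patch)
        Ai = G * np.conjugate(np.fft.fft2(fi))
        Bi = np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))
        for _ in range(num_pretrain):
            fi = pre_process(random_warp(init_patch))
            Ai = Ai + G * np.conjugate(np.fft.fft2(fi))
            Bi = Bi + np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))
        return Ai, Bi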
Code Example #6
                    (255, 255, 255), 2)  # white

                # write down the bounding box location
                fm.write(str(lpos))
                fm.write('\n')

            else:
                # mosse
                Hi = Ai / Bi
                # subWindow
                fi = frame_gray[clip_pos[1]:clip_pos[3],
                                clip_pos[0]:clip_pos[2]]
                # keep win size unchanged
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                Gi = Hi * np.fft.fft2(fi)
                gi = linear_mapping(np.fft.ifft2(Gi))
                # find the max pos
                max_value = np.max(gi)
                max_pos = np.where(gi == max_value)
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
                # update the position
                pos[0] = pos[0] + dx
                pos[1] = pos[1] + dy
                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                clip_pos[0] = np.clip(pos[0], 0, frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, frame.shape[0])
                clip_pos[2] = np.clip(pos[0] + pos[2], 0, frame.shape[1])
                clip_pos[3] = np.clip(pos[1] + pos[3], 0, frame.shape[0])
                clip_pos = clip_pos.astype(np.int64)
                # get the next fi using the new bounding box
Code Example #7
    def start_tracking(self):
        # get the image of the first frame... (read as gray scale image...)
        init_img = cv2.imread(self.frame_lists[0])
        init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
        init_frame = init_frame.astype(np.float32)
        # get the init ground truth.. [x, y, width, height]
        init_gt = cv2.selectROI('demo', init_img, False, False)
        init_gt = np.array(init_gt).astype(np.int64)
        # start to draw the gaussian response...
        response_map = self._get_gauss_response(init_frame, init_gt)
        # start to create the training set ...
        # get the goal..
        g = response_map[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
        fi = init_frame[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]
        G = np.fft.fft2(g)
        # start to do the pre-training...
        Ai, Bi = self._pre_training(fi, G)
        # start the tracking...
        for idx in range(len(self.frame_lists)):
            current_frame = cv2.imread(self.frame_lists[idx])
            frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = frame_gray.astype(np.float32)
            if idx == 0:
                Ai = self.args.lr * Ai
                Bi = self.args.lr * Bi
                pos = init_gt.copy()
                clip_pos = np.array([pos[0], pos[1], pos[0]+pos[2], pos[1]+pos[3]]).astype(np.int64)
            else:
                Hi = Ai / Bi
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                Gi = Hi * np.fft.fft2(fi)
                gi = linear_mapping(np.fft.ifft2(Gi))
                # find the max pos...
                max_value = np.max(gi)
                max_pos = np.where(gi == max_value)
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
                
                # update the position...
                pos[0] = pos[0] + dx
                pos[1] = pos[1] + dy

                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
                clip_pos[2] = np.clip(pos[0]+pos[2], 0, current_frame.shape[1])
                clip_pos[3] = np.clip(pos[1]+pos[3], 0, current_frame.shape[0])
                clip_pos = clip_pos.astype(np.int64)

                # get the current fi..
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                # online update...
                Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Ai
                Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Bi
            
            # visualize the tracking process...
            cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0]+pos[2], pos[1]+pos[3]), (255, 0, 0), 2)
            cv2.imshow('demo', current_frame)
            cv2.waitKey(100)
            # if record... save the frames..
            if self.args.record:
                frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
                if not os.path.exists(frame_path):
                    os.mkdir(frame_path)
                cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)
Code Example #8
File: mosse.py  Project: sgdy3/MOSSE
    def start_tracking(self):
        # get the image of the first frame... (read as gray scale image...)
        init_img = cv2.imread(self.frame_lists[0])
        init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
        init_frame = init_frame.astype(np.float32)
        # get the init ground truth.. [x, y, width, height]
        init_gt = cv2.selectROI('demo', init_img, False, False)    # interactively select a region; no crosshair, selection is not centered on the mouse
        init_gt = np.array(init_gt).astype(np.int64)     # coordinates of the selected rectangle
        # start to draw the gaussian response...
        response_map = self._get_gauss_response(init_frame, init_gt)
        # start to create the training set ...
        # get the goal..
        g = response_map[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]   # Gaussian response cropped to the selected box
        fi = init_frame[init_gt[1]:init_gt[1]+init_gt[3], init_gt[0]:init_gt[0]+init_gt[2]]    # image patch inside the box
        G = np.fft.fft2(g)   # desired output template in the frequency domain
        # start to do the pre-training...
        Ai, Bi = self._pre_training(fi, G)   # filter obtained from the first frame
        # start the tracking...
        times = []  # per-frame processing time in ms
        for idx in range(len(self.frame_lists)):
            start = cv2.getTickCount()
            current_frame = cv2.imread(self.frame_lists[idx])
            frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = frame_gray.astype(np.float32)
            if idx == 0:
                Ai = self.args.lr * Ai    # weight the initial filter by the learning rate
                Bi = self.args.lr * Bi
                pos = init_gt.copy()
                clip_pos = np.array([pos[0], pos[1], pos[0]+pos[2], pos[1]+pos[3]]).astype(np.int64)
            else:
                Hi = Ai / Bi    # filter from the previous frame
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]   # patch at the position locked in the previous frame
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                Gi = Hi * np.fft.fft2(fi)   # predicted output in the frequency domain
                gi = linear_mapping(np.fft.ifft2(Gi))  # back to the spatial domain
                # find the max pos...
                max_value = np.max(gi)  # maximum response value
                max_pos = np.where(gi == max_value)  # coordinates of the maximum response
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)
                
                # update the position...
                pos[0] = pos[0] + dx   # new x coordinate of the tracking box
                pos[1] = pos[1] + dy   # new y coordinate of the tracking box

                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
                clip_pos[2] = np.clip(pos[0]+pos[2], 0, current_frame.shape[1])
                clip_pos[3] = np.clip(pos[1]+pos[3], 0, current_frame.shape[0])   # keep the box inside the image
                clip_pos = clip_pos.astype(np.int64)

                # get the current fi..
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))   # resize the patch back to the initial box size
                # online update...
                Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Ai
                Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Bi   # weighted update for the next frame's filter
            
            # visualize the tracking process...
            cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0]+pos[2], pos[1]+pos[3]), (255, 0, 0), 2)  # draw the tracked target
            cv2.imshow('demo', current_frame)
            cv2.waitKey(100)
            # if record... save the frames..
            if self.args.record:
                frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
                if not os.path.exists(frame_path):
                    os.mkdir(frame_path)
                cv2.imwrite(frame_path + str(idx).zfill(5) + '.jpg', current_frame)
            end = cv2.getTickCount()
            times.append((end - start) / cv2.getTickFrequency() * 1000)
        print(times)
        print(np.mean(times))
Code Example #9
    def start_tracking(self):
        # get the image of the first frame... (read as gray scale image...)
        # read the first frame, then convert it from BGR to grayscale
        init_img = cv2.imread(self.frame_lists[0])
        init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
        init_frame = init_frame.astype(np.float32)

        # get the init ground truth.. [x, y, width, height]
        # manually draw a box around the desired target region [x, y, width, height]; x and y are the coordinates of the region's top-left corner
        init_gt = cv2.selectROI('demo', init_img, False, False)
        init_gt = np.array(init_gt).astype(np.int64)

        # start to draw the gaussian response...
        # build the Gaussian response map (input: the original image and the target region [x, y, width, height]); the returned Gaussian matrix peaks at the center of the selected box
        response_map = self._get_gauss_response(init_frame, init_gt)

        # start to create the training set ...
        # get the goal...
        # crop the Gaussian response matrix; the crop has the same size as the selected ROI.
        g = response_map[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
        # crop the image patch of the target region
        fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3], init_gt[0]:init_gt[0] + init_gt[2]]
        # fast Fourier transform of the Gaussian response over the target region
        G = np.fft.fft2(g)

        # start to do the pre-training of the filter...
        Ai, Bi = self._pre_training(fi, G)

        # start the tracking...
        for idx in range(len(self.frame_lists)):

            current_frame = cv2.imread(self.frame_lists[idx])
            frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = frame_gray.astype(np.float32)

            if idx == 0:
                Ai = self.args.lr * Ai
                Bi = self.args.lr * Bi
                pos = init_gt.copy()
                # pos holds [leftX, topY, roi width, roi height]
                clip_pos = np.array([pos[0], pos[1], pos[0] + pos[2], pos[1] + pos[3]]).astype(np.int64)
            else:
                '''
                In the current frame, crop the region given by the search window
                (clip_pos) updated on the previous frame, and correlate the filter
                with the cropped patch. The position of the maximum response is the
                position of highest correlation; then update the filter (Ai, Bi)
                and the search window (clip_pos).
                '''

                # Ai and Bi were already updated on the previous frame; recompute the filter template Hi
                Hi = Ai / Bi
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))

                # correlate Hi with fi (an elementwise product in the frequency domain) to obtain a response matrix
                Gi = Hi * np.fft.fft2(fi)
                # inverse Fourier transform of the frequency-domain Gi gives the actual spatial response gi
                gi = linear_mapping(np.fft.ifft2(Gi))

                # find the maximum value in the response matrix gi
                max_value = np.max(gi)
                # coordinates of the maximum of gi: the tracked target's position in the current frame, expressed relative to gi, i.e. to the target region
                max_pos = np.where(gi == max_value)
                # gi.shape[0] / 2 is the previous target's y coordinate, also relative to gi; the difference dy is the offset of the current target from the previous one in y
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                # gi.shape[1] / 2 is the previous target's x coordinate, also relative to gi; the difference dx is the offset of the current target from the previous one in x
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

                # update the position...
                # pos holds [leftX, topY, roi width, roi height]: the top-left corner of the ROI box plus its width and height.
                # Only the top-left corner is moved; the ROI's width and height are never changed, which is why MOSSE
                # filtering cannot handle changes in the size of the tracked target.
                pos[0] = pos[0] + dx
                pos[1] = pos[1] + dy

                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                # clip_pos is the target region's new position in this frame, [leftX, topY, rightX, bottomY]
                clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
                clip_pos[2] = np.clip(pos[0] + pos[2], 0, current_frame.shape[1])
                clip_pos[3] = np.clip(pos[1] + pos[3], 0, current_frame.shape[0])
                clip_pos = clip_pos.astype(np.int64)

                # get the current fi..
                fi = frame_gray[clip_pos[1]:clip_pos[3], clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))

                # online update...
                # online update of Ai and Bi
                # lr is the learning rate; it makes the model weight recent frames more heavily and lets the contribution of earlier frames decay over time
                Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Ai
                Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(np.fft.fft2(fi))) + (1 - self.args.lr) * Bi

            # visualize the tracking process...
            cv2.rectangle(current_frame, (pos[0], pos[1]), (pos[0] + pos[2], pos[1] + pos[3]), (0, 0, 255), 2)
            cv2.imshow('demo', current_frame)
            cv2.waitKey(100)

            # if record... save the frames..
            if self.args.record:
                frame_path = 'record_frames/' + self.img_path.split('/')[1] + '/'
                if not os.path.exists(frame_path):
                    os.mkdir(frame_path)
                cv2.imwrite(frame_path + str(idx).zfill(5) + '.png', current_frame)
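
The comments above describe exponential forgetting: under the update A = lr * X + (1 - lr) * A, a measurement observed n frames ago survives with weight lr * (1 - lr)^n. A toy check of how quickly old frames decay (lr = 0.125 is a typical MOSSE value, an assumption here):

    lr = 0.125
    for n in [0, 1, 5, 10, 20]:
        # weight of a frame observed n updates ago
        print(n, lr * (1 - lr) ** n)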
Code Example #10
File: mosse.py  Project: codeAtharva/TRACAR
    def start_tracking(self,
                       coords,
                       frame_num,
                       end_frame_num,
                       bounding_box,
                       output_bbox,
                       Video=None,
                       show_prediction=True,
                       show_timing=True):
        # get the image of the first frame... (read as gray scale image...)
        init_img = cv2.imread(self.frame_lists[frame_num])
        init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
        init_frame = init_frame.astype(np.float32)
        # get the init ground truth.. [x, y, width, height]
        init_gt = coords
        init_gt = np.array(init_gt).astype(np.int64)
        # start to draw the gaussian response...
        response_map = self._get_gauss_response(init_frame, init_gt)
        # start to create the training set ...
        # get the goal..
        g = response_map[init_gt[1]:init_gt[1] + init_gt[3],
                         init_gt[0]:init_gt[0] + init_gt[2]]
        fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3],
                        init_gt[0]:init_gt[0] + init_gt[2]]
        G = np.fft.fft2(g)
        # start to do the pre-training...
        Ai, Bi = self._pre_training(fi, G)
        # start the tracking...
        initial = True
        x = end_frame_num
        if end_frame_num > len(self.frame_lists):
            x = len(self.frame_lists)
        for idx in range(frame_num, x):
            start = time.time()
            current_frame = cv2.imread(self.frame_lists[idx])
            frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = frame_gray.astype(np.float32)
            if initial:
                Ai = self.args.lr * Ai
                Bi = self.args.lr * Bi
                pos = init_gt.copy()
                clip_pos = np.array(
                    [pos[0], pos[1], pos[0] + pos[2],
                     pos[1] + pos[3]]).astype(np.int64)
                initial = False
            else:
                Hi = Ai / Bi
                fi = frame_gray[clip_pos[1]:clip_pos[3],
                                clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                Gi = Hi * np.fft.fft2(fi)
                gi = linear_mapping(np.fft.ifft2(Gi))
                # find the max pos...
                max_value = np.max(gi)
                max_pos = np.where(gi == max_value)
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

                # update the position...
                pos[0] = pos[0] + dx
                pos[1] = pos[1] + dy

                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
                clip_pos[2] = np.clip(pos[0] + pos[2], 0,
                                      current_frame.shape[1])
                clip_pos[3] = np.clip(pos[1] + pos[3], 0,
                                      current_frame.shape[0])
                clip_pos = clip_pos.astype(np.int64)

                # get the current fi..
                fi = frame_gray[clip_pos[1]:clip_pos[3],
                                clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                # online update...
                Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (
                    1 - self.args.lr) * Ai
                Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(
                    np.fft.fft2(fi))) + (1 - self.args.lr) * Bi

            if show_timing:
                print('[INFO] MOSSE took {} seconds'.format(time.time() -
                                                            start))
            # visualize the tracking process...
            if show_prediction:
                cv2.rectangle(current_frame, (pos[0], pos[1]),
                              (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0),
                              6)
                plt.figure(figsize=(
                    15,
                    4))  #change figure size here. native aspect ratio is 16:9
                plt.imshow(cv2.cvtColor(current_frame, cv2.COLOR_BGR2RGB))

                plt.show()
                if Video is not None:
                    Video.write(current_frame)
            #Save bounding boxes to the array
            pos_list = [pos[0], pos[1], pos[2], pos[3]]
            bounding_box.append(pos_list)
            #print(bounding_box)
            # if record... save the frames..
            if self.args.record:
                frame_path = 'record_frames/' + self.img_path.split(
                    '/')[1] + '/'
                if not os.path.exists(frame_path):
                    os.mkdir(frame_path)
                cv2.imwrite(frame_path + str(idx).zfill(5) + '.png',
                            current_frame)

        return bounding_box
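
For completeness, a hypothetical driver for this variant; the argparse fields lr, sigma, and record are inferred from the self.args usage above, while the tracker class name, its constructor, and the dataset path are assumptions:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--lr', type=float, default=0.125)   # online update rate
    parser.add_argument('--sigma', type=float, default=100)  # Gaussian response spread
    parser.add_argument('--record', action='store_true')     # save frames to disk
    args = parser.parse_args()

    tracker = mosse(args, img_path='datasets/surfer/')  # assumed class and path
    boxes = tracker.start_tracking(coords=(128, 64, 48, 80),  # initial [x, y, w, h]
                                   frame_num=0,
                                   end_frame_num=200,
                                   bounding_box=[],
                                   output_bbox='boxes.txt',
                                   show_prediction=False,
                                   show_timing=False)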
Code Example #11
    def start_tracking(self):
        # get the image of the first frame... (read as gray scale image...)
        init_img = cv2.imread(self.frame_lists[0])
        init_frame = cv2.cvtColor(init_img, cv2.COLOR_BGR2GRAY)
        init_frame = init_frame.astype(np.float32)
        # get the init ground truth.. [x, y, width, height]
        init_gt = cv2.selectROI('demo', init_img, False, False)
        init_gt = np.array(init_gt).astype(np.int64)
        print(init_gt)
        # start to draw the gaussian response...
        response_map = self._get_gauss_response(init_frame, init_gt)
        # start to create the training set ...
        # get the goal..
        g = response_map[init_gt[1]:init_gt[1] + init_gt[3],
                         init_gt[0]:init_gt[0] + init_gt[2]]
        fi = init_frame[init_gt[1]:init_gt[1] + init_gt[3],
                        init_gt[0]:init_gt[0] + init_gt[2]]
        #cv2.imshow('init_frame', init_frame)
        #cv2.imshow('fi', fi)
        #cv2.waitKey(100)
        G = np.fft.fft2(g)
        # start to do the pre-training...
        Ai, Bi = self._pre_training(fi, G)
        # start the tracking...
        for idx in range(len(self.frame_lists)):
            current_frame = cv2.imread(self.frame_lists[idx])
            frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
            frame_gray = frame_gray.astype(np.float32)
            if idx == 0:
                Ai = self.args.lr * Ai
                Bi = self.args.lr * Bi
                pos = init_gt.copy()
                clip_pos = np.array(
                    [pos[0], pos[1], pos[0] + pos[2],
                     pos[1] + pos[3]]).astype(np.int64)
                #print("pos and clip pos are : ")
                #print(pos)
                #print(clip_pos)
                #print("Ai and Bi are")
                #print(Ai, Bi)
            else:
                Hi = Ai / Bi
                fi = frame_gray[clip_pos[1]:clip_pos[3],
                                clip_pos[0]:clip_pos[2]]
                if (idx == 1):
                    print("frame gray and fi before preprocessing")
                    #cv2.imshow("frame_gray", frame_gray)
                    print(fi)
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                if (idx == 1):
                    print("fi after preproc:")
                    print(fi)
                Gi = Hi * np.fft.fft2(fi)
                gi = linear_mapping(np.fft.ifft2(Gi))

                # find the max pos...
                max_value = np.max(gi)
                max_pos = np.where(gi == max_value)
                dy = int(np.mean(max_pos[0]) - gi.shape[0] / 2)
                dx = int(np.mean(max_pos[1]) - gi.shape[1] / 2)

                if (idx == 1):
                    print("gi = ")
                    print(gi)
                    print(max_value)
                    print("max_pos = ")
                    print(max_pos)
                    print(np.mean(max_pos[0]))
                    print(gi.shape[0])
                    print(max_pos[1])
                    print(gi.shape[1])
                    print(dy)
                    print(dx)

                # update the position...
                pos[0] = pos[0] + dx
                pos[1] = pos[1] + dy

                # trying to get the clipped position [xmin, ymin, xmax, ymax]
                clip_pos[0] = np.clip(pos[0], 0, current_frame.shape[1])
                clip_pos[1] = np.clip(pos[1], 0, current_frame.shape[0])
                clip_pos[2] = np.clip(pos[0] + pos[2], 0,
                                      current_frame.shape[1])
                clip_pos[3] = np.clip(pos[1] + pos[3], 0,
                                      current_frame.shape[0])
                clip_pos = clip_pos.astype(np.int64)

                # get the current fi..
                fi = frame_gray[clip_pos[1]:clip_pos[3],
                                clip_pos[0]:clip_pos[2]]
                fi = pre_process(cv2.resize(fi, (init_gt[2], init_gt[3])))
                # online update...
                Ai = self.args.lr * (G * np.conjugate(np.fft.fft2(fi))) + (
                    1 - self.args.lr) * Ai
                Bi = self.args.lr * (np.fft.fft2(fi) * np.conjugate(
                    np.fft.fft2(fi))) + (1 - self.args.lr) * Bi

            # visualize the tracking process...
            cv2.rectangle(current_frame, (pos[0], pos[1]),
                          (pos[0] + pos[2], pos[1] + pos[3]), (255, 0, 0), 2)
            cv2.imshow('demo', current_frame)
            cv2.waitKey(100)
            # if record... save the frames..
            if self.args.record:
                frame_path = 'record_frames/' + self.img_path.split(
                    '/')[1] + '/'
                if not os.path.exists(frame_path):
                    os.mkdir(frame_path)
                cv2.imwrite(frame_path + str(idx).zfill(5) + '.png',
                            current_frame)