Example #1
    def update(self):
        final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapting the output video resolution to the original video
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
                print("Results have been written to json.")
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4

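                # choose the evaluated joint set from the heatmap channel count;
                # face_hand_num is the number of face+hand channels at the tail of
                # the heatmap, split off below when separate decoders are configured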
                face_hand_num = 110
                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0,136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0,26)]
                elif hm_data.size()[1] == 133:
                    self.eval_joints = [*range(0,133)]
                elif hm_data.size()[1] == 68:
                    face_hand_num = 42
                    self.eval_joints = [*range(0,68)]
                elif hm_data.size()[1] == 21:
                    self.eval_joints = [*range(0,21)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    if isinstance(self.heatmap_to_coord, list):
                        pose_coords_body_foot, pose_scores_body_foot = self.heatmap_to_coord[0](
                            hm_data[i][self.eval_joints[:-face_hand_num]], bbox, hm_shape=hm_size, norm_type=norm_type)
                        pose_coords_face_hand, pose_scores_face_hand = self.heatmap_to_coord[1](
                            hm_data[i][self.eval_joints[-face_hand_num:]], bbox, hm_shape=hm_size, norm_type=norm_type)
                        pose_coord = np.concatenate((pose_coords_body_foot, pose_coords_face_hand), axis=0)
                        pose_score = np.concatenate((pose_scores_body_foot, pose_scores_face_hand), axis=0)
                    else:
                        pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                    pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
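                # parametric pose NMS removes redundant person proposals; it is
                # skipped when pose tracking is enabled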
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area, use_heatmap_loss=self.use_heatmap_loss)

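                # pack per-person results; proposal_score = mean kp score
                # + detection score + 1.25 * max kp score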
                _result = []
                for k in range(len(scores)):
                    _result.append(
                        {
                            'keypoints':preds_img[k],
                            'kp_score':preds_scores[k],
                            'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                            'idx':ids[k],
                            'box':[boxes[k][0], boxes[k][1], boxes[k][2]-boxes[k][0],boxes[k][3]-boxes[k][1]] 
                        }
                    )

                result = {
                    'imgname': im_name,
                    'result': _result
                }


                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis import vis_frame_dense as vis_frame
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis import vis_frame
                    img = vis_frame(orig_img, result, self.opt, self.vis_thres)
                    self.write_image(img, im_name, stream=stream if self.save_video else None)
Example #2
    def update(self):
        final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapting the output video resolution to the original video
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt[
                    'savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[
                    self.video_save_opt[k]
                    for k in ['savepath', 'fourcc', 'fps', 'frameSize']
                ])
            assert stream.isOpened(), 'Cannot open video for writing'
        # ====== head pose estimation setup ======
        if self.head_pose:
            # estimator used for head pose
            pose_estimator = PoseEstimator(img_size=self.opt.img_size)
            # Introduce scalar stabilizers for pose.
            pose_stabilizers = [
                Stabilizer(state_num=2,
                           measure_num=1,
                           cov_process=0.1,
                           cov_measure=0.1) for _ in range(6)
            ]
            masks_list = []  # estimated head poses, one entry per detected face
            emoji_available_list = []  # indices of targets eligible for expression recognition
            face_naked_rate = []  # face exposure ratio for every person
        # keep looping infinitely
        while True:
            if self.opt.tracking:  # update re-identification state
                reid_states = self.reid_states
                reid_global_states = self.reid_global_states
                reid_global_states["frame"] = (reid_global_states["frame"] +
                                               1) % 9999
                current_time = time.time()
                reid_global_states[
                    "interval"] = current_time - reid_global_states['time']
                reid_global_states['time'] = current_time
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
             im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                write_json(final_result,
                           self.opt.outputpath,
                           form=self.opt.format,
                           for_eval=self.opt.eval)
                print("Results have been written to json.")
                return
            # ========================== further processing ==========================
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(
                        orig_img,
                        im_name,
                        stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4
                # pred = hm_data.cpu().data.numpy()

                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints],
                        bbox,
                        hm_shape=hm_size,
                        norm_type=norm_type)
                    pose_coords.append(
                        torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(
                        torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)
                    if len(preds_img) != 0:
                        preds_img = torch.stack(preds_img)
                # print(boxes[0], cropped_boxes[0],hm_data[0].shape)
                # ========================= per-detection processing =========================
                if len(preds_img) != 0:
                    if self.head_pose:
                        masks_list.clear()
                        emoji_available_list.clear()
                    for i in range(preds_img.shape[0]):
                        self_state = None  # avoid a NameError below when tracking is disabled
                        if self.opt.tracking:
                            self_state = self.get_reid_state(
                                ids[i], reid_states, reid_global_states)
                            self_state['index'] = i

                        # === head pose estimation ===
                        if self.head_pose:
                            # extract the face keypoints (indices 26:94)
                            face_keypoints = preds_img[i, 26:94]
                            face_keypoints_scores = preds_scores[i, 26:94]
                            # get normalized face keypoint coordinates
                            # scale_face_keypoints, _ = self.get_scaled_face_keypoints(face_keypoints)
                            # ===== face exposure check =====
                            face_naked = torch.sum(face_keypoints_scores[27:48]
                                                   > 0.01) / 21  # excludes the mouth region for now
                            mouth_naked = torch.sum(
                                face_keypoints_scores[48:68] > 0.1
                            ) / 20  # exposure ratio of the mouth
                            if face_naked > 0.5 or mouth_naked > 0.5:
                                # this face is exposed enough for expression recognition
                                emoji_available_list.append(i)

                            # ==== metric: face occlusion detection ====
                            if self_state is not None:
                                self.face_hide(self_state, reid_global_states,
                                               face_naked)
                            # ==== run head pose estimation ====
                            self.estimate_head_pose(pose_estimator,
                                                    face_keypoints, masks_list)
                            # == mouth shape recognition: yawning and speaking ==
                            if mouth_naked > 0.5 and False:  # branch currently disabled
                                scaled_mouth_keypoints, _ = self.get_scaled_mouth_keypoints(
                                    face_keypoints)
                                mouth_distance = self.mouth_open_degree(
                                    scaled_mouth_keypoints)
                                if mouth_distance[1] > 0.3:
                                    open_mouth = "open mouth!!!!"
                                elif mouth_distance[1] > 0.2:
                                    open_mouth = "open"
                                else:
                                    open_mouth = ""
                                print(mouth_distance, open_mouth)

                    # ===== start expression recognition =====
                # ========================= per-detection processing done =========================
                # ========================= assemble results =========================
                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints':
                        preds_img[k],
                        'kp_score':
                        preds_scores[k],
                        'proposal_score':
                        torch.mean(preds_scores[k]) + scores[k] +
                        1.25 * max(preds_scores[k]),
                        'idx':
                        ids[k],
                        'box': [
                            boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0],
                            boxes[k][3] - boxes[k][1]
                        ]
                    })

                result = {'imgname': im_name, 'result': _result}

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(
                        orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)
                # ========================== drawing ==========================
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis import vis_frame_dense as vis_frame, DEFAULT_FONT
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame, DEFAULT_FONT
                    else:
                        from alphapose.utils.vis import vis_frame, DEFAULT_FONT
                    # start drawing ==============
                    img = vis_frame(orig_img, result, self.opt)
                    if self.head_pose and len(masks_list) != 0:
                        for p in masks_list:
                            pose_estimator.draw_annotation_box(img,
                                                               p[0],
                                                               p[1],
                                                               color=(128, 255,
                                                                      128))
                        if self.opt.tracking:
                            # pedestrian re-identification state
                            for _id in ids:
                                _state = reid_states[_id]
                                index = _state['index']
                                bbox = _result[index]['box']
                                bbox = [
                                    bbox[0], bbox[0] + bbox[2], bbox[1],
                                    bbox[1] + bbox[3]
                                ]
                                cv2.putText(
                                    img,
                                    f'no focus: {round(_state["face_hide_rate"], 2)}',
                                    (int(bbox[0]), int((bbox[2] + 52))),
                                    DEFAULT_FONT, 1, (255, 0, 0), 2)
                    # done drawing ==============> show image
                    self.write_image(
                        img,
                        im_name,
                        stream=stream if self.save_video else None)
Example #3
    def update(self):
        if self.save_video:
            # initialize the file video stream, adapting the output video resolution to the original video
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt[
                    'savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[
                    self.video_save_opt[k]
                    for k in ['savepath', 'fourcc', 'fps', 'frameSize']
                ])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
             im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                self.wait_and_put(self.final_result_queue, None)
                if self.save_video:
                    stream.release()
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(
                        orig_img,
                        im_name,
                        stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                pred = hm_data.cpu().data.numpy()
                assert pred.ndim == 4

                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(
                        pred[i][EVAL_JOINTS], bbox)
                    pose_coords.append(
                        torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(
                        torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                result = pose_nms(boxes, scores, ids, preds_img, preds_scores,
                                  self.opt.min_box_area)
                result = {'imgname': im_name, 'result': result}
                if self.opt.pose_track:
                    poseflow_result = self.pose_flow_wrapper.step(
                        orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']
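                # hand the frame result to the downstream consumer via
                # final_result_queue instead of accumulating it locally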
                self.wait_and_put(self.final_result_queue, result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis import vis_frame
                    img = vis_frame(orig_img,
                                    result,
                                    add_bbox=(self.opt.pose_track
                                              | self.opt.tracking))
                    self.write_image(
                        img,
                        im_name,
                        stream=stream if self.save_video else None)
Example #4
    def update(self):
        final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapting the output video resolution to the original video
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt[
                    'savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[
                    self.video_save_opt[k]
                    for k in ['savepath', 'fourcc', 'fps', 'frameSize']
                ])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
             im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                write_json(final_result,
                           self.opt.outputpath,
                           form=self.opt.format,
                           for_eval=self.opt.eval)
                print("Results have been written to json.")
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(
                        orig_img,
                        im_name,
                        stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4
                #pred = hm_data.cpu().data.numpy()

                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints],
                        bbox,
                        hm_shape=hm_size,
                        norm_type=norm_type)
                    pose_coords.append(
                        torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(
                        torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)

                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints':
                        preds_img[k],
                        'kp_score':
                        preds_scores[k],
                        'proposal_score':
                        torch.mean(preds_scores[k]) + scores[k] +
                        1.25 * max(preds_scores[k]),
                        'idx':
                        ids[k],
                        'box': [
                            boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0],
                            boxes[k][3] - boxes[k][1]
                        ]
                    })
                    #print("lbh key point" + str(k) + str(preds_img[k]))
                    #print([int(boxes[k][0]), boxes[k][1], boxes[k][2], boxes[k][3]])
                    box_img = orig_img[int(boxes[k][1]):int(boxes[k][3]),
                                       int(boxes[k][0]):int(boxes[k][2])]
                    # copy so the channel-reversed view is writable for cv2 drawing
                    orig_img = orig_img.copy()

                    pose_coord_array = pose_coords[k].numpy()

                    pose_text = [
                        'Nose', 'LEye', 'REye', 'LEar', 'REar', 'LShoulder',
                        'RShoulder', 'LElbow', 'RElbow', 'LWrist', 'RWrist',
                        'LHip', 'RHip', 'LKnee', 'RKnee', 'LAnkle', 'RAnkle'
                    ]
                    for i in range(len(pose_text)):
                        orig_img = cv2.putText(orig_img, pose_text[i],
                                               (int(pose_coord_array[0][i][0]),
                                                int(pose_coord_array[0][i][1])),
                                               cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                               (0, 255, 0), 1)

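                    # simple fall check on the decoded keypoints: draw a red box
                    # and save a crop of the person when a fall is detected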
                    if self.is_fall(pose_coord_array):
                        print('falling')
                        cv2.rectangle(orig_img,
                                      (int(boxes[k][0]), int(boxes[k][1])),
                                      (int(boxes[k][2]), int(boxes[k][3])),
                                      (0, 0, 255), 2)
                        fall_img = orig_img[int(boxes[k][1]):int(boxes[k][3]),
                                            int(boxes[k][0]):int(boxes[k][2])]
                        if not os.path.exists(self.opt.outputpath + '/fall'):
                            os.mkdir(self.opt.outputpath + '/fall')
                        #cv2.imshow("fall", fall_img)

                        fall_img_name = os.path.join(self.opt.outputpath,
                                                     'fall',
                                                     str(time.time()) + '.jpg')

                        cv2.imwrite(fall_img_name, fall_img)
                    else:
                        print('not falling')
                        cv2.rectangle(orig_img,
                                      (int(boxes[k][0]), int(boxes[k][1])),
                                      (int(boxes[k][2]), int(boxes[k][3])),
                                      (0, 255, 0), 2)
                result = {'imgname': im_name, 'result': _result}

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(
                        orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis import vis_frame_dense as vis_frame
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis import vis_frame
                    img = vis_frame(orig_img, result, self.opt)
                    self.write_image(
                        img,
                        im_name,
                        stream=stream if self.save_video else None)
Example #5
    def update(self):
        final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapting the output video resolution to the original video
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt[
                    'savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[
                    self.video_save_opt[k]
                    for k in ['savepath', 'fourcc', 'fps', 'frameSize']
                ])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
             im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                write_json(final_result,
                           self.opt.outputpath,
                           form=self.opt.format,
                           for_eval=self.opt.eval)
                print("Results have been written to json.")
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(
                        orig_img,
                        im_name,
                        stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4
                #pred = hm_data.cpu().data.numpy()

                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints],
                        bbox,
                        hm_shape=hm_size,
                        norm_type=norm_type)
                    pose_coords.append(
                        torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(
                        torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)

                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints':
                        preds_img[k],
                        'kp_score':
                        preds_scores[k],
                        'proposal_score':
                        torch.mean(preds_scores[k]) + scores[k] +
                        1.25 * max(preds_scores[k]),
                        'idx':
                        ids[k],
                        'box': [
                            boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0],
                            boxes[k][3] - boxes[k][1]
                        ]
                    })

                result = {'imgname': im_name, 'result': _result}

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(
                        orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)

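                # optionally POST the per-frame result (plus an annotated JPEG when
                # post_image is set) to the external server given by opt.server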
                if len(self.opt.server) > 0:
                    import requests, json, copy
                    if self.opt.post_image:
                        result = trans_to_json([result])
                        img = self.act_model.solve_image(result, orig_img)

                        frame_encoded = cv2.imencode(".jpg", img)[1]
                        files = {'image': frame_encoded.tobytes()}

                        for idx, person in enumerate(
                                result):  # result (list of dict)
                            person.pop("keypoints")
                            person.pop("image_id")
                            person.pop("category_id")
                            person.pop("score")
                            person.pop("idx")
                        requests.post(self.opt.server,
                                      files=files,
                                      data={"result": json.dumps(result)})
                    else:
                        result = trans_to_json([result])
                        self.act_model.make_act_score(result)
                        for idx, person in enumerate(result):
                            person.pop("keypoints")
                            person.pop("image_id")
                            person.pop("category_id")
                            person.pop("score")
                            person.pop("idx")
                        requests.post(self.opt.server, json=result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    result = trans_to_json([result])
                    img = self.act_model.solve_image(result, orig_img)
                    self.write_image(
                        img,
                        im_name,
                        stream=stream if self.save_video else None)
Example #6
    def update(self):
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE

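        # single-item variant: decode the one queued result held in self.item
        # and return the per-frame pose dict instead of looping over a queue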
        # get item
        (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
         im_name) = self.item
        if orig_img is None:
            return None
        # image channel RGB->BGR
        orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
        self.orig_img = orig_img
        if boxes is None or len(boxes) == 0:
            return None
        else:
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)
            assert hm_data.dim() == 4
            if hm_data.size()[1] == 136:
                self.eval_joints = [*range(0, 136)]
            elif hm_data.size()[1] == 26:
                self.eval_joints = [*range(0, 26)]
            elif hm_data.size()[1] == 133:
                self.eval_joints = [*range(0, 133)]
            pose_coords = []
            pose_scores = []

            for i in range(hm_data.shape[0]):
                bbox = cropped_boxes[i].tolist()
                if isinstance(self.heatmap_to_coord, list):
                    pose_coords_body_foot, pose_scores_body_foot = self.heatmap_to_coord[
                        0](hm_data[i][self.eval_joints[:-110]],
                           bbox,
                           hm_shape=hm_size,
                           norm_type=norm_type)
                    pose_coords_face_hand, pose_scores_face_hand = self.heatmap_to_coord[
                        1](hm_data[i][self.eval_joints[-110:]],
                           bbox,
                           hm_shape=hm_size,
                           norm_type=norm_type)
                    pose_coord = np.concatenate(
                        (pose_coords_body_foot, pose_coords_face_hand), axis=0)
                    pose_score = np.concatenate(
                        (pose_scores_body_foot, pose_scores_face_hand), axis=0)
                else:
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints],
                        bbox,
                        hm_shape=hm_size,
                        norm_type=norm_type)
                pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
            preds_img = torch.cat(pose_coords)
            preds_scores = torch.cat(pose_scores)

            boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area, use_heatmap_loss=self.use_heatmap_loss)

            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints':
                    preds_img[k],
                    'kp_score':
                    preds_scores[k],
                    'proposal_score':
                    torch.mean(preds_scores[k]) + scores[k] +
                    1.25 * max(preds_scores[k]),
                    'idx':
                    ids[k],
                    'bbox': [
                        boxes[k][0], boxes[k][1], boxes[k][2] - boxes[k][0],
                        boxes[k][3] - boxes[k][1]
                    ]
                })

            result = {'imgname': im_name, 'result': _result}

            if hm_data.size()[1] == 49:
                from alphapose.utils.vis import vis_frame_dense as vis_frame
            elif self.opt.vis_fast:
                from alphapose.utils.vis import vis_frame_fast as vis_frame
            else:
                from alphapose.utils.vis import vis_frame
            self.vis_frame = vis_frame

        return result
Example #7
def validate(m, heatmap_to_coord, batch_size=20):
    det_dataset = builder.build_dataset(cfg.DATASET.TEST,
                                        preset_cfg=cfg.DATA_PRESET,
                                        train=False,
                                        opt=opt)
    eval_joints = det_dataset.EVAL_JOINTS

    det_loader = torch.utils.data.DataLoader(det_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=20,
                                             drop_last=False)
    kpt_json = []
    m.eval()

    norm_type = cfg.LOSS.get('NORM_TYPE', None)
    hm_size = cfg.DATA_PRESET.HEATMAP_SIZE
    combined_loss = (cfg.LOSS.get('TYPE') == 'Combined')

    halpe = (cfg.DATA_PRESET.NUM_JOINTS == 133) or (cfg.DATA_PRESET.NUM_JOINTS
                                                    == 136)

    for inps, crop_bboxes, bboxes, img_ids, scores, imghts, imgwds in tqdm(
            det_loader, dynamic_ncols=True):
        if isinstance(inps, list):
            inps = [inp.cuda() for inp in inps]
        else:
            inps = inps.cuda()
        output = m(inps)
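        # optional horizontal-flip test-time augmentation; the flipped heatmaps
        # are passed to heatmap_to_coord below as hms_flip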
        if opt.flip_test:
            if isinstance(inps, list):
                inps_flip = [flip(inp).cuda() for inp in inps]
            else:
                inps_flip = flip(inps).cuda()
            output_flip = flip_heatmap(m(inps_flip),
                                       det_dataset.joint_pairs,
                                       shift=True)
            pred_flip = output_flip[:, eval_joints, :, :]
        else:
            output_flip = None
            pred_flip = None

        pred = output
        assert pred.dim() == 4
        pred = pred[:, eval_joints, :, :]

        if output.size()[1] == 68:
            face_hand_num = 42
        else:
            face_hand_num = 110

        for i in range(output.shape[0]):
            bbox = crop_bboxes[i].tolist()
            if combined_loss:
                pose_coords_body_foot, pose_scores_body_foot = heatmap_to_coord[
                    0](pred[i][det_dataset.EVAL_JOINTS[:-face_hand_num]],
                       bbox,
                       hm_shape=hm_size,
                       norm_type=norm_type,
                       hms_flip=pred_flip[i][
                           det_dataset.EVAL_JOINTS[:-face_hand_num]]
                       if pred_flip is not None else None)
                pose_coords_face_hand, pose_scores_face_hand = heatmap_to_coord[
                    1](pred[i][det_dataset.EVAL_JOINTS[-face_hand_num:]],
                       bbox,
                       hm_shape=hm_size,
                       norm_type=norm_type,
                       hms_flip=pred_flip[i][
                           det_dataset.EVAL_JOINTS[-face_hand_num:]]
                       if pred_flip is not None else None)
                pose_coords = np.concatenate(
                    (pose_coords_body_foot, pose_coords_face_hand), axis=0)
                pose_scores = np.concatenate(
                    (pose_scores_body_foot, pose_scores_face_hand), axis=0)
            else:
                pose_coords, pose_scores = heatmap_to_coord(
                    pred[i][det_dataset.EVAL_JOINTS],
                    bbox,
                    hm_shape=hm_size,
                    norm_type=norm_type,
                    hms_flip=pred_flip[i][det_dataset.EVAL_JOINTS]
                    if pred_flip is not None else None)

            keypoints = np.concatenate((pose_coords, pose_scores), axis=1)
            keypoints = keypoints.reshape(-1).tolist()

            data = dict()
            data['bbox'] = bboxes[i, 0].tolist()
            data['image_id'] = int(img_ids[i])
            data['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            data['score'] = float(scores[i] + np.mean(pose_scores) +
                                  1.25 * np.max(pose_scores))
            # data['score'] = float(scores[i])
            data['category_id'] = 1
            data['keypoints'] = keypoints

            kpt_json.append(data)

    if opt.ppose_nms:
        from alphapose.utils.pPose_nms import ppose_nms_validate_preprocess, pose_nms, write_json
        final_result = []
        tmp_data = ppose_nms_validate_preprocess(kpt_json)
        for key in tmp_data:
            boxes, scores, ids, preds_img, preds_scores = tmp_data[key]
            boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, 0, cfg.LOSS.get('TYPE') == 'MSELoss')

            _result = []
            for k in range(len(scores)):
                _result.append({
                    'keypoints':
                    preds_img[k],
                    'kp_score':
                    preds_scores[k],
                    'proposal_score':
                    torch.mean(preds_scores[k]) + scores[k] +
                    1.25 * max(preds_scores[k]),
                    'idx':
                    ids[k],
                    'box': [
                        boxes[k][0], boxes[k][1], boxes[k][2] - boxes[k][0],
                        boxes[k][3] - boxes[k][1]
                    ]
                })
            im_name = str(key).zfill(12) + '.jpg'
            result = {'imgname': im_name, 'result': _result}
            final_result.append(result)

        write_json(final_result,
                   './exp/json/',
                   form='coco',
                   for_eval=True,
                   outputfile='validate_rcnn_kpt.json')
    else:
        if opt.oks_nms:
            from alphapose.utils.pPose_nms import oks_pose_nms
            kpt_json = oks_pose_nms(kpt_json)

        with open('./exp/json/validate_rcnn_kpt.json', 'w') as fid:
            json.dump(kpt_json, fid)

    sysout = sys.stdout
    res = evaluate_mAP('./exp/json/validate_rcnn_kpt.json',
                       ann_type='keypoints',
                       ann_file=os.path.join(cfg.DATASET.TEST.ROOT,
                                             cfg.DATASET.TEST.ANN),
                       halpe=halpe)
    sys.stdout = sysout
    return res
Example #8
    def update(self):
        ####
        person_height = 165
        frame_offset = 20
        max_diff_angle = 15
        max_diff_distance = 10
        N_angle = 23
        N_distance = 20
        #
        frames = []
        ground_points = []
        head_points = []
        final_result = []
        final_angles = {'Frame': []}
        final_min_angles = {'Frame': []}
        final_max_angles = {'Frame': []}
        final_distances = {'Frame': []}
        final_min_distances = {'Frame': []}
        final_max_distances = {'Frame': []}
        #
        for i in range(1, N_angle + 1):
            final_angles['Angle_' + str(i)] = []
            final_min_angles['Angle_' + str(i)] = []
            final_max_angles['Angle_' + str(i)] = []
        for i in range(1, N_distance + 1):
            final_distances['Distance_' + str(i)] = []
            final_min_distances['Distance_' + str(i)] = []
            final_max_distances['Distance_' + str(i)] = []
        #
        frame = 0
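        # running extrema, seeded with extreme values so the first valid
        # measurement replaces them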
        min_angle = 180
        max_angle = 0
        min_distance = person_height + 100
        max_distance = 0
        #####
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapting the output video resolution to the original video
            stream = cv2.VideoWriter(*[
                self.video_save_opt[k]
                for k in ['savepath', 'fourcc', 'fps', 'frameSize']
            ])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt[
                    'savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[
                    self.video_save_opt[k]
                    for k in ['savepath', 'fourcc', 'fps', 'frameSize']
                ])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img,
             im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                write_json(final_result,
                           self.opt.outputpath,
                           form=self.opt.format,
                           for_eval=self.opt.eval)
                print("Results have been written to json.")
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(
                        orig_img,
                        im_name,
                        stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4
                #pred = hm_data.cpu().data.numpy()

                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(
                        hm_data[i][self.eval_joints],
                        bbox,
                        hm_shape=hm_size,
                        norm_type=norm_type)
                    pose_coords.append(
                        torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(
                        torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)

                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints':
                        preds_img[k],
                        'kp_score':
                        preds_scores[k],
                        'proposal_score':
                        torch.mean(preds_scores[k]) + scores[k] +
                        1.25 * max(preds_scores[k]),
                        'idx':
                        ids[k],
                        'box': [
                            boxes[k][0], boxes[k][1],
                            boxes[k][2] - boxes[k][0],
                            boxes[k][3] - boxes[k][1]
                        ]
                    })

                result = {'imgname': im_name, 'result': _result}

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(
                        orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis import vis_frame_dense as vis_frame
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis import vis_frame
                    img = vis_frame(orig_img, result, self.opt)
                    #####
                    frame += 1
                    if frame <= frame_offset:
                        ground_point, head_point = self.calc_bound_points(
                            result, vis_thres=0.4)
                        if ground_point is not None:
                            ground_points.append(ground_point)
                            x_point = [x for x, _ in ground_points]
                            y_point = [y for _, y in ground_points]
                            ground_point = (int(np.average(x_point)),
                                            int(np.average(y_point)))
                        if head_point is not None:
                            head_points.append(head_point)
                            x_point = [x for x, _ in head_points]
                            y_point = [y for _, y in head_points]
                            head_point = (int(np.average(x_point)),
                                          int(np.average(y_point)))
                        if ground_point is not None and head_point is not None:
                            dist_height = np.linalg.norm(
                                np.array(head_point) - np.array(ground_point))
                            height_ratio = person_height / (dist_height + 1e-6)
                        else:
                            height_ratio = 0

                    distances = self.calc_distances(result,
                                                    ground_point,
                                                    head_point,
                                                    height_ratio,
                                                    vis_thres=0.4)
                    angles = self.calc_angles(result, vis_thres=0.4)
                    frames.append(frame)
                    final_angles['Frame'].append(frame)
                    final_min_angles['Frame'].append(frame)
                    final_max_angles['Frame'].append(frame)
                    final_distances['Frame'].append(frame)
                    final_min_distances['Frame'].append(frame)
                    final_max_distances['Frame'].append(frame)
                    ##
                    for angle_name, angle in angles.items():
                        angle = int(angle)
                        if angle < 0 and frame > frame_offset:
                            angle = final_angles[angle_name][frame - 2]
                        ##

                        final_angles[angle_name].append(angle)
                        ##
                        if frame <= frame_offset:
                            if angle >= 0 and angle < min_angle:
                                final_min_angles[angle_name].append(angle)
                            else:
                                final_min_angles[angle_name].append(min_angle)
                            if angle >= 0 and angle > max_angle:
                                final_max_angles[angle_name].append(angle)
                            else:
                                final_max_angles[angle_name].append(max_angle)
                        else:
                            previous_min_angle = final_min_angles[angle_name][
                                frame - 2]
                            previous_max_angle = final_max_angles[angle_name][
                                frame - 2]
                            diff_angle = abs(
                                final_angles[angle_name][frame - 1] -
                                final_angles[angle_name][frame - 2])
                            if angle >= 0 and angle < previous_min_angle and diff_angle < max_diff_angle:
                                final_min_angles[angle_name].append(angle)
                            else:
                                final_min_angles[angle_name].append(
                                    previous_min_angle)
                            if angle >= 0 and angle > previous_max_angle and diff_angle < max_diff_angle:
                                final_max_angles[angle_name].append(angle)
                            else:
                                final_max_angles[angle_name].append(
                                    previous_max_angle)
                        ##
                        plt.figure()
                        plt.plot(frames[frame_offset + 1:],
                                 final_angles[angle_name][frame_offset + 1:])
                        plt.plot(frames[frame_offset + 1:],
                                 final_min_angles[angle_name][frame_offset +
                                                              1:],
                                 linestyle='--',
                                 dashes=(5, 3))
                        plt.plot(frames[frame_offset + 1:],
                                 final_max_angles[angle_name][frame_offset +
                                                              1:],
                                 linestyle='--',
                                 dashes=(5, 3))
                        plt.xlabel('Frames')
                        plt.ylabel('Angle (degree)')
                        plt.title(angle_name)
                        plt.grid(True)
                        plt.savefig(
                            os.path.join(self.opt.outputpath_plot,
                                         angle_name + ".jpg"))
                        plt.close()
                    ##
                    for distance_name, distance in distances.items():
                        distance = round(distance, 2)
                        if distance < 0 and frame > frame_offset:
                            distance = final_distances[distance_name][frame -
                                                                      2]
                        ##
                        final_distances[distance_name].append(distance)
                        ##
                        if frame <= frame_offset:
                            if distance >= 0 and distance < min_distance:
                                final_min_distances[distance_name].append(
                                    distance)
                            else:
                                final_min_distances[distance_name].append(
                                    min_distance)
                            if distance >= 0 and distance > max_distance:
                                final_max_distances[distance_name].append(
                                    distance)
                            else:
                                final_max_distances[distance_name].append(
                                    max_distance)
                        else:
                            previous_min_distance = final_min_distances[
                                distance_name][frame - 2]
                            previous_max_distance = final_max_distances[
                                distance_name][frame - 2]
                            diff_distance = abs(
                                final_distances[distance_name][frame - 1] -
                                final_distances[distance_name][frame - 2])
                            if distance_name in ('Distance_10', 'Distance_11'):
                                diff_distance *= 100
                            if distance >= 0 and distance < previous_min_distance and diff_distance < max_diff_distance:
                                final_min_distances[distance_name].append(
                                    distance)
                            else:
                                final_min_distances[distance_name].append(
                                    previous_min_distance)
                            if distance >= 0 and distance > previous_max_distance and diff_distance < max_diff_distance:
                                final_max_distances[distance_name].append(
                                    distance)
                            else:
                                final_max_distances[distance_name].append(
                                    previous_max_distance)
                        ##
                        plt.figure()
                        plt.plot(
                            frames[frame_offset + 1:],
                            final_distances[distance_name][frame_offset + 1:])
                        plt.plot(
                            frames[frame_offset + 1:],
                            final_min_distances[distance_name][frame_offset +
                                                               1:],
                            linestyle='--',
                            dashes=(5, 3))
                        plt.plot(
                            frames[frame_offset + 1:],
                            final_max_distances[distance_name][frame_offset +
                                                               1:],
                            linestyle='--',
                            dashes=(5, 3))
                        plt.xlabel('Frames')
                        plt.ylabel('Distance (cm)')
                        plt.title(distance_name)
                        plt.grid(True)
                        plt.savefig(
                            os.path.join(self.opt.outputpath_plot,
                                         distance_name + ".jpg"))
                        plt.close()
                    ##
                    df_angle = pd.DataFrame.from_dict(final_angles)
                    df_min_angle = pd.DataFrame.from_dict(final_min_angles)
                    df_max_angle = pd.DataFrame.from_dict(final_max_angles)
                    with pd.ExcelWriter(
                            os.path.join(self.opt.outputpath_plot,
                                         "Angles.xlsx")) as writer:
                        df_angle.to_excel(writer,
                                          sheet_name='Angles',
                                          index=False)
                        df_min_angle.to_excel(writer,
                                              sheet_name='Min_Angles',
                                              index=False)
                        df_max_angle.to_excel(writer,
                                              sheet_name='Max_Angles',
                                              index=False)
                    ##
                    df_distance = pd.DataFrame.from_dict(final_distances)
                    df_min_distance = pd.DataFrame.from_dict(
                        final_min_distances)
                    df_max_distance = pd.DataFrame.from_dict(
                        final_max_distances)
                    with pd.ExcelWriter(
                            os.path.join(self.opt.outputpath_plot,
                                         "Distances.xlsx")) as writer:
                        df_distance.to_excel(writer,
                                             sheet_name='Distances',
                                             index=False)
                        df_min_distance.to_excel(writer,
                                                 sheet_name='Min_Distances',
                                                 index=False)
                        df_max_distance.to_excel(writer,
                                                 sheet_name='Max_Distances',
                                                 index=False)
                    #########
                    self.write_image(
                        img,
                        im_name,
                        stream=stream if self.save_video else None,
                        frame=frame)