def load_models(self):
    """Instantiate the detector, pose estimator, tracker and action classifier.

    Reads ``self.inp_dets``, ``self.inp_pose``, ``self.pose_backbone`` and
    ``self.device``; stores the created objects on ``self.detect_model``,
    ``self.pose_model``, ``self.tracker`` and ``self.action_model``.
    """
    device = self.device

    self.detect_model = TinyYOLOv3_onecls(self.inp_dets, device=device)
    # The two entries of inp_pose are forwarded in order as the second and
    # third positional arguments of the pose estimator.
    self.pose_model = SPPE_FastPose(
        self.pose_backbone, self.inp_pose[0], self.inp_pose[1], device=device)
    # Pass the value by keyword so it cannot land on a different leading
    # parameter of Tracker.
    # NOTE(review): 30 is assumed to be the track max age -- confirm against
    # Tracker's signature.
    self.tracker = Tracker(max_age=30, n_init=3)
    self.action_model = TSSTG(device=device)
default='cpu', help='Device to run model on cpu or cuda.') args = par.parse_args() device = args.device # device= torch.args.device("cpu") # DETECTION MODEL. inp_dets = args.detection_input_size detect_model = TinyYOLOv3_onecls(inp_dets, device=device) # POSE MODEL. inp_pose = args.pose_input_size.split('x') inp_pose = (int(inp_pose[0]), int(inp_pose[1])) pose_model = SPPE_FastPose(args.pose_backbone, inp_pose[0], inp_pose[1], device=device) # Tracker. max_age = 30 tracker = Tracker(max_age=max_age, n_init=3) # Actions Estimate. action_model = TSSTG() resize_fn = ResizePadding(inp_dets, inp_dets) cam_source = if type(cam_source) is str and os.path.isfile(cam_source): # Use loader thread with Q for video file. cam = CamLoader_Q(cam_source, queue_size=1000,
class Models:
    """Detection, pose, tracking and action models applied one frame at a time.

    The constructor stores the configuration and immediately builds every
    model via :meth:`load_models`. :meth:`process_frame` then runs the whole
    chain on a single image and returns it with the overlays drawn.
    """

    def __init__(self, *, inp_dets=416, inp_pose=(256, 192),
                 pose_backbone='resnet50', show_detected=True,
                 show_skeleton=True, device='cuda'):
        """Store the settings and load all models.

        Every argument is keyword-only and defaults to the value that was
        previously hard-coded, so ``Models()`` behaves as before.

        Args:
            inp_dets: Input size handed to the detector.
            inp_pose: Pair of sizes handed to the pose estimator.
            pose_backbone: Backbone name handed to the pose estimator.
            show_detected: Draw the raw detection boxes on the frame.
            show_skeleton: Draw the latest skeleton of each updated track.
            device: Device string forwarded to the models.
        """
        self.inp_dets = inp_dets
        self.inp_pose = inp_pose
        self.pose_backbone = pose_backbone
        self.show_detected = show_detected
        self.show_skeleton = show_skeleton
        self.device = device
        self.load_models()

    def load_models(self):
        """Create the detector, pose estimator, tracker and action classifier."""
        self.detect_model = TinyYOLOv3_onecls(self.inp_dets, device=self.device)
        self.pose_model = SPPE_FastPose(
            self.pose_backbone, self.inp_pose[0], self.inp_pose[1],
            device=self.device)
        # Pass the value by keyword so it cannot land on a different leading
        # parameter of Tracker.
        # NOTE(review): 30 is assumed to be the track max age -- confirm
        # against Tracker's signature.
        self.tracker = Tracker(max_age=30, n_init=3)
        self.action_model = TSSTG(device=self.device)

    def kpt2bbox(self, kpt, ex=20):
        """Return the box enclosing the keypoints, grown by ``ex`` on each side.

        Args:
            kpt: 2-D array whose column 0 holds x and column 1 holds y.
            ex: Margin added around the tight bounding box.

        Returns:
            ``np.ndarray`` of ``(x_min - ex, y_min - ex, x_max + ex, y_max + ex)``.
        """
        xs, ys = kpt[:, 0], kpt[:, 1]
        return np.array((xs.min() - ex, ys.min() - ex,
                         xs.max() + ex, ys.max() + ex))

    def _merge_track_boxes(self, detected):
        """Append one row per existing track to the detector output.

        Each row is the track's ``to_tlbr()`` box followed by the constants
        ``[1.0, 1.0, 0.0]``. ``detected`` may be ``None`` (the code treats
        that as "nothing detected"); the result is ``None`` only when there
        are no tracks either.
        """
        for track in self.tracker.tracks:
            row = torch.tensor(
                [track.to_tlbr().tolist() + [1.0, 1.0, 0.0]],
                dtype=torch.float32)
            detected = row if detected is None else torch.cat([detected, row], dim=0)
        return detected

    def _describe_action(self, track, frame_hw):
        """Return ``(label, colour)`` for a track, classifying it when possible.

        The action model is only queried once the track holds exactly 30
        keypoint sets; until then the label stays ``'pending..'``. On a
        prediction the raw model output is also stored on ``track.actions``.
        """
        label, colour = 'pending..', (0, 255, 0)
        if len(track.keypoints_list) != 30:
            return label, colour

        pts = np.array(track.keypoints_list, dtype=np.float32)
        out = self.action_model.predict(pts, frame_hw)
        action_name = self.action_model.class_names[out[0].argmax()]
        label = '{}: {:.2f}%'.format(action_name, out[0].max() * 100)
        if action_name == 'Fall Down':
            colour = (255, 0, 0)
        elif action_name == 'Lying Down':
            colour = (255, 200, 0)
        track.actions = out
        return label, colour

    def process_frame(self, frame):
        """Run detection, pose estimation, tracking and action recognition.

        Args:
            frame: Image passed to the detector, the pose model and the
                OpenCV drawing calls.

        Returns:
            The frame with detection boxes, skeletons, track ids and action
            labels drawn on it (subject to ``show_detected`` / ``show_skeleton``).
        """
        detected = self.detect_model.detect(frame, need_resize=False, expand_bb=10)

        # Advance the tracker, then let every existing track contribute its
        # box as an extra candidate next to the detector output.
        self.tracker.predict()
        detected = self._merge_track_boxes(detected)

        detections = []
        if detected is not None:
            # Columns 0-3 are passed as boxes and column 4 as their scores.
            poses = self.pose_model.predict(frame, detected[:, 0:4], detected[:, 4])
            for ps in poses:
                keypoints = ps['keypoints'].numpy()
                scores = ps['kp_score']
                detections.append(Detection(
                    self.kpt2bbox(keypoints),
                    np.concatenate((keypoints, scores.numpy()), axis=1),
                    scores.mean().numpy()))

            if self.show_detected:
                for bb in detected[:, 0:5]:
                    # OpenCV needs plain integer pixel coordinates, not
                    # 0-d float tensors.
                    x1, y1, x2, y2 = (int(v) for v in bb[:4])
                    frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 1)

        self.tracker.update(detections)

        for track in self.tracker.tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            bbox = track.to_tlbr().astype(int)
            center = track.get_center().astype(int)
            action, clr = self._describe_action(track, frame.shape[:2])

            # Only draw tracks that were matched in this very frame.
            if track.time_since_update == 0:
                if self.show_skeleton:
                    frame = draw_single(frame, track.keypoints_list[-1])
                frame = cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                                      (0, 255, 0), 1)
                frame = cv2.putText(frame, str(track_id), (center[0], center[1]),
                                    cv2.FONT_HERSHEY_DUPLEX, 0.4, (255, 0, 0), 2)
                frame = cv2.putText(frame, action, (bbox[0] + 5, bbox[1] + 15),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.4, clr, 1)

        return frame
from PoseEstimateLoader import SPPE_FastPose from fn import vis_frame_fast save_path = '../../Data/Home_new-pose+score.csv' annot_file = '../../Data/Home_new.csv' # from video_folder = '../Data/falldata/Home/Videos' annot_folder = '../Data/falldata/Home/Annotation_files' # bounding box annotation for each frame. # DETECTION MODEL. detector = TinyYOLOv3_onecls() # POSE MODEL. inp_h = 320 inp_w = 256 pose_estimator = SPPE_FastPose(inp_h, inp_w) # with score. columns = [ 'video', 'frame', 'Nose_x', 'Nose_y', 'Nose_s', 'LShoulder_x', 'LShoulder_y', 'LShoulder_s', 'RShoulder_x', 'RShoulder_y', 'RShoulder_s', 'LElbow_x', 'LElbow_y', 'LElbow_s', 'RElbow_x', 'RElbow_y', 'RElbow_s', 'LWrist_x', 'LWrist_y', 'LWrist_s', 'RWrist_x', 'RWrist_y', 'RWrist_s', 'LHip_x', 'LHip_y', 'LHip_s', 'RHip_x', 'RHip_y', 'RHip_s', 'LKnee_x', 'LKnee_y', 'LKnee_s', 'RKnee_x', 'RKnee_y', 'RKnee_s', 'LAnkle_x', 'LAnkle_y', 'LAnkle_s', 'RAnkle_x', 'RAnkle_y', 'RAnkle_s', 'label' ] def normalize_points_with_size(points_xy, width, height, flip=False): points_xy[:, 0] /= width