def run_demo(net, image_provider, height_size, cpu, track, smooth):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 0
    for img in image_provider:
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        print("draw", img.dtype, img.shape, img.min(), img.max())
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        print(img.min(), img.max())
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 33:
                delay = 0
            else:
                delay = 33
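# --- Example wiring (illustrative only) -------------------------------------
# A minimal sketch of how a run_demo variant above is usually invoked. The
# module paths, the VideoReader provider, and the checkpoint filename are
# assumptions borrowed from the usual lightweight-openpose demo layout, not
# definitions from this file.
def _example_usage():
    import torch
    from models.with_mobilenet import PoseEstimationWithMobileNet  # assumed module path
    from modules.load_state import load_state                      # assumed module path

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
    load_state(net, checkpoint)  # copy matching weights into the model
    frames = VideoReader(0)      # assumed provider yielding BGR frames from a webcam
    run_demo(net, frames, height_size=256, cpu=False, track=True, smooth=True)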
def _dect_pose(self, **kwargs):
    """Detect poses.

    Arguments:
        img {ndarray}: input image.
        model {PoseEstimationWithMobileNet}: initialized OpenPose model.
        previous_poses {list}: previous poses for tracking mode.

    Returns:
        list: detected poses.
    """
    img = kwargs.get('img', None)
    model = kwargs.get('model', None)
    previous_poses = kwargs.get('previous_poses', None)
    use_cuda = kwargs.get('use_cuda', False)
    track = self.__params.track
    smooth = self.__params.smooth
    stride = self.__params.stride
    upsample_ratio = self.__params.upsample_ratio
    num_keypoints = Pose.num_kpts

    # orig_img = img.copy()
    heatmaps, pafs, scale, pad = self._infer_fast(model=model, img=img, use_cuda=use_cuda)

    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    if track:
        track_poses(previous_poses, current_poses, smooth=smooth)
        previous_poses = current_poses
    return current_poses
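# A hypothetical call site for _dect_pose, shown only to illustrate the
# kwargs contract documented above; `detector`, `pose_model` and `prev` are
# placeholder names, not objects defined in this file.
def _example_dect_pose(detector, pose_model, frame, prev):
    poses = detector._dect_pose(img=frame,
                                model=pose_model,
                                previous_poses=prev,
                                use_cuda=True)
    return poses  # feed back as previous_poses on the next frame for tracking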
def run_on_image(net, height_size, cpu, track, smooth, img, stride, upsample_ratio, num_keypoints, threshold):
    global previous_poses
    orig_img = img.copy()
    heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

    score = 0
    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    if track:
        track_poses(previous_poses, current_poses, smooth=smooth)
        previous_poses = current_poses
    for pose in current_poses:
        pose.draw(img)
    img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
    for pose in current_poses:
        # cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
        r_hand_center, r_hand_width, l_hand_center, l_hand_width = detect_hand(pose)
        if -1 not in r_hand_center:
            cv2.circle(img, (r_hand_center[0], r_hand_center[1]), 5, (255, 0, 0), 5)
            cv2.rectangle(img, (r_hand_center[0] - r_hand_width, r_hand_center[1] - r_hand_width),
                          (r_hand_center[0] + r_hand_width, r_hand_center[1] + r_hand_width), (0, 255, 255))
        if -1 not in l_hand_center:
            cv2.circle(img, (l_hand_center[0], l_hand_center[1]), 5, (255, 0, 0), 5)
            cv2.rectangle(img, (l_hand_center[0] - l_hand_width, l_hand_center[1] - l_hand_width),
                          (l_hand_center[0] + l_hand_width, l_hand_center[1] + l_hand_width), (0, 255, 255))
        face_center, face_width = detect_face(pose)
        if -1 not in face_center:
            cv2.rectangle(img, (face_center[0] - face_width, face_center[1] - face_width),
                          (face_center[0] + face_width, face_center[1] + face_width), (0, 0, 255))
            # (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id),
                            (face_center[0] - face_width, face_center[1] - face_width - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
            if -1 not in r_hand_center:
                x, y, h, w, score = detect_touch(face_center, face_width, r_hand_center, r_hand_width)
                if h != 0:
                    cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 255))
                    cv2.putText(img, f'Score: {score:0.2f}', (x, y - 16),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 0))
            if -1 not in l_hand_center:
                x, y, h, w, score = detect_touch(face_center, face_width, l_hand_center, l_hand_width)
                if h != 0:
                    cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 255))
                    cv2.putText(img, f'Score: {score:0.2f}', (x, y - 16),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 0))

    cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
    delay = 1
    detect = False
    key = cv2.waitKey(delay)
    if key == 27:  # esc
        return
    elif key == 112:  # 'p'
        if delay == 33:
            delay = 0
        else:
            delay = 33
    return score > threshold
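# detect_hand, detect_face and detect_touch above are external helpers not
# included in this snippet. As a rough sketch of the idea (purely an
# assumption, not the author's code), detect_face might build a square box
# around the head from the nose and ear keypoints of the 18-keypoint
# OpenPose layout used by Pose:
def detect_face_sketch(pose):
    """Return (center, half_width); center is (-1, -1) when keypoints are missing."""
    nose = pose.keypoints[0]
    ear_a, ear_b = pose.keypoints[16], pose.keypoints[17]
    if -1 in nose:
        return (-1, -1), -1
    if -1 not in ear_a and -1 not in ear_b:
        half_width = int(abs(int(ear_a[0]) - int(ear_b[0])) * 0.75)  # scale ear span
    else:
        half_width = 40  # arbitrary fallback size in pixels
    return (int(nose[0]), int(nose[1])), half_width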
def run_inference(net, image_provider, height_size, cpu, track, smooth, no_display, json_view=False):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 100
    if isinstance(image_provider, ImageReader):
        delay = 0
    for img in image_provider:
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n, pose_entry in enumerate(pose_entries):
            if len(pose_entry) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entry[kpt_id] != -1.0:
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entry[kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entry[kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entry[18])
            current_poses.append(pose)

        if json_view:
            return current_poses

        if not no_display:
            if track:
                track_poses(previous_poses, current_poses, smooth=smooth)
                previous_poses = current_poses
            for pose in current_poses:
                pose.draw(img)
            for pose in current_poses:
                cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                              (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (32, 202, 252))
                if track:
                    cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
            cv2.imshow('PoseCamera', img)
            key = cv2.waitKey(delay)
            if key == 27:
                return
def run_demo(net, height_size, track, smooth, record_vid, camera_type):
    net = net.eval()
    net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []

    ## Tarit defined
    slope_threshold = 0.4
    ear_slope_threshold = 0.5
    eye_ear_slope_threshold = 0.5
    not_detected = (-1, -1)
    sleep_confirmation_time = 60  # in seconds
    # flags to detect whether the person is sleeping or not
    sleeping = False
    timer_started = False
    time_notified = 0
    selected_pose = None

    while True:
        # msg, frame = receiver.receive(timeout=60.0)
        # img = cv2.imdecode(np.frombuffer(frame, dtype='uint8'), -1)
        img = cap.read()
        if camera_type == "jetson":
            img = img[1300:1780, 1320:1960]
        # start_time = time.time()
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        # for pose in current_poses:
        #     pose.draw(img)

        ## find the longest nose-to-neck distance and select that pose
        longest_nect_to_nose_dst = 0
        for pose in current_poses:
            nose = tuple(pose.keypoints[0])
            neck = tuple(pose.keypoints[1])
            ## Pythagoras
            nect_to_nose_dst = pow(pow(abs(nose[0] - neck[0]), 2) + pow(abs(nose[1] - neck[1]), 2), 1 / 2)
            if nect_to_nose_dst > longest_nect_to_nose_dst:
                longest_nect_to_nose_dst = nect_to_nose_dst
                selected_pose = pose

        if selected_pose is not None:
            selected_pose.draw(img)
            nose = tuple(selected_pose.keypoints[0])
            neck = tuple(selected_pose.keypoints[1])
            l_ear = tuple(selected_pose.keypoints[16])
            r_ear = tuple(selected_pose.keypoints[17])
            l_eye = tuple(selected_pose.keypoints[15])
            r_eye = tuple(selected_pose.keypoints[14])
            # print(cal_slope(l_eye, l_ear), cal_slope(r_eye, r_ear))

            ## detect whether the person's back is facing the camera
            if nose == (-1, -1):
                if l_ear != not_detected and r_ear != not_detected:
                    ear_slope = abs(l_ear[1] - r_ear[1]) / abs(l_ear[0] - r_ear[0])
                    cv2.circle(img, l_ear, 5, (255, 0, 0), 3)
                    cv2.circle(img, r_ear, 5, (0, 255, 0), 3)
                    if ear_slope > ear_slope_threshold:
                        sleeping = True
                        print("sleeping")
                    else:
                        sleeping = False
                else:
                    ## out of condition, can't detect
                    sleeping = False
            else:
                cv2.circle(img, nose, 5, (255, 0, 0), 3)
                cv2.circle(img, neck, 5, (0, 255, 0), 3)
                # NOTE: assumes the nose and neck never share a y-coordinate
                slope_inverse = (nose[0] - neck[0]) / (nose[1] - neck[1])
                l_ear_eye_slope = cal_slope(l_eye, l_ear)
                r_ear_eye_slope = cal_slope(r_eye, r_ear)
                # increase the slope_threshold if the person is turning their head
                # print(pose.keypoints[16], pose.keypoints[17])  # print ear locations
                if l_ear == (-1, -1) or r_ear == (-1, -1):
                    slope_threshold = 1
                    print("one ear missing, increasing slope_threshold")
                else:
                    slope_threshold = 0.4
                if abs(slope_inverse) > slope_threshold:
                    # cv2.putText(img, "".join([str(pose.id), "sleeping"]), (20, 50), cv2.FONT_HERSHEY_COMPLEX, 2, (255, 0, 0), 3)
                    print("Sleeping (neck bent more than threshold)")
                    # cv2.putText(img, "sleeping", (20, 50), cv2.FONT_HERSHEY_COMPLEX, 2, (255, 0, 0), 3)
                    sleeping = True
                elif l_eye == not_detected or r_eye == not_detected:
                    sleeping = True
                    print("Sleeping (not seeing both eyes)")
                elif l_ear_eye_slope < -0.6 or r_ear_eye_slope > 0.6 or \
                        l_ear_eye_slope > eye_ear_slope_threshold or r_ear_eye_slope < -eye_ear_slope_threshold:
                    sleeping = True
                    print("Sleeping (ears higher/lower than eyes)")
                else:
                    print("Not sleeping")
                    sleeping = False

        if sleeping:
            if not timer_started:
                t_start_sleep = time.time()
                timer_started = True
            else:
                if time.time() - t_start_sleep > sleep_confirmation_time:
                    print("sending line message")
                    pic_name = "".join(["log_data/", str(time_notified), ".jpg"])
                    cv2.imwrite(pic_name, img)
                    # lineNotify("Elderly sleeping %d" % time_notified)
                    notifyFile("Elderly sleeping %d" % time_notified, pic_name)
                    time_notified += 1
                    timer_started = False
                    sleeping = False
        else:
            timer_started = False
            # song = AudioSegment.from_mp3("Alarm_Clock_Sound.mp3")
            # play(song)

        img = cv2.addWeighted(orig_img, 0.6, img, 0.6, 0)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.imshow('Sleep detector', img)
        if record_vid:
            out_raw.write(orig_img)
            out_pose.write(img)
        # print(1 / (time.time() - start_time))
        if cv2.waitKey(1) == 27:  # esc
            # receiver.close()
            cap.stop()
            if record_vid:
                out_raw.release()
                out_pose.release()
            return
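# cal_slope is referenced above but not defined in this snippet; a plausible
# minimal implementation (an assumption, not the author's code) is the slope
# between two (x, y) keypoints, guarded against missing points and vertical
# segments so the sleep heuristics above cannot divide by zero:
def cal_slope_sketch(p1, p2):
    if p1 == (-1, -1) or p2 == (-1, -1) or p1[0] == p2[0]:
        return 0.0  # treat undetected or vertical pairs as flat
    return (p2[1] - p1[1]) / (p2[0] - p1[0])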
def run_demo(model, image_provider, height_size, cpu, track, smooth, file):
    """Run pose estimation over a video, save an annotated copy, and collect keypoints.

    Args:
        model: pose-estimation network.
        image_provider: frame source exposing `fps`, `width` and `height`.
        height_size: network input height.
        cpu: run on CPU if True.
        track: enable pose tracking between frames.
        smooth: smooth keypoints while tracking.
        file: base name for the output video.

    Returns:
        list: one flattened keypoint list (x0, y0, x1, y1, ...) per detected pose.
    """
    model = model.eval()
    if not cpu:
        model = model.cuda()

    point_list = []
    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []

    # save the annotated video
    fps = image_provider.fps
    width = image_provider.width
    height = image_provider.height
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
    # video_saver = cv2.VideoWriter('TESTV.mp4', fourcc, fps, (height, width))
    save_video_path = os.path.join(os.getcwd(), 'video_output')
    if not os.path.exists(save_video_path):
        os.mkdir(save_video_path)
    save_video_name = os.path.join(save_video_path, file + '.mp4')
    video_saver = cv2.VideoWriter(save_video_name, fourcc, fps, (width, height))

    for img in image_provider:
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(model, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for pose_entry in pose_entries:
            if len(pose_entry) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entry[kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entry[kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entry[kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entry[18])
            current_poses.append(pose)
            # save keypoints in the list
            key_point_list = pose_keypoints.flatten().tolist()
            point_list.append(key_point_list)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        video_saver.write(img)

    video_saver.release()  # finalize the output file
    return point_list
def run_demo(net, image_provider, output_dir, height_size, cpu, track, smooth, generate_video):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    video_out = None  # guard so release() below is safe when no video is generated
    if generate_video:
        video_output_path = os.path.join(output_dir, output_dir.split("/")[-1] + "-annotations" + ".mp4")
        # frames are (1080, 1920, 3)
        video_out = cv2.VideoWriter(video_output_path,
                                    cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 10, (1920, 1080))

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 33
    for idx, img in enumerate(image_provider):
        orig_img = img.copy()
        frame_file = os.path.join(output_dir, output_dir.split("/")[-1] + f"_{idx:06}.txt")
        with open(frame_file, "w") as frame_f:
            print("Input to the model is", orig_img.shape)
            # print(output_path)
            heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

            total_keypoints_num = 0
            all_keypoints_by_type = []
            for kpt_idx in range(num_keypoints):  # 19th for bg
                total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                         total_keypoints_num)

            pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
            for kpt_id in range(all_keypoints.shape[0]):
                all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
                all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
            current_poses = []
            for n in range(len(pose_entries)):
                if len(pose_entries[n]) == 0:
                    continue
                pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
                for kpt_id in range(num_keypoints):
                    if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                        pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                        pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
                pose = Pose(pose_keypoints, pose_entries[n][18])
                current_poses.append(pose)

            if track:
                track_poses(previous_poses, current_poses, smooth=smooth)
                previous_poses = current_poses

            annotations_hand = []
            annotations_head = []
            for pose in current_poses:
                pose.draw(img)
                print(pose.hands_bbox, pose.head_bbox)
                for hand_bbox in pose.hands_bbox:
                    # convert to normalized (x_center, y_center, width, height)
                    x_center = (hand_bbox[0][0] + hand_bbox[1][0]) / (2 * 1920)
                    y_center = (hand_bbox[0][1] + hand_bbox[1][1]) / (2 * 1080)
                    x_offset = (hand_bbox[1][0] - hand_bbox[0][0]) / 1920
                    y_offset = (hand_bbox[1][1] - hand_bbox[0][1]) / 1080
                    if x_center < 0.0:
                        x_offset = x_offset - x_center
                        x_center = 0
                    hand_bbox_scaled = (x_center, y_center, x_offset, y_offset)
                    print(hand_bbox_scaled)
                    frame_f.write("8 " + ' '.join(map(str, hand_bbox_scaled)) + "\n")
                    annotations_hand.append(hand_bbox_scaled)

                x_center = (pose.head_bbox[0][0] + pose.head_bbox[1][0]) / (2 * 1920)
                y_center = (pose.head_bbox[0][1] + pose.head_bbox[1][1]) / (2 * 1080)
                x_offset = (pose.head_bbox[1][0] - pose.head_bbox[0][0]) / 1920
                y_offset = (pose.head_bbox[1][1] - pose.head_bbox[0][1]) / 1080
                if x_center < 0.0:
                    x_offset = x_offset - x_center
                    x_center = 0
                head_bbox_scaled = (x_center, y_center, x_offset, y_offset)
                print(head_bbox_scaled)
                frame_f.write("9 " + ' '.join(map(str, head_bbox_scaled)) + "\n")
                annotations_head.append(head_bbox_scaled)

            img = cv2.addWeighted(orig_img, 0.3, img, 0.7, 0)
            for pose in current_poses:
                cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                              (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
                if track:
                    cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
            cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)

            img_filename = os.path.join(output_dir, output_dir.split("/")[-1] + f"_{idx:06}.jpg")
            print(img_filename)
            cv2.imwrite(img_filename, orig_img)
            if generate_video:
                video_out.write(img)

            key = cv2.waitKey(delay)
            if key == 27:  # esc
                return
            elif key == 112:  # 'p'
                if delay == 33:
                    delay = 0
                else:
                    delay = 33

    if video_out is not None:
        video_out.release()
def run(net, image_provider, height_size, cpu, track, smooth):
    ts1 = time.time()
    net = setup(net, cpu)

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 1
    ts2 = time.time()
    print("setup took ", ts2 - ts1, " s")
    print(ts2)
    print(ts1)
    for img in image_provider:
        ts1 = time.time()
        orig_img = img.copy()
        print('Original Dimensions : ', orig_img.shape)

        ts_netw1 = time.time()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)
        ts_netw2 = time.time()
        print("network time", (ts_netw2 - ts_netw1) * 1000, "ms")

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        ts2 = time.time()
        cv2.putText(img, '%.2fms' % ((ts2 - ts1) * 1000.0), (10, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
        cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 1:
                delay = 0
            else:
                delay = 1
def run_lightweight(net, img, height_size=256, cpu=False, track=False, smooth=True):
    ts1 = time.time()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 1

    orig_img = img.copy()
    ts_netw1 = time.time()
    heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)
    ts_netw2 = time.time()
    # print("network time", (ts_netw2 - ts_netw1) * 1000, "ms")

    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    if track:
        track_poses(previous_poses, current_poses, smooth=smooth)
        previous_poses = current_poses
    for pose in current_poses:
        # pose.draw(img)
        pose.draw_only_relevant(img)
    # img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
    for pose in current_poses:
        # relevant only for demonstration
        # cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
        #               (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
        if track:
            cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
    ts2 = time.time()
    cv2.putText(img, '%.2fms' % ((ts2 - ts1) * 1000.0), (10, 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
    return img
def run_demo(net, image_provider, height_size, cpu, track, smooth):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 1
    mean_time = 0
    video_time = 0
    prep_displacement = 0
    prep_time = 0
    for img in image_provider:
        tik1 = cv2.getTickCount()
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            # keep only poses whose bbox center falls inside the work zone
            if 355 < pose.bbox[0] + pose.bbox[2] / 2 < 555:
                current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        cv2.rectangle(img, (330, 330), (430, 230), (0, 0, 255), 2)
        prev_pose = []
        for pose in current_poses:
            if track:
                cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                              (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
            # both wrists (keypoints 4 and 7) inside the 330..430 x 230..330 box
            if (330 < pose.keypoints[4][0] < 430) and (230 < pose.keypoints[4][1] < 330) and \
                    (330 < pose.keypoints[7][0] < 430) and (230 < pose.keypoints[7][1] < 330):
                cv2.rectangle(img, (pose.keypoints[4][0] - 10, pose.keypoints[4][1] - 10),
                              (pose.keypoints[4][0] + 10, pose.keypoints[4][1] + 10), (0, 255, 0), 2)
                cv2.rectangle(img, (pose.keypoints[7][0] - 10, pose.keypoints[7][1] - 10),
                              (pose.keypoints[7][0] + 10, pose.keypoints[7][1] + 10), (0, 255, 0), 2)
                if bool(previous_poses):
                    prev_pose = previous_poses[0]
                    prep_displacement += ((pose.keypoints[4][0] - prev_pose.keypoints[4][0]) ** 2 +
                                          (pose.keypoints[4][1] - prev_pose.keypoints[4][1]) ** 2) ** 0.5
                    prep_displacement += ((pose.keypoints[7][0] - prev_pose.keypoints[7][0]) ** 2 +
                                          (pose.keypoints[7][1] - prev_pose.keypoints[7][1]) ** 2) ** 0.5
                prep_time += (cv2.getTickCount() - tik1) / cv2.getTickFrequency()

        previous_poses = current_poses
        current_time = (cv2.getTickCount() - tik1) / cv2.getTickFrequency()
        video_time += current_time
        cv2.putText(img, "displacement: %d" % int(prep_displacement), (20, 20),
                    cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 0, 255), 1)
        cv2.putText(img, "prep time (s): %.1f" % (prep_time / 2), (20, 40),
                    cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 0, 255), 1)
        cv2.putText(img, "video time (s): %.1f" % (video_time / 2), (20, 60),
                    cv2.FONT_HERSHEY_COMPLEX, 0.6, (0, 0, 255), 1)
        if mean_time == 0:
            mean_time = current_time
        else:
            mean_time = mean_time * 0.95 + current_time * 0.05
        # cv2.putText(img, 'FPS: {}'.format(int(1 / mean_time * 10) / 10),
        #             (40, 80), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255))
        cv2.imshow('Chicken Prep Test Demo', img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 1:
                delay = 0
            else:
                delay = 1
def run_demo(net, image_provider, height_size, cpu, track, smooth):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 33
    dict_id_color = {}
    for img in image_provider:
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses

        # drop colors of ids that are no longer tracked
        dict_id_color_r = {}
        for id_ in dict_id_color.keys():
            flag_track = False
            for pose in current_poses:
                if id_ == pose.id:
                    flag_track = True
                    break
            if flag_track:
                dict_id_color_r[pose.id] = dict_id_color[pose.id]
        dict_id_color = dict_id_color_r

        track_thr = 0
        for pose in current_poses:
            # print('pose.id : ', pose.id)
            if pose.id not in dict_id_color.keys():
                R_ = random.randint(30, 255)
                G_ = random.randint(30, 255)
                B_ = random.randint(30, 255)
                dict_id_color[pose.id] = [[B_, G_, R_], 1]
            else:
                dict_id_color[pose.id][1] += 1
            if dict_id_color[pose.id][1] > track_thr:
                pose.draw(img, color_x=dict_id_color[pose.id][0])

        img = cv2.addWeighted(orig_img, 0.3, img, 0.7, 0)
        for pose in current_poses:
            if dict_id_color[pose.id][1] > track_thr:
                cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                              (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
                if track:
                    # draw the id twice for an outlined label
                    cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255, 0), 4)
                    cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.namedWindow('Lightweight Human Pose Estimation Python Demo', 0)
        cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 33:
                delay = 0
            else:
                delay = 33
def run_demo(net, image_provider, height_size, cpu, track, smooth):
    show_time = True
    net = net.eval()
    if not cpu:
        net = net.cuda()
    track = True
    print("track:" + str(track))

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 1
    for img, img_dir in image_provider:
        orig_img = img.copy()
        start_time = datetime.datetime.now()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        end_time = datetime.datetime.now()
        if show_time:
            print("pose time:")
            print((end_time - start_time).microseconds / 1000)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        # cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        print(img_dir)
        img_save_dir = img_dir.replace("15_1", "15_1_lightpose")
        # cv2.imwrite("/data2/qilei_chen/DATA/ShanghaiAutograding/gangganpingheng_images_240/15_1/right_light_pose/" + os.path.basename(img_dir), img)
        cv2.imwrite(img_save_dir, img)
def run_demo(net, image_provider, height_size, cpu, track, smooth, com):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts  # +1 for hidden COM
    previous_poses = []
    # original delay: 33; 0 = pause / wait for input indefinitely
    delay = 0
    for img in image_provider:
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints + 1, 2), dtype=np.int32) * -1  # +1 here for COM
            found_kpts = []
            C_pts = []
            BOS = [[-1, -1], [-1, -1]]
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
                    found_kpts.append(kpt_id)
                    # print(kpt_id, pose_keypoints[kpt_id], Pose.kpt_names[kpt_id])

            # ====== HOLY GRAIL =========
            if com:
                COM, C_pts, BOS = compute_com(found_kpts, pose_keypoints)
                pose_keypoints[-1] = COM
            pose = Pose(pose_keypoints, pose_entries[n][18], C_pts, BOS)
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 33:
                delay = 0
            else:
                delay = 33
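# compute_com above is project-specific and not shown. As a loose sketch of
# the idea (an assumption, not the author's method), one could average the
# detected keypoints as a crude center-of-mass proxy and take the ankle
# keypoints (indices 10 and 13 in this 18-keypoint layout) as the base of
# support:
def compute_com_sketch(found_kpts, pose_keypoints):
    pts = np.array([pose_keypoints[k] for k in found_kpts], dtype=np.float32)
    com = pts.mean(axis=0).astype(np.int32)  # unweighted centroid, not segment-weighted
    bos = [[-1, -1], [-1, -1]]
    if 10 in found_kpts and 13 in found_kpts:  # r_ank and l_ank both found
        bos = [list(pose_keypoints[10]), list(pose_keypoints[13])]
    return com, pts, bos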
def run_demo(net, height_size, cpu, track, smooth, image_provider=None, use_realsense_cam=False, openvino=False):
    if not image_provider and not use_realsense_cam:
        raise ValueError('Either `image_provider` or `use_realsense_cam` must be provided')
    if use_realsense_cam:
        pipeline = init_realsense()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 33
    while True:
        # get a frame from the source provided
        if use_realsense_cam:
            frames = pipeline.wait_for_frames()
            color_frame = frames.get_color_frame()
            img = np.asanyarray(color_frame.get_data())
        else:
            img = next(image_provider)
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu,
                                                openvino=openvino)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 33:
                delay = 0
            else:
                delay = 33
def run_demo(net, image_provider, height_size, cpu, track, smooth):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 33
    for img in image_provider:
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        # import ipdb; ipdb.set_trace()
        # print(current_poses)
        # current_poses[-1].keypoints  # debug residue; raises IndexError when no pose is detected

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        cv2.imshow("Lightweight Human Pose Estimation Python Demo", img)
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 33:
                delay = 0
            else:
                delay = 33
def anime_frame(rgb, env, size=None, useSigmod=False, useTwice=False):
    if env is None:
        # return init_pose("./checkpoint_iter_370000.pth")
        # return init_pose("./default_checkpoints/R.pth")
        return init_pose("./refine4_checkpoints/checkpoint_iter_14000.pth")
    net, previous_poses = env

    stride = 8
    upsample_ratio = 4
    heatmaps, pafs, scale, pad = infer_fast(net, rgb, 368, stride, upsample_ratio)

    num_keypoints = Pose.num_kpts
    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    # if track:
    track_poses(previous_poses, current_poses, smooth=True)
    previous_poses = current_poses
    env[1] = previous_poses
    # return rgb, env

    # print(rgb.min(), rgb.max())
    # img = rgb.squeeze(0).permute(1, 2, 0).cpu().numpy()[:, :, ::-1]  # * 255
    # img = img.squeeze(0).permute(1, 2, 0).cpu().numpy()[:, :, ::-1]  # * 255
    # img += 0.5
    # img *= 255
    # img = img.astype(np.uint8)
    img = np.zeros((rgb.shape[2], rgb.shape[3], rgb.shape[1]), dtype=np.uint8)
    show_info = True
    for pose in current_poses:
        pose.draw(img, show_info)
        show_info = False
        # break
    # for pose in current_poses:
    #     cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
    #                   (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
    #     if track:
    #         cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
    #                     cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
    img = img[:, :, ::-1]  # / 255
    # print(img.shape, img.dtype, img.min(), img.max())
    img = torch.FloatTensor(img.astype(np.float32))
    img /= 255
    img = img.permute(2, 0, 1).unsqueeze(0).cuda()
    output = rgb * 0.6 + img * 0.4
    # output = img
    return output, env
def get_skel_coords(net, image_provider, height_size=256, cpu=False, track=1, smooth=1):
    # text_file = open("skel/" + args.save_txt, "w")
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 33
    img = image_provider  # a single frame, despite the parameter name
    # for img in image_provider:
    # print(img.shape)
    orig_img = img.copy()
    heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

    total_keypoints_num = 0
    all_keypoints_by_type = []
    for kpt_idx in range(num_keypoints):  # 19th for bg
        total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                 total_keypoints_num)

    pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True)
    for kpt_id in range(all_keypoints.shape[0]):
        all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
        all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
    current_poses = []
    for n in range(len(pose_entries)):
        if len(pose_entries[n]) == 0:
            continue
        pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
        for kpt_id in range(num_keypoints):
            if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
        pose = Pose(pose_keypoints, pose_entries[n][18])
        current_poses.append(pose)

    if track:
        track_poses(previous_poses, current_poses, smooth=smooth)
        previous_poses = current_poses
    for pose in current_poses:
        pose.draw(img)
    img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
    # for pose in current_poses:
    if len(current_poses) != 0:
        # `pose` is still bound to the last pose drawn above
        # n = text_file.write(coords)
        cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                      (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
        if track:
            cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        # break
    # cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
    if len(current_poses) != 0:
        # print("not zero", img)
        return current_poses[0].return_coords(), img
    else:
        # print("zero")
        return [], np.array((1, 1, 1), np.uint8)

    # NOTE: unreachable below; both branches above return
    key = cv2.waitKey(delay)
    if key == 27:  # esc
        text_file.close()
        return
    elif key == 112:  # 'p'
        if delay == 33:
            delay = 0
        else:
            delay = 33
def run_demo(net, image_provider, height_size, cpu, track, smooth, target_dir=''):
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    previous_poses = []
    delay = 1
    for img, frame_name in image_provider:
        print("inside demo frame name is " + os.path.basename(frame_name))
        orig_img = img.copy()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale
        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses
        for i, pose in enumerate(current_poses):
            pose.draw(img)
            blank = create_blank()
            pose.draw(blank)
            print("saving: " + os.path.join(target_dir, os.path.basename(frame_name)))
            res = cv2.imwrite(os.path.join(target_dir, os.path.basename(frame_name)), img)
            print("status is: " + str(res))
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        # for pose in current_poses:
        #     cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
        #                   (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
        #     if track:
        #         cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
        #                     cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        # cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        # res = cv2.imwrite('daniel_testd.jpg', img)
        # print("res is " + str(res))
        key = cv2.waitKey(delay)
        if key == 27:  # esc
            return
        elif key == 112:  # 'p'
            if delay == 1:
                delay = 0
            else:
                delay = 1