def main():
    test_total_class = list()

    # Load the pretrained model so we can extract skeleton vectors (PoseNet).
    posenet_model = posenet.load_model(args.model)
    posenet_model = posenet_model.cuda()
    output_stride = posenet_model.output_stride

    # Convert each video into frame images.
    video_filenames = [
        v.path for v in os.scandir(args.video_dir)
        if v.is_file() and v.path.endswith('.mp4')
    ]
    if args.image_dir:
        if not os.path.exists(args.image_dir):
            os.makedirs(args.image_dir)
        for iv, v in enumerate(video_filenames):
            # v[10:-4] assumes a fixed-length video_dir prefix plus '.mp4';
            # os.path.basename/splitext would be more robust.
            if not os.path.exists(args.image_dir + '/' + v[10:-4] + '/'):
                os.makedirs(args.image_dir + '/' + v[10:-4] + '/')
            video2frame(v, args.image_dir + '/' + v[10:-4] + '/')
            if args.output_dir:
                if not os.path.exists(args.output_dir + '/' + v[10:-4] + '/'):
                    os.makedirs(args.output_dir + '/' + v[10:-4] + '/')

    # Extract skeleton vectors from the frames pulled out of each video.
    for iv, v in enumerate(video_filenames):
        filenames = [
            f.path
            for f in os.scandir(args.image_dir + '/' + v[10:-4] + '/')
            if f.is_file() and f.path.endswith(('.png', '.jpg'))
        ]
        for i, f in enumerate(filenames):
            input_image, draw_image, output_scale = posenet.read_imgfile(
                f, scale_factor=args.scale_factor, output_stride=output_stride)

            with torch.no_grad():
                input_image = torch.Tensor(input_image).cuda()

                # Run PoseNet to get the keypoint vectors.
                heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = posenet_model(
                    input_image)
                pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                    heatmaps_result.squeeze(0),
                    offsets_result.squeeze(0),
                    displacement_fwd_result.squeeze(0),
                    displacement_bwd_result.squeeze(0),
                    output_stride=output_stride,
                    max_pose_detections=10,
                    min_pose_score=0.25)

            keypoint_coords *= output_scale

            # Save a visualization image: the dancer's skeleton vectors drawn
            # over the extracted sequence frame.
            if args.output_dir:
                draw_image = posenet.draw_skel_and_kp(
                    draw_image, pose_scores, keypoint_scores, keypoint_coords,
                    min_pose_score=0.25, min_part_score=0.25)
                cv2.imwrite(
                    os.path.join(args.output_dir,
                                 os.path.relpath(f, args.image_dir)),
                    draw_image)

            if not args.notxt:
                max_score = 0
                max_index = 0
                ignore = 0
                for pi in range(len(pose_scores)):
                    # Bug fix: the original comparison (max_score > pose_scores[pi])
                    # could never become true, so max_index always stayed 0.
                    if pose_scores[pi] > max_score:
                        max_score = pose_scores[pi]
                        max_index = pi
                    if pose_scores[pi] == 0.:
                        ignore = 1
                        break

                # Compute hand-crafted features from the skeleton vectors
                # extracted by PoseNet.
                if pose_scores[max_index] != 0.:
                    tmp_data = dict()
                    out_data = dict(image_name=[f[10:-4]])
                    for ki, (s, c) in enumerate(
                            zip(keypoint_scores[max_index, :],
                                keypoint_coords[max_index, :, :])):
                        tmp_data[posenet.PART_NAMES[ki]] = c.tolist()

                    out_data['feature_1'] = xy_to_feature_1(
                        tmp_data['leftShoulder'], tmp_data['rightShoulder'],
                        tmp_data['leftHip'], tmp_data['rightHip'])
                    out_data['feature_2'] = xy_to_feature_2(
                        tmp_data['leftShoulder'], tmp_data['rightShoulder'],
                        tmp_data['leftElbow'], tmp_data['rightElbow'])
                    out_data['feature_3'] = xy_to_feature_3(
                        tmp_data['leftHip'], tmp_data['rightHip'],
                        tmp_data['leftKnee'], tmp_data['rightKnee'])
                    out_data['feature_4'] = xy_to_feature_4(
                        tmp_data['leftHip'], tmp_data['rightHip'],
                        tmp_data['leftShoulder'], tmp_data['rightShoulder'])
                    out_data['feature_5'] = xy_to_feature_5(
                        tmp_data['leftShoulder'], tmp_data['rightShoulder'],
                        tmp_data['leftElbow'], tmp_data['rightElbow'],
                        tmp_data['leftWrist'], tmp_data['rightWrist'])
                    out_data['feature_6'] = xy_to_feature_6(
                        tmp_data['leftHip'], tmp_data['rightHip'],
                        tmp_data['leftKnee'], tmp_data['rightKnee'],
                        tmp_data['leftAnkle'], tmp_data['rightAnkle'])

                    out_data['total_feature'] = [
                        out_data['feature_1'],
                        out_data['feature_2'],
                        out_data['feature_3'],
                        out_data['feature_4'],
                        out_data['feature_5'][0],
                        out_data['feature_5'][1],
                        out_data['feature_6'][0],
                        out_data['feature_6'][1],
                    ]
                    test_total_class.append(out_data['total_feature'])

            if len(test_total_class) == 150:  # `is 150` relied on int identity; use ==
                break

    ############################################################################
    # Run the similarity search.
    ############################################################################

    # Load the feature DB of the pretrained NetVLAD model (trained on Colab to
    # match the dataset).
    base_feature = np.load('./models/Train_for_pca_DIY.npy')
    base_feature_label = np.load('./models/Train_label_for_pca_DIY.npy')
    class_cnt = 20

    test_total_class = np.array(test_total_class)
    test_total_class = test_total_class.reshape(150, 1, 8)
    # The LSTM weights are float32, so cast the float64 numpy data.
    x_test_data = torch.from_numpy(test_total_class).float()

    # Define a new model (EmbedNet) that combines the pretrained BLSTM with
    # NetVLAD.
    back_bone_model = LSTM(8, 32, batch_size=1, output_dim=class_cnt, num_layers=2)
    net_vlad = NetVLAD(num_clusters=40, dim=64, alpha=1.0)
    vlad_model = EmbedNet(back_bone_model, net_vlad)
    vlad_model.load_state_dict(
        torch.load('./models/checkpoints/VLAD_Checkpoint_20200609_Best_DIY_total.pth'),
        strict=True)

    # The video submitted through the web has already gone through skeleton
    # extraction and hand-crafted feature computation; feed that data into the
    # BLSTM + NetVLAD model.
    test_out_feature = vlad_model(x_test_data)
    # np.array() would fail on a grad-tracking tensor; detach first.
    test_out_feature = test_out_feature.detach().cpu().numpy()
    test_out_feature = test_out_feature.reshape(-1, 2560)

    # The real output is 2560-dimensional; for a faster demo, reduce it to two
    # dimensions.
    pca = PCA(n_components=2)
    X_Train_pca = pca.fit_transform(base_feature)
    X_test_pca = pca.transform(test_out_feature)
    check = np.concatenate((X_Train_pca, X_test_pca), axis=0)

    # Compute the similarity between the reduced features and the existing DB.
    pairwise_dist_t = _pairwise_distances(torch.from_numpy(check))
    pairwise_dist_t = pairwise_dist_t.cpu().detach().numpy()
    pairwise_dist_sort = np.sort(pairwise_dist_t[-1][:-1])

    # Convert the computed distances into relative similarity values and print
    # them. There are two modes: searching for a specific choreography and
    # searching across all choreographies.
    final_out_bef = list()
    final_out = list()
    final_score = list()
    if args.compare_id == -1:
        for index in range(0, 20):
            for i in range(len(pairwise_dist_t)):
                if pairwise_dist_sort[index] == pairwise_dist_t[-1][i]:
                    score = 100 - (300 * pairwise_dist_sort[index])
                    if score > 0 and findIndex(final_out, base_feature_label[i]):
                        final_out_bef.append(i)
                        final_score.append(score)
                        final_out.append(base_feature_label[i])
                    break
        print(final_out)
        print(final_score)
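The hand-crafted feature helpers `xy_to_feature_1` through `xy_to_feature_6`, the label filter `findIndex`, and `_pairwise_distances` are called above but not defined in this excerpt. Below is a minimal sketch of plausible stand-ins: the torso-tilt formula for feature 1, the "label not seen yet" semantics of `findIndex`, and the Euclidean distance matrix are all assumptions inferred from how the functions are used, not the project's real definitions.

```python
import math

import torch


def xy_to_feature_1(l_shoulder, r_shoulder, l_hip, r_hip):
    # Hypothetical: tilt of the torso axis (shoulder midpoint -> hip midpoint).
    sx = (l_shoulder[0] + r_shoulder[0]) / 2
    sy = (l_shoulder[1] + r_shoulder[1]) / 2
    hx = (l_hip[0] + r_hip[0]) / 2
    hy = (l_hip[1] + r_hip[1]) / 2
    return math.atan2(hy - sy, hx - sx)


def findIndex(seen_labels, label):
    # True while the label has not been emitted yet, so every class is
    # reported at most once in the ranked results.
    return label not in seen_labels


def _pairwise_distances(x):
    # Euclidean distance matrix between all row vectors of x.
    return torch.cdist(x, x, p=2)
```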
            display_image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.85)

        b = []
        # TODO this isn't particularly fast, use GL for drawing and display someday...
        overlay_image, b = posenet.draw_skel_and_kp(
            display_image, pose_scores, keypoint_scores, keypoint_coords, b,
            min_pose_score=0.15, min_part_score=0.1)

        nose = [None] * 10
        left_wrist = [None] * 10
        right_wrist = [None] * 10
        fark = 0
        for i in range(len(b)):
            if b[i] == []:
                continue
            for m in range(len(b[i])):
                if b[i][m][0] == 0:
def main():
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']

        if args.file is not None:
            cap = cv2.VideoCapture(args.file)
        else:
            cap = cv2.VideoCapture(args.cam_id)
        cap.set(3, args.cam_width)
        cap.set(4, args.cam_height)

        start = time.time()
        frame_count = 0

        # header = ['action', 'frame', 'input_number', 'x_inputs', 'y_inputs']
        # with open('dataset.csv', 'w') as w:
        #     do = csv.writer(w)
        #     do.writerow(header)

        # Accumulate across frames; in the original these lists were re-created
        # every iteration, so only the last frame ever reached the CSV.
        frame = []
        action_name = []
        position = []
        x_coordinates = []
        y_coordinates = []

        while True:
            input_image, display_image, output_scale = posenet.read_cap(
                cap, scale_factor=args.scale_factor, output_stride=output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs, feed_dict={'image:0': input_image})

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)

            keypoint_coords *= output_scale

            for i, j in enumerate(keypoint_coords[0]):
                action_name.append('walking')
                frame.append(frame_count)
                position.append(i + 1)
                x_coordinates.append(j[0])
                y_coordinates.append(j[1])
            print(
                f'Frame is {frame}, body pos {position}, x value {x_coordinates}, y value {y_coordinates}'
            )

            # TODO this isn't particularly fast, use GL for drawing and display someday...
            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)

            # print(cap.get(cv2.CAP_PROP_FPS))
            # print(len(keypoint_scores[0]))
            print(keypoint_coords[0])
            cv2.imshow('posenet', overlay_image)
            frame_count += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        print('Average FPS: ', frame_count / (time.time() - start))

        rows = zip(action_name, frame, position, x_coordinates, y_coordinates)
        with open('dataset.csv', 'a') as f:
            writer = csv.writer(f)
            # writer.writerow(header)
            for row in rows:
                writer.writerow(row)
def main():
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']

        if args.file is not None:
            cap = cv2.VideoCapture(args.file)
        else:
            cap = cv2.VideoCapture(args.cam_id)
        cap.set(3, args.cam_width)
        cap.set(4, args.cam_height)

        start = time.time()
        frame_count = 0

        col_names = [
            'action', 'frame_number', 'input_number', 'x_inputs', 'y_inputs'
        ]
        # Read the old data into a dataframe.
        csv_reader = pd.read_csv(
            r'C:\Users\insapab\Desktop\Python\Projects\Deep Learning based fall-detection\Week3\posenet-python-master\dataset1.csv',
            names=col_names,
            header=None)

        while True:
            input_image, display_image, output_scale = posenet.read_cap(
                cap, scale_factor=args.scale_factor, output_stride=output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs, feed_dict={'image:0': input_image})

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=1,  # detecting only a single pose
                min_pose_score=0.15)

            keypoint_coords *= output_scale

            ## vectorized code to extract keypoint data from the first pose and
            ## append it to the csv_reader dataframe
            input_number_array = np.arange(1, len(keypoint_scores[0]) + 1)  # [1, 2, ..., no. of keypoints]
            x_input_array = keypoint_coords[0][:, 0]  # x-coordinates of the keypoints in this frame
            y_input_array = keypoint_coords[0][:, 1]  # y-coordinates of the keypoints in this frame
            d = {
                'action': 'Pushups',
                'frame_number': frame_count + 1,
                'input_number': input_number_array,
                'x_inputs': x_input_array,
                'y_inputs': y_input_array
            }
            dummy_frame = pd.DataFrame(data=d)
            # Append the new frame data (DataFrame.append was removed in
            # pandas 2.0; pd.concat is the equivalent).
            csv_reader = pd.concat([csv_reader, dummy_frame])

            # TODO this isn't particularly fast, use GL for drawing and display someday...
            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)

            cv2.imshow('posenet', overlay_image)
            frame_count += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            if frame_count == 25:  # total frames considered are (video_length_sec) * (frames/sec)
                break

        csv_reader.to_csv('dataset1.csv')  # write the data back to the same CSV file
        print('Average FPS: ', frame_count / (time.time() - start))
from contextlib import redirect_stdout


def main():
    model = posenet.load_model(args.model)
    # model = model.cuda()
    output_stride = model.output_stride

    if args.output_dir:
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)

    filenames = [
        f.path for f in os.scandir(args.image_dir)
        if f.is_file() and f.path.endswith(('.png', '.jpg'))
    ]

    start = time.time()
    for f in filenames:
        input_image, draw_image, output_scale = posenet.read_imgfile(
            f, scale_factor=args.scale_factor, output_stride=output_stride)

        with torch.no_grad():
            input_image = torch.Tensor(input_image)  # .cuda()
            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = model(
                input_image)
            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                heatmaps_result.squeeze(0),
                offsets_result.squeeze(0),
                displacement_fwd_result.squeeze(0),
                displacement_bwd_result.squeeze(0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.25)

        keypoint_coords *= output_scale

        if args.output_dir:
            draw_image = posenet.draw_skel_and_kp(
                draw_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.25, min_part_score=0.25)
            cv2.imwrite(
                os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)),
                draw_image)

        if not args.notxt:
            # contextlib.redirect_stdout restores sys.stdout automatically,
            # which is safer than swapping it by hand.
            with open('coordinates', 'a') as f1, redirect_stdout(f1):
                print()
                print("Results for image: %s" % f)
                for pi in range(len(pose_scores)):
                    if pose_scores[pi] == 0.:
                        break
                    print('Pose #%d, score = %f' % (pi, pose_scores[pi]))
                    for ki, (s, c) in enumerate(
                            zip(keypoint_scores[pi, :],
                                keypoint_coords[pi, :, :])):
                        print('Keypoint %s, score = %f, coord = %s' %
                              (posenet.PART_NAMES[ki], s, c))

    print('Average FPS:', len(filenames) / (time.time() - start))
def main():
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']

        if args.output_dir:
            if not os.path.exists(args.output_dir):
                os.makedirs(args.output_dir)

        filenames = [
            f.path for f in os.scandir(args.image_dir)
            if f.is_file() and f.path.endswith(('.png', '.jpg'))
        ]

        start = time.time()
        for f in filenames:
            input_image, draw_image, output_scale = posenet.read_imgfile(
                f, scale_factor=args.scale_factor, output_stride=output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs, feed_dict={'image:0': input_image})

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.07)

            keypoint_coords *= output_scale

            if args.output_dir:
                draw_image = posenet.draw_skel_and_kp(
                    draw_image, pose_scores, keypoint_scores, keypoint_coords,
                    min_pose_score=0.07, min_part_score=0.07)
                cv2.imwrite(
                    os.path.join(args.output_dir,
                                 os.path.relpath(f, args.image_dir)),
                    draw_image)

            if not args.notxt:
                print()
                print("Results for image: %s" % f)
                for pi in range(len(pose_scores)):
                    if pose_scores[pi] == 0.:
                        break
                    print('Pose #%d, score = %f' % (pi, pose_scores[pi]))
                    for ki, (s, c) in enumerate(
                            zip(keypoint_scores[pi, :],
                                keypoint_coords[pi, :, :])):
                        print('Keypoint %s, score = %f, coord = %s' %
                              (posenet.PART_NAMES[ki], s, c))

        print('Average FPS:', len(filenames) / (time.time() - start))
def main():
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']

        # cap1 is the reference video, cap2 the stream being compared against
        # it. In the original, cap2 was only created in the else branch, so it
        # was undefined whenever --file was given; open it unconditionally.
        if args.file is not None:
            cap2 = cv2.VideoCapture(args.file)
        else:
            cap2 = cv2.VideoCapture(args.cam_id)
        cap1 = cv2.VideoCapture("test3.mp4")
        cap1.set(cv2.CAP_PROP_POS_MSEC, 40000)
        cap1.set(3, 720)
        cap1.set(4, 560)

        start = time.time()
        frame_count = 0
        while True:
            input_image, display_image, output_scale = posenet.read_cap(
                cap1, scale_factor=args.scale_factor, output_stride=output_stride)
            input_image2, display_image2, output_scale2 = posenet.read_cap(
                cap2, scale_factor=args.scale_factor, output_stride=output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs, feed_dict={'image:0': input_image})
            heatmaps_result2, offsets_result2, displacement_fwd_result2, displacement_bwd_result2 = sess.run(
                model_outputs, feed_dict={'image:0': input_image2})

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)
            pose_scores2, keypoint_scores2, keypoint_coords2 = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result2.squeeze(axis=0),
                offsets_result2.squeeze(axis=0),
                displacement_fwd_result2.squeeze(axis=0),
                displacement_bwd_result2.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)

            keypoint_coords *= output_scale
            keypoint_coords2 *= output_scale2

            # TODO this isn't particularly fast, use GL for drawing and display someday...
            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)
            overlay_image2 = posenet.draw_skel_and_kp(
                display_image2, pose_scores2, keypoint_scores2, keypoint_coords2,
                min_pose_score=0.15, min_part_score=0.1)
            overlay_image2 = cv2.flip(overlay_image2, 1)
            # Draw the reference skeleton on top of the mirrored second view.
            overlay_image3 = posenet.draw_skel_and_kp2(
                overlay_image2, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)

            cv2.imshow('posenet1', overlay_image)
            frame_count += 1
            cv2.imshow('posenet2', overlay_image3)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        print('Average FPS: ', frame_count / (time.time() - start))
def main():
    save_file = [
        'ffmpeg',
        '-i', '-',
        '-c:v', 'copy',
        '-hls_time', '10',
        '-hls_list_size', '3',
        '-hls_wrap', '3',
        '-hls_segment_type', 'fmp4',
        'vid.m3u8']
    stream_file = [
        'ffmpeg',
        '-f', 'rawvideo',
        '-pix_fmt', 'bgr24',
        '-s', '640x480',
        '-i', 'vid.m3u8',
        '-an',  # disable audio
        '-f', 'image2pipe',
        '-pix_fmt', 'bgr24',
        '-vcodec', 'rawvideo',
        # '-tune', 'zerolatency',
        # '-filter:v', 'minterpolate=\'mi_mode=mci:mc_mode=aobmc:vsbmc=1:fps=30\'',
        # '-b:v', '1M',
        # '-c:v', 'libx264',
        # '-maxrate', '1M',
        # '-bufsize', '2M',
        # '-strict', 'experimental',
        # '-vcodec', 'h264',
        # '-pix_fmt', 'yuv420p',
        # '-g', '50',
        # '-vb', '1000k',
        # '-profile:v', 'baseline',
        # '-preset', 'ultrafast',
        # '-r', '10',
        # '-f', 'flv',
        'rtmp://live-lax.twitch.tv/app/live_173288790_pEOfgLFUAfocVRZdAQ1D8bUubjL4OY']

    pipe = subprocess.Popen(save_file, stdin=subprocess.PIPE)
    stream = None

    model = posenet.load_model(args.model)
    model = model.cuda()
    output_stride = model.output_stride

    cap = cv2.VideoCapture(args.cam_id)
    cap.set(3, args.cam_width)
    cap.set(4, args.cam_height)

    start = time.time()
    frame_count = 0
    while True:
        input_image, display_image, output_scale = posenet.read_cap(
            cap, scale_factor=args.scale_factor, output_stride=output_stride)

        with torch.no_grad():
            input_image = torch.Tensor(input_image).cuda()
            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = model(
                input_image)
            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                heatmaps_result.squeeze(0),
                offsets_result.squeeze(0),
                displacement_fwd_result.squeeze(0),
                displacement_bwd_result.squeeze(0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)

        keypoint_coords *= output_scale

        # TODO this isn't particularly fast, use GL for drawing and display someday...
        overlay_image = posenet.draw_skel_and_kp(
            display_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.15, min_part_score=0.1)

        # ndarray.tostring() was removed from numpy; tobytes() is the replacement.
        pipe.stdin.write(overlay_image.tobytes())
        if not stream:
            stream = subprocess.Popen(stream_file)
        frame_count += 1  # was never incremented, so the FPS figure stayed zero

    print('Average FPS: ', frame_count / (time.time() - start))
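A note on the `save_file` command: it feeds raw BGR frames into ffmpeg's stdin, but `-i -` alone gives ffmpeg no pixel format or frame size, and `-c:v copy` cannot "copy" uncompressed frames into HLS segments, so the pipe will fail as written. Below is a sketch of an input/encoder spec that would accept the raw frames; the 640x480 size and 30 fps are assumptions that must match the actual capture settings, not values taken from this script.

```python
save_file = [
    'ffmpeg',
    '-f', 'rawvideo', '-pix_fmt', 'bgr24',  # describe the raw frames on stdin
    '-s', '640x480', '-r', '30',            # assumed; must match the capture
    '-i', '-',
    '-c:v', 'libx264',                      # raw video has to be encoded, not copied
    '-hls_time', '10', '-hls_list_size', '3',
    '-hls_segment_type', 'fmp4',
    'vid.m3u8',
]
```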
    def modal(self, context, event):
        if (event.type in {'RIGHTMOUSE', 'ESC'}) or self.stop:
            self.cancel(context)
            return {'CANCELLED'}

        if event.type == 'TIMER':
            self.init_session()
            self.init_camera()

            input_image, display_image, output_scale = posenet.read_cap(
                self._cap,
                scale_factor=self.scale_factor,
                output_stride=self.output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = self.sess.run(
                self.model_outputs, feed_dict={'image:0': input_image})

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=self.output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)

            keypoint_coords *= output_scale

            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)

            bones = bpy.data.objects["rig"].pose.bones

            if self.calibration_count < self.calibration_times:
                self.original_nose = keypoint_coords[0][0]
                self.original_leftEye = keypoint_coords[0][1]
                self.original_rightEye = keypoint_coords[0][2]
                self.original_leftEar = keypoint_coords[0][3]
                self.original_rightEar = keypoint_coords[0][4]
                self.original_leftShoulder = keypoint_coords[0][5]
                self.original_rightShoulder = keypoint_coords[0][6]
                self.original_leftElbow = keypoint_coords[0][7]
                self.original_rightElbow = keypoint_coords[0][8]
                self.original_leftWrist = keypoint_coords[0][9]
                self.original_rightWrist = keypoint_coords[0][10]
                self.original_leftHip = keypoint_coords[0][11]
                self.original_rightHip = keypoint_coords[0][12]
                self.original_leftKnee = keypoint_coords[0][13]
                self.original_rightKnee = keypoint_coords[0][14]
                self.original_leftAnkle = keypoint_coords[0][15]
                self.original_rightAnkle = keypoint_coords[0][16]

                if (self.original_leftShoulder[0] == 0
                        or self.original_leftShoulder[1] == 0
                        or self.original_rightShoulder[0] == 0
                        or self.original_rightShoulder[1] == 0):
                    return {'PASS_THROUGH'}

                self.proportion = self.object_shoulder_width / math.sqrt(
                    math.pow(self.original_leftShoulder[0] -
                             self.original_rightShoulder[0], 2) +
                    math.pow(self.original_leftShoulder[1] -
                             self.original_rightShoulder[1], 2))
                self.calibration_count += 1
            else:
                x, y = self.calibration_location(keypoint_coords)
                # bones["root"].location[2] = y*self.proportion
                # bones["root"].location[0] = x*self.proportion

                # Rotate nose
                nose = keypoint_coords[0][0]
                nose = [(self.original_nose[0] - nose[0]) * self.proportion,
                        (self.original_nose[1] - nose[1]) * self.proportion]
                bones["head"].rotation_mode = 'XYZ'
                bones["head"].rotation_euler[0] = self.smooth_value(
                    "head.x", 3, self.calibration_value(nose[0] * -1, 1, -1))
                bones["head"].rotation_euler[1] = self.smooth_value(
                    "head.y", 3, self.calibration_value(nose[1], 1, -1))

                # Move eyes
                leftEye = keypoint_coords[0][1]
                leftEye = [(self.original_leftEye[0] - leftEye[0]) * self.proportion,
                           (self.original_leftEye[1] - leftEye[1]) * self.proportion]
                rightEye = keypoint_coords[0][2]
                rightEye = [(self.original_rightEye[0] - rightEye[0]) * self.proportion,
                            (self.original_rightEye[1] - rightEye[1]) * self.proportion]

                # Move ears
                leftEar = keypoint_coords[0][3]
                leftEar = [(self.original_leftEar[0] - leftEar[0]) * self.proportion,
                           (self.original_leftEar[1] - leftEar[1]) * self.proportion]
                rightEar = keypoint_coords[0][4]
                rightEar = [(self.original_rightEar[0] - rightEar[0]) * self.proportion,
                            (self.original_rightEar[1] - rightEar[1]) * self.proportion]

                # Move shoulders
                leftShoulder = keypoint_coords[0][5]
                leftShoulder = [
                    (self.original_leftShoulder[0] - leftShoulder[0]) * self.proportion,
                    (self.original_leftShoulder[1] - leftShoulder[1]) * self.proportion
                ]
                rightShoulder = keypoint_coords[0][6]
                rightShoulder = [
                    (self.original_rightShoulder[0] - rightShoulder[0]) * self.proportion,
                    (self.original_rightShoulder[1] - rightShoulder[1]) * self.proportion
                ]

                # Move elbows
                leftElbow = keypoint_coords[0][7]
                leftElbow = [(self.original_leftElbow[0] - leftElbow[0]) * self.proportion,
                             (self.original_leftElbow[1] - leftElbow[1]) * self.proportion * -1]
                # Camera-to-rig basis vectors for the left forearm target.
                x_x = 0.219534
                x_y = 0.650383
                x_z = 0.72719
                z_x = 0.17474
                z_y = -0.759534
                z_z = 0.626558
                bones["forearm_tweak.L"].location[0] = self.smooth_value(
                    "forearm_tweak.L.x", 2, leftElbow[1] * x_x + leftElbow[0] * z_x)
                bones["forearm_tweak.L"].location[1] = self.smooth_value(
                    "forearm_tweak.L.y", 2, leftElbow[1] * x_y + leftElbow[0] * z_y)
                bones["forearm_tweak.L"].location[2] = self.smooth_value(
                    "forearm_tweak.L.z", 2, leftElbow[1] * x_z + leftElbow[0] * z_z)
                # bones["forearm_tweak.L"].location[2] = self.smooth_value("forearm_tweak.L.y", 3, leftElbow[1])
                # bones["forearm_tweak.L"].location[0] = self.smooth_value("forearm_tweak.L.x", 3, leftElbow[0])

                rightElbow = keypoint_coords[0][8]
                rightElbow = [(self.original_rightElbow[0] - rightElbow[0]) * self.proportion,
                              (self.original_rightElbow[1] - rightElbow[1]) * self.proportion * -1]
                x_x = 0.219243
                x_y = -0.650383
                x_z = -0.727278
                z_x = -0.174489
                z_y = -0.759534
                z_z = 0.626628
                bones["forearm_tweak.R"].location[0] = self.smooth_value(
                    "forearm_tweak.R.x", 2, rightElbow[1] * x_x + rightElbow[0] * z_x)
                bones["forearm_tweak.R"].location[1] = self.smooth_value(
                    "forearm_tweak.R.y", 2, rightElbow[1] * x_y + rightElbow[0] * z_y)
                bones["forearm_tweak.R"].location[2] = self.smooth_value(
                    "forearm_tweak.R.z", 2, rightElbow[1] * x_z + rightElbow[0] * z_z)
                # bones["forearm_tweak.R"].location[2] = self.smooth_value("forearm_tweak.R.y", 3, rightElbow[1])
                # bones["forearm_tweak.R"].location[0] = self.smooth_value("forearm_tweak.R.x", 3, rightElbow[0])

                # Move wrists
                leftWrist = keypoint_coords[0][9]
                leftWrist = [(self.original_leftWrist[0] - leftWrist[0]) * self.proportion,
                             (self.original_leftWrist[1] - leftWrist[1]) * self.proportion * -1]
                print(leftWrist)
                x_x = 0.222131
                x_y = 0.667043
                x_z = 0.711134
                z_x = 0.171844
                z_y = -0.744722
                z_z = 0.644871
                bones["hand_ik.L"].location[0] = self.smooth_value(
                    "hand_ik.L.x", 3, leftWrist[1] * x_x + leftWrist[0] * z_x)
                bones["hand_ik.L"].location[1] = self.smooth_value(
                    "hand_ik.L.y", 3, leftWrist[1] * x_y + leftWrist[0] * z_y)
                bones["hand_ik.L"].location[2] = self.smooth_value(
                    "hand_ik.L.z", 3, leftWrist[1] * x_z + leftWrist[0] * z_z)
                # bones["hand_ik.L"].location[2] = self.smooth_value("hand_ik.L.y", 3, leftWrist[1])
                # bones["hand_ik.L"].location[0] = self.smooth_value("hand_ik.L.x", 3, leftWrist[0])

                rightWrist = keypoint_coords[0][10]
                rightWrist = [(self.original_rightWrist[0] - rightWrist[0]) * self.proportion,
                              (self.original_rightWrist[1] - rightWrist[1]) * self.proportion * -1]
                x_x = 0.118835
                x_y = -0.801089
                x_z = -0.586629
                z_x = -0.251902
                z_y = -0.59581
                z_z = 0.762598
                bones["hand_ik.R"].location[0] = self.smooth_value(
                    "hand_ik.R.x", 3, rightWrist[1] * x_x + rightWrist[0] * z_x)
                bones["hand_ik.R"].location[1] = self.smooth_value(
                    "hand_ik.R.y", 3, rightWrist[1] * x_y + rightWrist[0] * z_y)
                bones["hand_ik.R"].location[2] = self.smooth_value(
                    "hand_ik.R.z", 3, rightWrist[1] * x_z + rightWrist[0] * z_z)
                # bones["hand_ik.R"].location[2] = self.smooth_value("hand_ik.R.y", 3, rightWrist[1])
                # bones["hand_ik.R"].location[0] = self.smooth_value("hand_ik.R.x", 3, rightWrist[0])

                # Rotate hands
                bones["hand_ik.L"].rotation_mode = 'XYZ'
                bones["hand_ik.L"].rotation_euler[0] = self.smooth_value(
                    "hand_ik.L.r", 3,
                    math.tan((keypoint_coords[0][7][0] - keypoint_coords[0][9][0]) /
                             math.sqrt(
                                 math.pow(keypoint_coords[0][7][0] -
                                          keypoint_coords[0][9][0], 2) +
                                 math.pow(keypoint_coords[0][7][1] -
                                          keypoint_coords[0][9][1], 2)))) + 1.2
                bones["hand_ik.R"].rotation_mode = 'XYZ'
                bones["hand_ik.R"].rotation_euler[0] = self.smooth_value(
                    "hand_ik.R.r", 3,
                    math.tan((keypoint_coords[0][8][0] - keypoint_coords[0][10][0]) /
                             math.sqrt(
                                 math.pow(keypoint_coords[0][8][0] -
                                          keypoint_coords[0][10][0], 2) +
                                 math.pow(keypoint_coords[0][8][1] -
                                          keypoint_coords[0][10][1], 2)))) + 1.2

                # Move hips
                leftHip = keypoint_coords[0][11]
                leftHip = [(self.original_leftHip[0] - leftHip[0]) * self.proportion,
                           (self.original_leftHip[1] - leftHip[1]) * self.proportion]
                rightHip = keypoint_coords[0][12]
                rightHip = [(self.original_rightHip[0] - rightHip[0]) * self.proportion,
                            (self.original_rightHip[1] - rightHip[1]) * self.proportion]
                bones["torso"].location[2] = self.smooth_value(
                    "torso.y", 2, (leftHip[1] + rightHip[1]) / 2)
                bones["torso"].location[0] = self.smooth_value(
                    "torso.x", 2, (leftHip[0] + rightHip[0]) / 2)

                # Move knees
                leftKnee = keypoint_coords[0][13]
                leftKnee = [(self.original_leftKnee[0] - leftKnee[0]) * self.proportion,
                            (self.original_leftKnee[1] - leftKnee[1]) * self.proportion]
                # bones["shin_tweak.L"].location[2] = self.smooth_value("torso.y", 2, leftKnee[1])
                # bones["shin_tweak.L"].location[0] = self.smooth_value("torso.x", 2, leftKnee[0])
                rightKnee = keypoint_coords[0][14]
                rightKnee = [(self.original_rightKnee[0] - rightKnee[0]) * self.proportion,
                             (self.original_rightKnee[1] - rightKnee[1]) * self.proportion]
                # bones["shin_tweak.R"].location[2] = rightKnee[0]
                # bones["shin_tweak.R"].location[1] = rightKnee[1]

                # Move ankles
                leftAnkle = keypoint_coords[0][15]
                leftAnkle = [(self.original_leftAnkle[0] - leftAnkle[0]) * self.proportion,
                             (self.original_leftAnkle[1] - leftAnkle[1]) * self.proportion]
                bones["foot_ik.L"].location[2] = self.smooth_value(
                    "foot_ik.L.y", 2, leftAnkle[1])
                bones["foot_ik.L"].location[0] = self.smooth_value(
                    "foot_ik.L.x", 2, leftAnkle[0])
                rightAnkle = keypoint_coords[0][16]
                rightAnkle = [(self.original_rightAnkle[0] - rightAnkle[0]) * self.proportion,
                              (self.original_rightAnkle[1] - rightAnkle[1]) * self.proportion]
                bones["foot_ik.R"].location[2] = self.smooth_value(
                    "foot_ik.R.y", 2, rightAnkle[1])
                bones["foot_ik.R"].location[0] = self.smooth_value(
                    "foot_ik.R.x", 2, rightAnkle[0])

                if self.keyframe_insert_enable:
                    # rotation_euler
                    bones["head"].keyframe_insert(data_path="rotation_euler", index=-1)
                    # location
                    bones["root"].keyframe_insert(data_path="location", index=-1)
                    bones["chest"].keyframe_insert(data_path="location", index=-1)
                    bones["forearm_tweak.L"].keyframe_insert(data_path="location", index=-1)
                    bones["forearm_tweak.R"].keyframe_insert(data_path="location", index=-1)
                    bones["hand_ik.L"].keyframe_insert(data_path="location", index=-1)
                    bones["hand_ik.R"].keyframe_insert(data_path="location", index=-1)
                    bones["torso"].keyframe_insert(data_path="location", index=-1)
                    bones["shin_tweak.L"].keyframe_insert(data_path="location", index=-1)
                    bones["shin_tweak.R"].keyframe_insert(data_path="location", index=-1)
                    bones["foot_ik.L"].keyframe_insert(data_path="location", index=-1)
                    bones["foot_ik.R"].keyframe_insert(data_path="location", index=-1)

            cv2.imshow('posenet', overlay_image)
            cv2.waitKey(1)

        return {'PASS_THROUGH'}
def capture():
    global img_q, save
    w, h = 640, 480
    # w, h = 1280, 720

    # Configure depth and color streams
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, w, h, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, w, h, rs.format.bgr8, 30)

    # Start streaming
    profile = pipeline.start(config)

    # Getting the depth sensor's depth scale (see rs-align example for explanation)
    depth_sensor = profile.get_device().first_depth_sensor()
    depth_scale = depth_sensor.get_depth_scale()
    print("Depth Scale is: ", depth_scale)

    clipping_distance_in_meters = 2  # 2 meters (the original comment said 1 meter)
    clipping_distance = clipping_distance_in_meters / depth_scale

    frame_count = 0
    t = time.time()
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(101, sess)
        output_stride = model_cfg['output_stride']
        while True:
            frames = pipeline.wait_for_frames()
            # Aligned-stream alternative, kept from the original:
            # aligned_frames = align.process(frames)
            # aligned_depth_frame = aligned_frames.get_depth_frame()
            # color_frame = aligned_frames.get_color_frame()
            # if not aligned_depth_frame or not color_frame:
            #     continue
            # depth_image = np.asanyarray(aligned_depth_frame.get_data())
            # color_image = np.asanyarray(color_frame.get_data())
            depth_frame = frames.get_depth_frame()
            color_frame = frames.get_color_frame()
            if not depth_frame or not color_frame:
                continue

            depth_image = np.asanyarray(depth_frame.get_data())  # [:, 300:900]
            color_image = np.asanyarray(color_frame.get_data())  # [:, 300:900]

            out = np.zeros((h, w, 3), np.uint8)
            depth_image_3d = np.dstack((depth_image, depth_image, depth_image))
            out = np.where(
                (depth_image_3d > clipping_distance) | (depth_image_3d <= 0),
                0, color_image)

            mask = np.where(
                (depth_image > clipping_distance) | (depth_image <= 0), 0, 255)
            mask = cv2.applyColorMap(cv2.convertScaleAbs(mask, alpha=1),
                                     cv2.COLORMAP_BONE)
            blur_p = 5
            mask = cv2.GaussianBlur(mask, (blur_p, blur_p), 0)
            # mask = np.dstack((mask, mask, mask))

            boxes = get_contours(mask.copy())

            depth_colormap = cv2.applyColorMap(
                cv2.convertScaleAbs(depth_image, alpha=0.03),
                cv2.COLORMAP_BONE)
            # boxes = HAAR(color_image)

            for box in boxes:
                try:
                    img = out[box[1]:box[3], box[0]:box[2]]
                    img2 = color_image[box[1]:box[3], box[0]:box[2]]
                    input_image, display_image, output_scale = process_input(
                        img, scale_factor=0.7125, output_stride=output_stride)

                    heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                        model_outputs, feed_dict={'image:0': input_image})

                    pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                        heatmaps_result.squeeze(axis=0),
                        offsets_result.squeeze(axis=0),
                        displacement_fwd_result.squeeze(axis=0),
                        displacement_bwd_result.squeeze(axis=0),
                        output_stride=output_stride,
                        max_pose_detections=10,
                        min_pose_score=0.15)

                    keypoint_coords *= output_scale

                    overlay_image = draw_p(img2, depth_image, depth_scale,
                                           pose_scores, keypoint_scores,
                                           keypoint_coords,
                                           min_pose_score=0.15,
                                           min_part_score=0.1)
                    overlay_image = posenet.draw_skel_and_kp(
                        img2, pose_scores, keypoint_scores, keypoint_coords,
                        min_pose_score=0.15, min_part_score=0.1)

                    color_image[box[1]:box[3], box[0]:box[2]] = overlay_image
                    cv2.rectangle(color_image, (box[0], box[1]),
                                  (box[2], box[3]), (0, 255, 0), 1)
                except Exception:  # avoid a bare except; keep the original diagnostics
                    print('!!!!!!!!!!!!!')
                    print(box)

            if len(boxes) == 0:
                overlay_image = color_image

            # images = np.vstack((np.hstack((overlay_image, color_image)),
            #                     np.hstack((depth_colormap, depth_colormap))))
            # images = color_image
            # print(color_image.shape, mask.shape)
            # images = np.hstack((color_image, mask, out))
            images = np.vstack((np.hstack((color_image, mask)),
                                np.hstack((depth_colormap, out))))

            if not img_q.full():
                img_q.put_nowait(images)

            if save.value == 1:
                save_img(images)
                save.value = 0

            if frame_count == 5:
                print(1 / ((time.time() - t) / frame_count))
                frame_count = 0
                t = time.time()
            frame_count += 1
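RealSense depth frames store raw integer units that are converted to meters by multiplying with `depth_scale`, so the code divides meters by `depth_scale` to get the threshold in raw units. A quick worked example, assuming the common scale of 0.001 m per unit (the real value is whatever `get_depth_scale()` printed above):

```python
depth_scale = 0.001                 # assumed; use depth_sensor.get_depth_scale() in practice
clipping_distance_in_meters = 2
clipping_distance = clipping_distance_in_meters / depth_scale  # -> 2000 raw units

# A pixel with raw depth 1500 is 1500 * 0.001 = 1.5 m away and is kept;
# raw depth 2500 (2.5 m) exceeds the threshold and gets masked to black.
```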
def main():
    model = posenet.load_model(args.model)
    model = model.cuda()
    output_stride = model.output_stride

    mirror_flip = args.mirror_flip
    use_webcam = True
    if args.file is not None:
        cap = cv2.VideoCapture(args.file)
        use_webcam = False
    else:
        cap = cv2.VideoCapture(args.cam_id, cv2.CAP_DSHOW)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.cam_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.cam_height)

    maxPoses = args.max_poses
    if maxPoses < 1:
        print("cannot proceed -- the maximum number of poses must be a positive integer!")
        return
    elif maxPoses > 30:
        print("cannot proceed -- the maximum number of poses should be no more than 30!")
        return

    # Check if the camera opened successfully
    if not cap.isOpened():
        print("Error opening video stream or file")

    print('\nPress [space] to pause/resume stream...\nPress [q] to quit stream...')
    start = time.time()
    frame_count = 0
    flag = True
    while cap.isOpened():
        if cv2.waitKey(1) & 0xFF == ord(' '):
            flag = not flag
        if flag:
            tik = time.time()
            input_image, display_image, output_scale = posenet.read_cap(
                cap, scale_factor=args.scale_factor,
                output_stride=output_stride, mirror_flip=mirror_flip,
                use_webcam=use_webcam)
            if display_image is None:
                break

            with torch.no_grad():
                input_image = torch.Tensor(input_image).cuda()
                heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = model(
                    input_image)
                pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                    heatmaps_result.squeeze(0),
                    offsets_result.squeeze(0),
                    displacement_fwd_result.squeeze(0),
                    displacement_bwd_result.squeeze(0),
                    output_stride=output_stride,
                    max_pose_detections=maxPoses,
                    min_pose_score=0.1)

            keypoint_coords *= output_scale
            tok = time.time()
            frame_count += 1

            # TODO this isn't particularly fast, use GL for drawing and display someday...
            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.1, min_part_score=0.1)

            cv2.putText(overlay_image,
                        "FPS-overall: %.1f" % (frame_count / (time.time() - start)),
                        (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
            cv2.putText(overlay_image,
                        "FPS-posenet: %.1f" % (1 / (tok - tik)),
                        (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
            cv2.imshow('posenet', overlay_image)
            # print('FPS: ', frame_count / (time.time() - start))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            continue

    print("Average FPS: %.1f" % (frame_count / (time.time() - start)))
    # When everything is done, release the capture
    cap.release()
    cv2.destroyAllWindows()
def Pose():
    global rightWrist1

    model = posenet.load_model(args.model)
    model = model.cuda()
    output_stride = model.output_stride

    cap = cv2.VideoCapture(args.cam_id)
    cap.set(3, args.cam_width)
    cap.set(4, args.cam_height)

    start = time.time()
    frame_count = 0

    # Robot setup
    bot = MachinaRobot()
    bot.Message("Hello Robot!")
    bot.AxesTo(0, 0, 0, 0, 90, 0)
    bot.SpeedTo(50)
    bot.TransformTo(200, 300, 200, -1, 0, 0, 0, 1, 0)

    # `started` was read before it was ever assigned in the original, so
    # initialize the tracking state here.
    started = False
    rightWrist1 = None

    while True:
        input_image, display_image, output_scale = posenet.read_cap(
            cap, scale_factor=args.scale_factor, output_stride=output_stride)

        with torch.no_grad():
            input_image = torch.Tensor(input_image).cuda()
            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = model(
                input_image)
            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                heatmaps_result.squeeze(0),
                offsets_result.squeeze(0),
                displacement_fwd_result.squeeze(0),
                displacement_bwd_result.squeeze(0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)

        keypoint_coords *= output_scale
        coords = keypoint_coords[0]
        if not started:
            rightWrist1 = coords[10]
            started = True
            print(rightWrist1)

        # TODO this isn't particularly fast, use GL for drawing and display someday...
        overlay_image = posenet.draw_skel_and_kp(
            display_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.15, min_part_score=0.1)

        cv2.imshow('posenet', overlay_image)
        frame_count += 1

        rightWrist2 = coords[10]
        print(rightWrist2)
        diff = rightWrist2 - rightWrist1
        # bot.Move(diff, 0, 0)
        print("difference is ", diff)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            bot.AxesTo(0, 0, 0, 0, 90, 0)
            break
        try:
            input_image, draw_image, output_scale = posenet.read_cap(
                cap, scale_factor=scale_factor, output_stride=output_stride)
        except Exception:
            break

        # run the model on the image and generate output results
        heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
            model_outputs, feed_dict={'image:0': input_image})

        # Filter the poses generated by the model above and output the pose
        # score, keypoint scores, and keypoint coordinates. The function
        # returns at most 10 poses by default; that can be changed via the
        # max_pose_detections argument.
        pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
            heatmaps_result.squeeze(axis=0),
            offsets_result.squeeze(axis=0),
            displacement_fwd_result.squeeze(axis=0),
            displacement_bwd_result.squeeze(axis=0),
            output_stride=output_stride,
            min_pose_score=0.25)

        # scale keypoint coordinates to the output scale
        keypoint_coords *= output_scale

        # draw the pose on the input frame to obtain the output frame
        draw_image = posenet.draw_skel_and_kp(
            draw_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.25, min_part_score=0.25)
        video.write(draw_image)

    # release the video reader and writer
    video.release()
    cap.release()
def main():
    global drone
    global drone_cc
    global drone_ud
    global drone_fb
    global shutdown

    drone = tellopy.Tello()
    drone.connect()
    drone.wait_for_connection(60.0)
    drone.start_video()
    drone.subscribe(drone.EVENT_FLIGHT_DATA, handler)

    pid_cc = PID(0.35, 0.2, 0.2, setpoint=0, output_limits=(-100, 100))
    pid_ud = PID(0.3, 0.3, 0.3, setpoint=0, output_limits=(-80, 80))
    pid_fb = PID(0.35, 0.2, 0.3, setpoint=0, output_limits=(-50, 50))

    video = cv2.VideoWriter('test_out.mp4', -1, 1, (320, 240))
    # drone.subscribe(drone.EVENT_VIDEO_FRAME, handler)
    print("Start Running")

    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']
        try:
            # threading.Thread(target=recv_thread).start()
            threading.Thread(target=controller_thread).start()
            container = av.open(drone.get_video_stream())
            frame_count = 0
            while not shutdown:
                for frame in container.decode(video=0):
                    frame_count = frame_count + 1
                    # skip the first 300 frames
                    if frame_count < 300:
                        continue
                    if frame_count % 4 == 0:
                        im = numpy.array(frame.to_image())
                        im = cv2.resize(im, (320, 240))  # resize frame
                        image = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
                        # image = cv2.cvtColor(numpy.array(frame.to_image()), cv2.COLOR_RGB2BGR)

                        input_image, display_image, output_scale = posenet.process_input(
                            image, scale_factor=args.scale_factor,
                            output_stride=output_stride)

                        heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                            model_outputs, feed_dict={'image:0': input_image})

                        pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                            heatmaps_result.squeeze(axis=0),
                            offsets_result.squeeze(axis=0),
                            displacement_fwd_result.squeeze(axis=0),
                            displacement_bwd_result.squeeze(axis=0),
                            output_stride=output_stride,
                            max_pose_detections=10,
                            min_pose_score=0.15)

                        keypoint_coords *= output_scale

                        # TODO this isn't particularly fast, use GL for drawing and display someday...
                        overlay_image = posenet.draw_skel_and_kp(
                            display_image, pose_scores, keypoint_scores,
                            keypoint_coords, min_pose_score=0.15,
                            min_part_score=0.1)

                        centerx = int(overlay_image.shape[1] / 2)
                        centery = int(overlay_image.shape[0] / 2)
                        nosex = int(keypoint_coords[0, 0, 1])
                        nosey = int(keypoint_coords[0, 0, 0])
                        ctrl_out_cc = 0
                        ctrl_out_ud = 0
                        # overlay_image = cv2.putText(overlay_image, str(nosey), (120, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (55, 255, 45), 2)
                        errorx = 0
                        errory = 0
                        if keypoint_scores[0, 0] > .04:
                            overlay_image = cv2.line(
                                overlay_image, (centerx, centery - 10),
                                (nosex, nosey), (255, 255, 0), 2)
                            errorx = nosex - centerx
                            errory = nosey - centery - 10
                            if abs(errorx) > 20:
                                ctrl_out_cc = pid_cc(errorx)
                                drone_cc = ctrl_out_cc
                            else:
                                drone_cc = 0
                            if abs(errory) > 16:
                                ctrl_out_ud = pid_ud(errory)
                                drone_ud = ctrl_out_ud
                            else:
                                drone_ud = 0
                            # out_img = cv2.putText(out_img, str(keypoint_scores[ii, kpoint]), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 45), 2)
                        else:
                            # reset the PIDs
                            drone_cc = 0
                            drone_ud = 0
                            pid_cc.reset()
                            pid_ud.reset()

                        leftSholy = int(keypoint_coords[0, 5, 0])
                        rightSholy = int(keypoint_coords[0, 6, 0])
                        leftHipy = int(keypoint_coords[0, 11, 0])
                        rightHipy = int(keypoint_coords[0, 12, 0])
                        meanHeight = ((rightHipy - rightSholy) +
                                      (leftHipy - leftSholy)) / 2

                        # technically arbitrary
                        desiredHeight = 100
                        ctrl_out_fb = 0
                        errorFB = 0
                        if (keypoint_scores[0, 5] > .04
                                and keypoint_scores[0, 6] > .04
                                and keypoint_scores[0, 11] > .04
                                and keypoint_scores[0, 12] > .04):
                            errorFB = meanHeight - desiredHeight
                            # error can be within +/- 15 without caring
                            if abs(errorFB) > 15:
                                # Bug fix: the forward/back axis used pid_cc
                                # (the yaw controller) here; pid_fb is the one
                                # configured and reset for this axis.
                                ctrl_out_fb = pid_fb(errorFB)
                                drone_fb = ctrl_out_fb
                            else:
                                drone_fb = 0
                        else:
                            # reset the PID
                            drone_fb = 0
                            pid_fb.reset()

                        # don't let the hips lie
                        # if keypoint_scores[0, 11] < .04 and keypoint_scores[0, 12] < .04:
                        #     drone_fb = -20
                        #     drone_ud = -20

                        # overlay_image = cv2.putText(overlay_image, str(ctrl_out_fb), (30, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (55, 255, 45), 2)
                        # overlay_image = cv2.putText(overlay_image, str(ctrl_out_ud), (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (55, 255, 45), 2)
                        # overlay_image = cv2.putText(overlay_image, str(errory), (30, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (55, 255, 45), 2)

                        cv2.imshow('posenet', overlay_image)
                        video.write(overlay_image)
                        # cv2.imshow('Original', image)
                        # cv2.imshow('Canny', cv2.Canny(image, 100, 200))
                        cv2.waitKey(1)
        except KeyboardInterrupt as e:
            print(e)
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            print(e)

    cv2.destroyAllWindows()
    video.release()
    drone.quit()
    exit(1)
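The `PID(Kp, Ki, Kd, setpoint=..., output_limits=...)` constructor, the call syntax `pid(value)`, and `reset()` match the `simple-pid` package, so that is presumably what is imported as `PID` here. A minimal usage sketch under that assumption (the error value is made up):

```python
from simple_pid import PID

# Same shape as the yaw controller above: clamp output to the drone's stick range.
pid = PID(0.35, 0.2, 0.2, setpoint=0, output_limits=(-100, 100))

error = 42            # e.g. horizontal pixel offset of the nose from frame center
control = pid(error)  # with setpoint=0, the controller drives this input toward 0
pid.reset()           # clear integral/derivative state when the target is lost
```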
def keypoints(choice):
    frameST = st.empty()
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(101, sess)
        output_stride = model_cfg['output_stride']
        model = pickle.load(open("RNNmodel.pkl", "rb"))

        col = ['nose_xCoord', 'nose_yCoord', 'leftEye_xCoord', 'leftEye_yCoord',
               'rightEye_xCoord', 'rightEye_yCoord', 'leftEar_xCoord',
               'leftEar_yCoord', 'rightEar_xCoord', 'rightEar_yCoord',
               'leftShoulder_xCoord', 'leftShoulder_yCoord',
               'rightShoulder_xCoord', 'rightShoulder_yCoord',
               'leftElbow_xCoord', 'leftElbow_yCoord', 'rightElbow_xCoord',
               'rightElbow_yCoord', 'leftWrist_xCoord', 'leftWrist_yCoord',
               'rightWrist_xCoord', 'rightWrist_yCoord', 'leftHip_xCoord',
               'leftHip_yCoord', 'rightHip_xCoord', 'rightHip_yCoord',
               'leftKnee_xCoord', 'leftKnee_yCoord', 'rightKnee_xCoord',
               'rightKnee_yCoord', 'leftAnkle_xCoord', 'leftAnkle_yCoord',
               'rightAnkle_xCoord', 'rightAnkle_yCoord']
        dummy_frame = pd.DataFrame(columns=col)

        # Per-column mean/std used to standardize the coordinates before they
        # go into the classifier.
        Mean = np.array([
            148.66119491, 305.30049224, 142.6646119, 310.25743596,
            141.70017197, 302.93952183, 141.27736617, 317.44867283,
            139.46369719, 297.02198297, 163.55299211, 318.43891733,
            161.08800759, 288.89443786, 201.92641401, 327.4968114,
            198.14737829, 268.80913136, 227.76026519, 321.95571328,
            222.86624951, 266.31670662, 231.37762059, 301.21041038,
            229.25918355, 278.40873512, 279.47660651, 299.97468079,
            275.04189452, 267.78987458, 332.20440178, 298.95615126,
            326.76802751, 265.76282502])
        Std = np.array([
            96.38779959, 127.16401286, 97.13470333, 129.87910693,
            95.74375047, 127.46249832, 93.7858246, 129.05961629,
            90.41257012, 125.11638235, 87.46516144, 123.3528153,
            82.68437065, 115.98756097, 93.88162078, 126.86459209,
            88.92850539, 111.58051582, 106.44469339, 126.27066327,
            104.85191381, 111.8015252, 86.30759081, 119.30815504,
            84.94167523, 113.47366614, 91.67524166, 121.34462458,
            90.18384711, 115.57534719, 105.70870987, 131.18502572,
            104.6401585, 126.9626246])

        if choice == 'Webcam':
            cap = cv2.VideoCapture(0)
            cap.set(3, 500)
            cap.set(4, 500)
        elif choice == 'upload video':
            cap = cv2.VideoCapture('test.mp4')

        frame_count = 0
        result = 'Loading..'

        # for writing text on the frame
        font = cv2.FONT_HERSHEY_PLAIN
        org = (0, 50)
        fontScale = 2
        color = (0, 0, 255)  # colour in BGR
        thickness = 2

        while True:
            input_image, display_image, output_scale = posenet.read_cap(
                cap, scale_factor=0.7125, output_stride=output_stride)

            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs, feed_dict={'image:0': input_image})

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=1,  # detecting only a single pose
                min_pose_score=0.15)

            keypoint_coords *= output_scale

            # TODO this isn't particularly fast, use GL for drawing and display someday...
            overlay_image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.15, min_part_score=0.1)
            # cv2.imshow('posenet', overlay_image)

            # Build one row per frame; posenet.PART_NAMES is in the same order
            # as the column layout above, which replaces the original 34
            # one-by-one coordinate assignments.
            d = {}
            for ki, name in enumerate(posenet.PART_NAMES):
                d[name + '_xCoord'] = keypoint_coords[0][ki][0]
                d[name + '_yCoord'] = keypoint_coords[0][ki][1]
            # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
            dummy_frame = pd.concat(
                [dummy_frame, pd.DataFrame(data=d, index=[frame_count])])

            # Classify once every 30 frames on a sliding window of the last
            # 120 frames.
            if (frame_count + 1) % 30 == 0 and (frame_count + 1) >= 120:
                X = (dummy_frame.values)[(frame_count - 119):(frame_count + 1)]
                X = (X - Mean) / Std
                X = np.asarray(X).reshape(-1, 120, 34)
                result = model.predict_classes(X)  # predict from the model
                prob = model.predict_proba(X)
                if result == 0:
                    result = 'Falling'
                elif result == 1:
                    result = 'Pushups'
                elif result == 2:
                    result = 'Sitting'
                elif result == 3:
                    result = 'Walking'
                else:
                    result = 'Error'
                # st.write(prob)

            cv2.putText(overlay_image, 'Action: ' + result, org, font,
                        fontScale, color, thickness, cv2.LINE_AA, False)
            frameST.image(overlay_image, channels="BGR")
            frame_count += 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                frameST = st.empty()
                cap.release()
                cv2.destroyAllWindows()
                break
            if choice == 'upload video':
                # total frames considered are (video_length_sec) * (frames/sec)
                if frame_count == int(cap.get(cv2.CAP_PROP_FRAME_COUNT)):
                    frameST = st.empty()
                    cap.release()
                    cv2.destroyAllWindows()
                    break
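`predict_classes` only exists on Keras `Sequential` models up to TensorFlow 2.5; it was removed afterwards. If the pickled model is loaded under a newer Keras, an equivalent is an argmax over `predict`. A drop-in sketch, assuming `model` and the window `X` from the loop above:

```python
import numpy as np

prob = model.predict(X)                  # shape (1, num_classes)
result = int(np.argmax(prob, axis=-1))   # same integer label predict_classes returned
```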
def main(): with tf.Session() as sess: model_cfg, model_outputs = posenet.load_model(args.model, sess) output_stride = model_cfg['output_stride'] cap = cv2.VideoCapture("data/Dabadaba/Cam_1/2_1.mp4") frame_width = int(cap.get(3)) frame_height = int(cap.get(4)) cap.set(3, args.cam_width) cap.set(4, args.cam_height) len_video = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) fourcc = cv2.VideoWriter_fourcc(*'XVID') output_movie = cv2.VideoWriter('data/daba_out/pose_detection_2_1.avi', fourcc, 29, (frame_width, frame_height)) start = time.time() frame_count = 0 while True: if frame_count == len_video - 1: break res, img = cap.read() if not res: break input_image, display_image, output_scale = posenet.process_input( img, scale_factor=args.scale_factor, output_stride=output_stride) #input_image, display_image, output_scale = posenet.read_cap( cap, scale_factor=args.scale_factor, output_stride=output_stride) heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run( model_outputs, feed_dict={'image:0': input_image}) pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses( heatmaps_result.squeeze(axis=0), offsets_result.squeeze(axis=0), displacement_fwd_result.squeeze(axis=0), displacement_bwd_result.squeeze(axis=0), output_stride=output_stride, max_pose_detections=50, min_pose_score=0.15) keypoint_coords *= output_scale Keypoints_face_coords = [] Keypoints_face_scores = [] for d in keypoint_coords: Keypoints_face_coords.append(d[0:5]) for d in keypoint_scores: Keypoints_face_scores.append(d[0:5]) Keypoints_eyes_coords = [] Keypoints_eyes_scores = [] for d in keypoint_coords: Keypoints_eyes_coords.append(d[1:3]) for d in keypoint_scores: Keypoints_eyes_scores.append(d[1:3]) Keypoints_half_eyes_coords = [] Keypoints_half_eyes_scores = [] for d in Keypoints_eyes_coords: Keypoints_half_eyes_coords.append(np.mean(d, axis=0)) for d in Keypoints_eyes_scores: Keypoints_half_eyes_scores.append(np.mean(d, axis=0)) Keypoints_angle_coords = [] Keypoints_angle_scores = [] angle = 0 for d in Keypoints_eyes_coords: angle = math.atan2(d[1][1] - d[0][1], d[1][0] - d[0][0]) Keypoints_angle_coords.append(angle) for d in Keypoints_eyes_scores: Keypoints_angle_scores.append(np.mean(d, axis=0)) Keypoints_direction = [] angle_2 = 0 for ii in range(0, len(keypoint_coords) - 1): if Keypoints_eyes_coords[ii][0][ 0] == 0 and Keypoints_eyes_coords[ii][0][ 1] == 0 and Keypoints_eyes_coords[ii][1][ 0] == 0 and Keypoints_eyes_coords[ii][1][ 1] == 0: Keypoints_direction.append("no hay ojos") #rectoº continue elif Keypoints_eyes_coords[ii][0][ 0] == 0 and Keypoints_eyes_coords[ii][0][1] == 0: Keypoints_direction.append( "ojo izquierdo (Derecha)") #rectoº continue elif Keypoints_eyes_coords[ii][1][ 0] == 0 and Keypoints_eyes_coords[ii][1][1] == 0: Keypoints_direction.append( "ojo derecho (Izquierda)") #rectoº continue if Keypoints_face_coords[ii][0][ 1] == 0 and Keypoints_face_coords[ii][0][ 0] == 0 and Keypoints_half_eyes_coords[ii][ 0] == 0 and Keypoints_half_eyes_coords[ii][ 1] == 0: Keypoints_direction.append("fallo") #rectoº continue angle_2 = math.atan2( Keypoints_face_coords[ii][0][1] - Keypoints_half_eyes_coords[ii][1], Keypoints_face_coords[ii][0][0] - Keypoints_half_eyes_coords[ii][0]) if abs(angle_2) < 0.261799: Keypoints_direction.append("recto") #rectoº elif angle_2 > 0.261799: Keypoints_direction.append("derecha") #derecha elif angle_2 < -0.261799: Keypoints_direction.append("izquierda") #izq Keypoints_att_show_coords = [] Keypoints_att_show_scores = [] a 
        # Attention view: reorder the face points and append the mid-point
        # between the eyes.
        for ii in range(0, len(keypoint_coords) - 1):
            a = [
                Keypoints_face_coords[ii][1],
                Keypoints_face_coords[ii][2],
                Keypoints_face_coords[ii][0],
                Keypoints_half_eyes_coords[ii],
            ]
            Keypoints_att_show_coords.append(np.asarray(a))
        for ii in range(0, len(keypoint_coords) - 1):
            b = [
                Keypoints_face_scores[ii][1],
                Keypoints_face_scores[ii][2],
                Keypoints_face_scores[ii][0],
                Keypoints_half_eyes_scores[ii],
            ]
            Keypoints_att_show_scores.append(np.asarray(b))

        # Right arm: shoulder (6), elbow (8), wrist (10) for each detected pose.
        Keypoints_right_arm_coords = []
        Keypoints_right_arm_scores = []
        for d in keypoint_coords:
            Keypoints_right_arm_coords.append(np.asarray([d[6], d[8], d[10]]))
        for d in keypoint_scores:
            Keypoints_right_arm_scores.append(np.asarray([d[6], d[8], d[10]]))

        # Left arm: shoulder (5), elbow (7), wrist (9).
        Keypoints_left_arm_coords = []
        Keypoints_left_arm_scores = []
        for d in keypoint_coords:
            Keypoints_left_arm_coords.append(np.asarray([d[5], d[7], d[9]]))
        for d in keypoint_scores:
            Keypoints_left_arm_scores.append(np.asarray([d[5], d[7], d[9]]))

        # Chest: both shoulders (5, 6) and both hips (11, 12).
        Keypoints_chest_coords = []
        Keypoints_chest_scores = []
        for d in keypoint_coords:
            Keypoints_chest_coords.append(np.asarray([d[5], d[6], d[11], d[12]]))
        for d in keypoint_scores:
            Keypoints_chest_scores.append(np.asarray([d[5], d[6], d[11], d[12]]))

        # Mean position and mean confidence of each group, per pose.
        mean_points_face_coords = []
        mean_points_face_scores = []
        mean_points_chest_coords = []
        mean_points_chest_scores = []
        mean_points_right_arm_coords = []
        mean_points_right_arm_scores = []
        mean_points_left_arm_coords = []
        mean_points_left_arm_scores = []
        for d in Keypoints_face_coords:
            mean_points_face_coords.append(np.mean(d, axis=0))
        for d in Keypoints_face_scores:
            mean_points_face_scores.append(np.mean(d))
        for d in Keypoints_chest_coords:
            mean_points_chest_coords.append(np.mean(d, axis=0))
        for d in Keypoints_chest_scores:
            mean_points_chest_scores.append(np.mean(d))
        for d in Keypoints_right_arm_coords:
            mean_points_right_arm_coords.append(np.mean(d, axis=0))
        for d in Keypoints_right_arm_scores:
            mean_points_right_arm_scores.append(np.mean(d))
        for d in Keypoints_left_arm_coords:
            mean_points_left_arm_coords.append(np.mean(d, axis=0))
        for d in Keypoints_left_arm_scores:
            mean_points_left_arm_scores.append(np.mean(d))

        # Collect the group means to display; only the face mean is enabled here.
        total_means_coords = []
        for ii, d in enumerate(pose_scores):
            means_prov_coords = []
            means_prov_coords.append(mean_points_face_coords[ii])
            #means_prov_coords.append(mean_points_chest_coords[ii])
            #means_prov_coords.append(mean_points_right_arm_coords[ii])
            #means_prov_coords.append(mean_points_left_arm_coords[ii])
            total_means_coords.append(np.asarray(means_prov_coords))
        total_means_scores = []
        for ii, d in enumerate(pose_scores):
            means_prov_scores = []
            means_prov_scores.append(mean_points_face_scores[ii])
            #means_prov_scores.append(mean_points_chest_scores[ii])
            #means_prov_scores.append(mean_points_right_arm_scores[ii])
            #means_prov_scores.append(mean_points_left_arm_scores[ii])
            total_means_scores.append(np.asarray(means_prov_scores))

        Keypoints_face_scores = np.asarray(Keypoints_face_scores)
        Keypoints_face_coords = np.asarray(Keypoints_face_coords)
        Keypoints_right_arm_coords = np.asarray(Keypoints_right_arm_coords)
        Keypoints_right_arm_scores = np.asarray(Keypoints_right_arm_scores)
        Keypoints_left_arm_coords = np.asarray(Keypoints_left_arm_coords)
        Keypoints_left_arm_scores = np.asarray(Keypoints_left_arm_scores)
        total_means_coords = np.asarray(total_means_coords)
        total_means_scores = np.asarray(total_means_scores)
        Keypoints_att_show_coords = np.asarray(Keypoints_att_show_coords)
        Keypoints_att_show_scores = np.asarray(Keypoints_att_show_scores)

        # Render each overlay variant; only the attention view is shown below.
        overlay_image_keypoints = posenet.draw_keypoints(
            display_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.15, min_part_score=0.15)
        overlay_image = posenet.draw_skel_and_kp(
            display_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.15, min_part_score=0.15)
        overlay_image_face = posenet.draw_face(
            display_image, pose_scores, Keypoints_face_scores,
            Keypoints_face_coords, min_pose_score=0.15, min_part_score=0.15)
        overlay_image_right_arm = posenet.draw_arm_right(
            display_image, pose_scores, Keypoints_right_arm_scores,
            Keypoints_right_arm_coords, min_pose_score=0.15, min_part_score=0.15)
        overlay_image_left_arm = posenet.draw_arm_left(
            display_image, pose_scores, Keypoints_left_arm_scores,
            Keypoints_left_arm_coords, min_pose_score=0.15, min_part_score=0.15)
        overlay_image_chest = posenet.draw_chest(
            display_image, pose_scores, Keypoints_chest_scores,
            Keypoints_chest_coords, min_pose_score=0.15, min_part_score=0.15)
        overlay_image_means = posenet.draw_means(
            display_image, pose_scores, total_means_scores,
            total_means_coords, min_pose_score=0.15, min_part_score=0.15)
        overlay_image_att = posenet.draw_att(
            display_image, pose_scores, Keypoints_att_show_scores,
            Keypoints_att_show_coords, min_pose_score=0.15, min_part_score=0.15)
        overlay_image_skeleton = posenet.draw_skeleton(
            display_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.15, min_part_score=0.15)

        #cv2.imshow('posenet_body', overlay_image)
        #cv2.imshow('posenet_face', overlay_image_face)
        #cv2.imshow('posenet_keypoints', overlay_image_keypoints)
        #cv2.imshow('posenet_skeleton', overlay_image_skeleton)
        #cv2.imshow('posenet_right_arm', overlay_image_right_arm)
        #cv2.imshow('posenet_left_arm', overlay_image_left_arm)
        #cv2.imshow('posenet_means', overlay_image_means)
        #cv2.imshow('posenet_chest', overlay_image_chest)
        cv2.imshow('posenet_att', overlay_image_att)
        #output_movie.write(overlay_image)

        frame_count += 1
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print('Average FPS: ', frame_count / (time.time() - start))
    print('time: ', (time.time() - start))
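# --- Reference sketch (hypothetical helper, not one of this repo's functions) --
# The integer indices used by the grouping code above follow PoseNet's
# 17-keypoint ordering (posenet.PART_NAMES):
#   0 nose, 1 leftEye, 2 rightEye, 3 leftEar, 4 rightEar,
#   5 leftShoulder, 6 rightShoulder, 7 leftElbow, 8 rightElbow,
#   9 leftWrist, 10 rightWrist, 11 leftHip, 12 rightHip,
#   13 leftKnee, 14 rightKnee, 15 leftAnkle, 16 rightAnkle
# The helper below expresses the same per-group slicing in one place.
import numpy as np


def group_keypoints(keypoint_coords, keypoint_scores, indices):
    """Return (coords, scores) restricted to `indices`, one array per pose."""
    coords = [np.asarray([pose[i] for i in indices]) for pose in keypoint_coords]
    scores = [np.asarray([pose[i] for i in indices]) for pose in keypoint_scores]
    return coords, scores

# Usage, e.g. the right-arm group (shoulder, elbow, wrist):
#   arm_coords, arm_scores = group_keypoints(kc, ks, [6, 8, 10])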
def getpoints(image_input, flag, model_black_image):
    with tf.Session() as sess:
        model_cfg, model_outputs = posenet.load_model(101, sess)
        output_stride = model_cfg['output_stride']
        pos_temp_data = []
        score_sum = 0

        input_image, draw_image, output_scale = posenet.read_imgfile(
            image_input, scale_factor=1.0, output_stride=output_stride)
        heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
            model_outputs, feed_dict={'image:0': input_image})
        pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
            heatmaps_result.squeeze(axis=0),
            offsets_result.squeeze(axis=0),
            displacement_fwd_result.squeeze(axis=0),
            displacement_bwd_result.squeeze(axis=0),
            output_stride=output_stride,
            max_pose_detections=1,
            min_pose_score=0.1)
        keypoint_coords *= output_scale

        draw_image = posenet.draw_skel_and_kp(
            flag, draw_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.1, min_part_score=0.0001)

        # flag == 1: draw the skeleton on a fresh black canvas;
        # flag == 0: draw it over the supplied model image instead.
        black_image = numpy.zeros(
            (draw_image.shape[0], draw_image.shape[1], 3), dtype='uint8')
        if flag == 1:
            black_image = posenet.draw_skel_and_kp(
                flag, black_image, pose_scores, keypoint_scores,
                keypoint_coords, min_pose_score=0.1, min_part_score=0.0001)
        if flag == 0:
            black_image = posenet.draw_skel_and_kp(
                flag, model_black_image, pose_scores, keypoint_scores,
                keypoint_coords, min_pose_score=0.1, min_part_score=0.0001)

        # Flatten the detected pose into [x, y] pairs, then the per-part
        # scores, then the score sum.
        for pi in range(len(pose_scores)):
            if pose_scores[pi] == 0.:
                break
            for ki, (s, c) in enumerate(
                    zip(keypoint_scores[pi, :], keypoint_coords[pi, :, :])):
                pos_temp_data.append(c[1])
                pos_temp_data.append(c[0])
            for ki, (s, c) in enumerate(
                    zip(keypoint_scores[pi, :], keypoint_coords[pi, :, :])):
                pos_temp_data.append(s)
                score_sum = score_sum + s
            pos_temp_data.append(score_sum)
        return pos_temp_data, draw_image, black_image
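# Minimal usage sketch for getpoints(); the file name and canvas size below are
# hypothetical placeholders, not paths from this project:
#
#   model_canvas = numpy.zeros((480, 640, 3), dtype='uint8')
#   features, annotated, skeleton = getpoints('frame_0001.jpg', 1, model_canvas)
#
# When one pose is detected the returned feature vector is laid out as
# [x0, y0, ..., x16, y16, s0, ..., s16, score_sum]: 34 coordinates,
# 17 part scores, and one aggregate score, 52 values in total; it is empty
# when no pose clears min_pose_score.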
def main():
    sg.theme('DarkBrown')
    args.file = sg.popup_get_file('Filename to play')
    args.out_data = sg.popup_get_file('Filename to save')
    if args.file is None:
        args.file = 'C:/Users/knum/Documents/Isfan Works/Train Data/Output data/pose1.mp4'
    if args.out_data is None:
        args.out_data = 'coba_gui.csv'

    # Define the window layout.
    layout = [[
        sg.Text('K-Numbers Motion Tracking Web Application',
                size=(60, 1), justification='right', font='Helvetica 20'),
    ], [
        sg.Text('Web-Camera', size=(30, 1), justification='left',
                font='Helvetica 20'),
        sg.Text('Trainer Motion', size=(50, 1), justification='center',
                font='Helvetica 20')
    ], [
        sg.Image(filename='', key='image'),
        sg.Image(filename='', key='image_vid')
    ], [
        sg.Text('None', key='motion', size=(30, 1), justification='center',
                font='Helvetica 20')
    ], [
        sg.Button('Record', size=(10, 1), font='Helvetica 14'),
        sg.Button('Stop', size=(10, 1), font='Any 14'),
        sg.Button('Exit', size=(10, 1), font='Helvetica 14')
    ]]

    # Create the window and show it without the plot.
    window = sg.Window('Demo Application - OpenCV Integration', layout,
                       location=(200, 150))

    # ---===--- Event loop: read and display frames, operate the GUI ---===--- #
    cap = cv2.VideoCapture(args.cam_id)
    recording = False
    while True:
        event, values = window.read(timeout=20)
        ret, frame = cap.read()
        if ret:
            imgbytes = cv2.imencode('.png', frame)[1].tobytes()
            window['image'].update(data=imgbytes)
        if event == 'Exit' or event == sg.WIN_CLOSED:
            return
        elif event == 'Record':
            recording = True
            cap = cv2.VideoCapture(args.cam_id)
            cap_vid = cv2.VideoCapture(args.file)
            loop = True
            cap.set(3, args.cam_width)
            cap.set(4, args.cam_height)
            cap_vid.set(3, args.cam_width)
            cap_vid.set(4, args.cam_height)
            start = time.time()
            frame_count = 0
            # Per-keypoint observation logs, stacked row by row while recording.
            scores = np.empty((0, 2), dtype=np.float32)
            coor = np.empty((0, 2), dtype=np.float32)
            label = np.empty((0, 1), dtype=str)
            frame_log = np.empty((0, 1), dtype=int)  # frame index per row
            keypoint = 17  # number of keypoints per pose

        if recording:
            # Reset the whole TensorFlow graph before loading the models.
            tf.reset_default_graph()
            tf.compat.v1.global_variables_initializer()
            with tf.compat.v1.Session() as sess:
                with open('./model/' + 'yoga_net.json', 'r') as arch_file:
                    loaded_model = model_from_json(arch_file.read())
                # Load the weights into the reconstructed model.
                loaded_model.load_weights(TRAINED_MODEL_NAME)
                print("Loaded model from disk")
                model_cfg, model_outputs = posenet.load_model(args.model, sess)
                output_stride = model_cfg['output_stride']
                while loop:
                    input_image, display_image, output_scale = posenet.read_cap(
                        cap, scale_factor=args.scale_factor,
                        output_stride=output_stride)
                    heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                        model_outputs, feed_dict={'image:0': input_image})
                    pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                        heatmaps_result.squeeze(axis=0),
                        offsets_result.squeeze(axis=0),
                        displacement_fwd_result.squeeze(axis=0),
                        displacement_bwd_result.squeeze(axis=0),
                        output_stride=output_stride,
                        max_pose_detections=10,
                        min_pose_score=0.15)
                    keypoint_coords *= output_scale

                    # TODO: this isn't particularly fast; use GL for drawing and
                    # display someday...
                    overlay_image = posenet.draw_skel_and_kp(
                        display_image, pose_scores, keypoint_scores,
                        keypoint_coords, min_pose_score=0.15, min_part_score=0.1)
                    imS = cv2.resize(overlay_image,
                                     (args.cam_width, args.cam_height))
                    imgbytes = cv2.imencode('.png', imS)[1].tobytes()
                    window['image'].update(data=imgbytes)
                    ret, frame_vid = cap_vid.read()
                    if ret:
                        frame_out = cv2.resize(
                            frame_vid, (args.cam_width, args.cam_height))
                        imgbytes = cv2.imencode('.png', frame_out)[1].tobytes()
                        window['image_vid'].update(data=imgbytes)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                    for pi in range(len(pose_scores)):
                        if pose_scores[pi] <= 0.5:
                            break
                        print('Pose #%d, score = %f' % (pi, pose_scores[pi]))
                        input_rt = np.empty((0, 2))
                        for ki, (s, c) in enumerate(
                                zip(keypoint_scores[pi, 0:keypoint],
                                    keypoint_coords[pi, 0:keypoint, :])):
                            # print('Keypoint %s, score = %f, coord = [%d, %d]' %
                            #       (posenet.PART_NAMES[ki], s, c[0], c[1]))
                            # Store one observation per keypoint.
                            frame_log = np.vstack((frame_log, frame_count))
                            scores = np.vstack(
                                (scores, np.array([pose_scores[pi], s])))
                            coor = np.vstack((coor, np.array([c[0], c[1]])))
                            label = np.vstack((label, "none"))
                            input_rt = np.vstack((input_rt,
                                                  np.array([
                                                      # s,
                                                      c[0], c[1]
                                                  ])))
                        # Normalise with the scaler fitted at training time.
                        tempNorm = joblib.load(scaler_file)
                        # normalizer = preprocessing.StandardScaler()
                        # tempNorm = normalizer.fit(input_rt.reshape(1, -1))
                        input_rt = tempNorm.transform(input_rt.reshape(1, -1))
                        pred = loaded_model.predict(input_rt.reshape(1, -1))
                        val = np.argmax(pred[0], axis=0)
                        if val == 1:
                            motion = "standing"
                        elif val == 2:
                            motion = "starting"
                        elif val == 3:
                            motion = "right"
                        elif val == 4:
                            motion = "left"
                        else:
                            motion = "unknown"
                        print(motion)
                        window['motion'].update(motion)

                    frame_count += 1
                    event, values = window.read(timeout=2)
                    if event == 'Stop':
                        loop = False
                        recording = False
                        # Save the recording to CSV.
                        raw_data = np.column_stack(
                            [frame_log, scores, coor, label])
                        write_csv(raw_data, args.out_data)
                        # Send the result to the server.
                        db_config.send_package(args.out_data, write=True,
                                               delete=False)
                        print('Average FPS: ',
                              frame_count / (time.time() - start))
                        cap.release()
                        cv2.destroyAllWindows()
                        cap = cv2.VideoCapture(args.cam_id)
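# Hedged sketch of the CSV layout saved above: np.column_stack([frame_log,
# scores, coor, label]) yields one row per keypoint observation with the
# columns [frame, pose_score, part_score, y, x, label]. write_csv itself is
# defined elsewhere in this project; the helper below is a hypothetical
# stand-in showing one way it could be written, with assumed column names.
import csv


def write_csv_sketch(raw_data, out_path):
    """Illustrative only: the header names are assumptions, not the project's."""
    with open(out_path, 'w', newline='') as fh:
        writer = csv.writer(fh)
        writer.writerow(['frame', 'pose_score', 'part_score', 'y', 'x', 'label'])
        writer.writerows(raw_data.tolist())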
def main():
    with tf.Session() as sess:
        # Load the model.
        model_cfg, model_outputs = posenet.load_model(args.model, sess)
        output_stride = model_cfg['output_stride']

        # Frame source: the specified file, or the specified (or default) live cam.
        if args.file is not None:
            cap = cv2.VideoCapture(args.file)
        else:
            cap = cv2.VideoCapture(args.cam_id)
        cap.set(3, args.cam_width)
        cap.set(4, args.cam_height)

        previous_pose = ''  # '' marks it as uninitialised; a cheap sentinel check
        count = 0  # stores the count of repetitions
        # A flag denoting a change in state: 0 -> the previous state is
        # continuing, 1 -> the state has changed.
        flag = -1
        # current_state stores a pair of values per tracked joint denoting
        # whether the joint is moving up (1) or down (0) when plotted against
        # time. It is initialised as [2, 2] so that it can never equal a
        # generated state unless there is no movement beyond the tolerance.
        current_state = [2, 2]

        while True:
            # Get a frame and run the model's prediction.
            input_image, display_image, output_scale = posenet.read_cap(
                cap, scale_factor=args.scale_factor, output_stride=output_stride)
            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
                model_outputs, feed_dict={'image:0': input_image})
            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.4)
            keypoint_coords *= output_scale  # normalise against the resolution

            if isinstance(previous_pose, str):
                # previous_pose was not initialised; seed it with the current keypoints.
                previous_pose = keypoint_coords
            text, previous_pose, current_state, flag = countRepetition(
                previous_pose, keypoint_coords, current_state, flag)
            if flag == 1:
                count += 1
                flag = -1

            image = posenet.draw_skel_and_kp(
                display_image, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.4, min_part_score=0.1)

            # OpenCV's putText does not recognise the '\n' delimiter, so draw
            # each line separately.
            y0, dy = 20, 20
            for i, line in enumerate(text.split('\n')):
                y = y0 + i * dy
                image = cv2.putText(image, line, (10, y),
                                    cv2.FONT_HERSHEY_SIMPLEX, .5,
                                    (255, 255, 255), 1)
            cal_burn = count * 2  # crude calories-burned estimate (2 per rep)
            text = f"count:{count}  cal_burn:{cal_burn}"
            image = cv2.putText(image, text, (10, y + 20),
                                cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 0, 0), 2)
            cv2.imshow('RepCounter', image)
            ch = cv2.waitKey(1)
            if ch == ord('q') or ch == ord('Q'):
                break  # exit the loop on press of q or Q
            elif ch == ord('r') or ch == ord('R'):
                count = 0  # reset the repetition count
        cap.release()
        cv2.destroyAllWindows()
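# countRepetition() is defined elsewhere in this project. For intuition, a
# simplified single-joint version of the same up/down state-machine idea is
# sketched below; the names, tolerance, and one-joint scope are illustrative
# simplifications, not the project's implementation.


def count_repetition_1d(prev_y, curr_y, state, flag, tolerance=5.0):
    """Track one joint's vertical direction; a down-to-up flip marks one rep."""
    if curr_y < prev_y - tolerance:    # image y grows downward: joint moved up
        new_state = 1
    elif curr_y > prev_y + tolerance:  # joint moved down
        new_state = 0
    else:
        return prev_y, state, flag     # within tolerance: state continues
    if state == 0 and new_state == 1:  # direction changed from down to up
        flag = 1                       # the caller increments the rep count
    return curr_y, new_state, flag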
def pose():
    model = posenet.load_model(args.model)
    model = model.cuda()
    output_stride = model.output_stride

    cap = cv2.VideoCapture(args.cam_id)
    cap.set(3, args.cam_width)
    cap.set(4, args.cam_height)

    start = time.time()
    frame_count = 0
    while True:
        input_image, display_image, output_scale = posenet.read_cap(
            cap, scale_factor=args.scale_factor, output_stride=output_stride)

        with torch.no_grad():
            input_image = torch.Tensor(input_image).cuda()
            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = model(
                input_image)
            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
                heatmaps_result.squeeze(0),
                offsets_result.squeeze(0),
                displacement_fwd_result.squeeze(0),
                displacement_bwd_result.squeeze(0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.15)

        keypoint_coords *= output_scale
        overlay_image = posenet.draw_skel_and_kp(
            display_image, pose_scores, keypoint_scores, keypoint_coords,
            min_pose_score=0.15, min_part_score=0.1)
        cv2.imshow('posenet', overlay_image)
        frame_count += 1

        # Name the 17 keypoints of the highest-scored pose.
        coords = keypoint_coords[0]
        pose_dictionary['nose'] = coords[0]
        pose_dictionary['leftEye'] = coords[1]
        pose_dictionary['rightEye'] = coords[2]
        pose_dictionary['leftEar'] = coords[3]
        pose_dictionary['rightEar'] = coords[4]
        pose_dictionary['leftShoulder'] = coords[5]
        pose_dictionary['rightShoulder'] = coords[6]
        pose_dictionary['leftElbow'] = coords[7]
        pose_dictionary['rightElbow'] = coords[8]
        pose_dictionary['leftWrist'] = coords[9]
        pose_dictionary['rightWrist'] = coords[10]
        pose_dictionary['leftHip'] = coords[11]
        pose_dictionary['rightHip'] = coords[12]
        pose_dictionary['leftKnee'] = coords[13]
        pose_dictionary['rightKnee'] = coords[14]
        pose_dictionary['leftAnkle'] = coords[15]
        pose_dictionary['rightAnkle'] = coords[16]

        # Human scale: multiply any pixel difference by this factor to get
        # centimeters (assumes ~6.4 cm between the eyes).
        rightEye = pose_dictionary['rightEye']
        leftEye = pose_dictionary['leftEye']
        human_scale = 6.4 / (rightEye[1] - leftEye[1])

        # The whole camera integration goes here to add new points to the
        # beginning of the context points list.
        rightWrist = pose_dictionary['rightWrist']
        leftWrist = pose_dictionary['leftWrist']
        print("rightWrist", rightWrist * human_scale,
              "leftWrist", leftWrist * human_scale)
        print("dif", (rightWrist - leftWrist) * human_scale)
        #print("wristDistanceY", wristDistanceY)
        if (rightWrist[0] != 0 and rightWrist[1] != 0
                and leftWrist[0] != 0 and leftWrist[1] != 0):
            wristDistanceX = rightWrist[1] - leftWrist[1]
            wristDistanceY = rightWrist[0] - leftWrist[0]
            # Empirical mapping of the wrist offsets into small control values.
            sin_a = ((wristDistanceX - 200) / (500 - 200)) * 3
            sin_b = ((wristDistanceY - 160) / (680 - 160)) * 3
            #print(pose_dictionary['nose'])
            #print("wristDistanceX", wristDistanceX)
            #print("wristDistanceY", wristDistanceY)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
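# Worked example of the human-scale conversion used in pose(): assuming an
# average interpupillary distance of ~6.4 cm, the pixel gap between the eyes
# yields a cm-per-pixel factor for the frame (reasonable while the subject
# faces the camera at a roughly constant depth). The numbers are illustrative:
#
#   eye_gap_px = 32.0                  # rightEye x minus leftEye x in a frame
#   human_scale = 6.4 / eye_gap_px     # -> 0.2 cm per pixel
#   wrist_gap_px = 250.0
#   wrist_gap_cm = wrist_gap_px * human_scale   # -> 50.0 cm between the wrists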