def Image_camera(num=0):
    """Capture frames from camera `num` (1 = USB camera, 0 = built-in) and
    interactively generate a pose code string.

    Keys: 'f' launches the `basis` calibration worker on the current frame,
    's' runs pose estimation on the current frame, 'q' aborts.

    Returns the generated code string ("" if aborted before detection).
    """
    cap = cv2.VideoCapture(num)
    code = ""
    zdeg = {}
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # fix: original passed a None frame straight to cv2.imshow on a
            # failed grab, crashing instead of exiting cleanly
            break
        # Display the resulting frame
        cv2.imshow('frame', frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('f'):
            th = threading.Thread(target=basis,
                                  name='th',
                                  args=(np.array(frame, dtype=np.float32), ))
            th.daemon = True  # fix: Thread.setDaemon() is deprecated
            th.start()
        # emergency exit
        if key == ord('q'):
            break
        if key == ord('s'):
            img = np.array(frame, dtype=np.float32)
            pose_arr = model(img)
            pose_arr = pose_arr[0]
            result_img = draw_person_pose(img, pose_arr)
            # presumably the first detected person — TODO confirm model output layout
            pose_arr = pose_arr[0]
            if isKeypoint(pose_arr):
                if isSide(pose_arr, armave, legave):
                    zdeg = zaxis(armave, legave, pose_arr)
                    code = CreateSide(zdeg)
                else:
                    code = paint(pose_arr)
                break
    cap.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    return code
def Image_path(path):
    """Run pose estimation on the image file at `path` and return the
    generated code string ("" when too few keypoints are detected).

    Raises FileNotFoundError when the image cannot be read.
    """
    code = ""
    zdeg = {}
    frame = cv2.imread(path)
    if frame is None:
        # fix: cv2.imread silently returns None for unreadable paths; the
        # original then failed opaquely inside model()
        raise FileNotFoundError(f"Could not read image: {path}")
    img = np.array(frame, dtype=np.float32)
    pose_arr = model(img)
    pose_arr = pose_arr[0]
    result_img = draw_person_pose(img, pose_arr)
    # presumably the first detected person — TODO confirm model output layout
    pose_arr = pose_arr[0]
    if isKeypoint(pose_arr):
        if isSide(pose_arr, armave, legave):
            zdeg = zaxis(armave, legave, pose_arr)
            code = CreateSide(zdeg)
        else:
            code = paint(pose_arr)
    return code
def _missing_hand():
    """Sentinel keypoints used when a hand is not detected (21 off-screen points)."""
    return [[1000, 1000] for _ in range(21)]


def _clean_keypoints(points):
    """Drop the confidence column of each keypoint in-place and round to int.

    None entries (undetected keypoints) are passed through unchanged.
    """
    for i in range(len(points)):
        if points[i] is not None:
            points[i] = [int(v) for v in np.delete(points[i], 2)]
    return points


def _detect_hand(hand, hand_detector, res_img, hand_type):
    """Run the hand detector on one cropped hand and draw the result.

    Returns (keypoints, res_img); falls back to sentinel points when the
    hand crop is missing.
    """
    if hand is None:
        return _missing_hand(), res_img
    bbox = hand["bbox"]
    keypoints = _clean_keypoints(hand_detector(hand["img"], hand_type=hand_type))
    res_img = draw_hand_keypoints(res_img, keypoints, (bbox[0], bbox[1]))
    cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                  (255, 255, 255), 1)
    return keypoints, res_img


def _pca_frame(points, prefix, count):
    """Project 2-D keypoints to one PCA component per point.

    Missing (None) points are replaced with the [1000, 1000] sentinel first.
    Returns a single-row DataFrame with columns `<prefix>_1 .. <prefix>_<count>`.
    """
    for i in range(len(points)):
        if points[i] is None:
            points[i] = [1000, 1000]
    pca = sklearnPCA(n_components=1)
    reduced = pca.fit_transform(points)
    frame = pd.DataFrame(data=reduced).T
    return frame.rename(
        columns={i: prefix + "_" + str(i + 1) for i in range(count)})


def convertData(gesture):
    """Sample ~12 frames from every video of `gesture`, extract body and hand
    keypoints for the first detected person, PCA-reduce them, and append the
    rows to dataset720new/<gesture>.csv.
    """
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load models
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz",
                                 device=args.gpu)
    hand_detector = HandDetector("handnet", "models/handnet.npz",
                                 device=args.gpu)

    dataset = buildGestureDict("dataset/")
    gesturedf = pd.read_csv("sample.csv")  # template providing the output schema

    for video in dataset[gesture]["videos"]:
        print("Currently processing the video for " + video["filename"])
        startvideo = time.time()
        cap = cv2.VideoCapture(video["filepath"])
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        amount_of_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("Amount of Frames:", amount_of_frames)
        cap.set(cv2.CAP_PROP_FPS, 5)
        ret, img = cap.read()  # priming read
        counter = 1
        # sample roughly 12 equidistant frames; fix: the original
        # int(amount_of_frames / 12) is 0 for videos shorter than 12 frames,
        # crashing the modulo below with ZeroDivisionError
        frame_tracker = max(1, int(amount_of_frames / 12))
        framecounter = 0
        while ret:
            ret, img = cap.read()  # get video frame
            if not ret:
                print("Failed to capture image")
                break
            person_pose_array, _ = pose_detector(img)
            res_img = cv2.addWeighted(
                img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)
            if counter % frame_tracker == 0:
                # fix: the original set firstPerson = True at the top of every
                # iteration, so its "only process the first person" guard
                # never fired; a slice expresses the intent directly
                for person_pose in person_pose_array[:1]:
                    unit_length = pose_detector.get_unit_length(person_pose)
                    # hands estimation
                    hands = pose_detector.crop_hands(img, person_pose,
                                                     unit_length)
                    left, res_img = _detect_hand(hands["left"], hand_detector,
                                                 res_img, "left")
                    right, res_img = _detect_hand(hands["right"],
                                                  hand_detector, res_img,
                                                  "right")
                    print("Body Pose")
                    # drop four lower-body keypoints (original indices
                    # 9, 10, 12, 13), leaving 14 points for the "head" block
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 9, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    person_pose = np.delete(person_pose, 10, 0)
                    head = _clean_keypoints(person_pose.tolist())
                    print(head)
                    print("Left")
                    print(left)
                    print("Right")
                    print(right)
                    cv2.imshow("result", res_img)
                    dfhead = _pca_frame(head, "head", 14)
                    dfleft = _pca_frame(left, "left", 21)
                    dfright = _pca_frame(right, "right", 21)
                    df2 = pd.concat([dfhead, dfleft, dfright], axis=1)
                    df2["frame"] = framecounter
                    df2["gesture"] = video["gesture"]
                    df2["speaker"] = video["actor"]
                    framecounter = framecounter + 1
                    df2["frame"] = df2["frame"].astype(int)
                    # fix: the original also appended to an undefined `newdf`
                    # (NameError on the first processed frame); only
                    # `gesturedf` is ever saved. pd.concat replaces the
                    # deprecated DataFrame.append.
                    gesturedf = pd.concat([gesturedf, df2], sort=False)
            else:
                cv2.imshow("result", img)
            counter = counter + 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()
        gesturedf.to_csv("dataset720new/" + gesture + ".csv", index=False)
        print("Done Recording for: " + gesture)
        print("Took " + str(time.time() - startvideo) + "seconds")
# NOTE(review): fragment — the enclosing function and its while/key-read loop
# are not visible in this chunk; this appears to be a variant of the
# Image_camera 'f'/'q'/'s' key handler with a calibration busy-wait added.
# Indentation below is reconstructed from collapsed source — verify nesting.
th = threading.Thread(target=basis,
                      name='th',
                      args=(np.array(frame, dtype=np.float32), ))
th.setDaemon(True)
th.start()
# emergency exit
if (key == ord('q')):
    break
if (key == ord('s')):
    print("start")
    img = np.array(frame, dtype=np.float32)
    pose_arr = model(img)
    pose_arr = pose_arr[0]
    result_img = draw_person_pose(img, pose_arr)
    plt.figure(figsize=(6, 6))
    # BGR -> RGB reversal plus inversion for display — presumably for a
    # white-background plot; confirm intent
    plt.imshow(255 - result_img[:, :, ::-1])
    pose_arr = pose_arr[0]  # presumably first detected person — verify
    if (isKeypoint(pose_arr)):
        # Busy-wait until the basis worker sets basisT — NOTE(review): spins
        # at full CPU; assumes `basisT`/`counting` are managed elsewhere.
        # Reconstruction assumes the while wraps only the counter increment.
        while (basisT == False):
            counting = counting + 1
        if (isSide(pose_arr, armave, legave)):
            zdeg = zaxis(armave, legave, pose_arr)
            code = CreateSide(zdeg)
        else:
            code = paint(pose_arr)
    else:
        # voice feedback: "not enough keypoints" (runtime string kept as-is)
        code = "SAY 'キーポイントが足りません' 5"
    break
# NOTE(review): fragment — this chunk starts mid-statement; the line below
# closes a PoseDetector(...) constructor call whose opening (and the argparse
# setup providing `args`) is outside the visible chunk.
                             device=args.gpu)
hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu)
face_detector = FaceDetector("facenet", "models/facenet.npz", device=args.gpu)

# read image
img = cv2.imread(args.img)

# inference
print("Estimating pose...")
person_pose_array, _ = pose_detector(img)
# blend the pose overlay (40%) onto the original image (60%)
res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array),
                          0.4, 0)

# each person detected
for person_pose in person_pose_array:
    unit_length = pose_detector.get_unit_length(person_pose)

    # face estimation
    print("Estimating face keypoints...")
    cropped_face_img, bbox = pose_detector.crop_face(img, person_pose,
                                                     unit_length)
    if cropped_face_img is not None:
        face_keypoints = face_detector(cropped_face_img)
        res_img = draw_face_keypoints(res_img, face_keypoints,
                                      (bbox[0], bbox[1]))
        cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                      (255, 255, 255), 1)
cap2 = cv2.VideoCapture(1) #VideoReader(args.video) idx = 0 f = open("C:/Users/Boombada/Desktop/myPy/inputData.txt", 'a') f1 = open("C:/Users/Boombada/Desktop/myPy/Label.txt", 'a') index = 0 # for img in video_provider: while (cap.isOpened() & cap2.isOpened()): ret1, img = cap.read() ret2, img2 = cap2.read() poses, _ = pose_detector(img) poses2, _ = pose_detector(img2) res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, poses), 0.4, 0) res_img2 = cv2.addWeighted(img2, 0.6, draw_person_pose(img2, poses2), 0.4, 0) #logger.debug("type: {}".format(type(poses))) #logger.debug("shape: {}".format(poses.shape)) logger.debug("A") logger.debug(poses) logger.debug(poses2) # cv2.imshow(file_body_name + '_result', res_img) if (ret1 & ret2): cv2.imshow('video', res_img) cv2.imshow('video2', res_img2)
# load model pose_detector = PoseDetector("posenet", "models/coco_posenet.npz", device=args.gpu) hand_detector = HandDetector("handnet", "models/handnet.npz", device=args.gpu) cap = cv2.VideoCapture('sign language/sample.mp4') cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) cap.set(cv2.CAP_PROP_FPS, 4) counter=0 while(cap.isOpened()): ret, img = cap.read() if not ret: print("Failed to capture image") break person_pose_array, _ = pose_detector(img) res_img = cv2.addWeighted(img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0) # each person detected for person_pose in person_pose_array: unit_length = pose_detector.get_unit_length(person_pose) #print(person_pose) # hands estimation hands = pose_detector.crop_hands(img, person_pose, unit_length) if hands["left"] is not None: hand_img = hands["left"]["img"] bbox = hands["left"]["bbox"] hand_keypoints = hand_detector(hand_img, hand_type="left") res_img = draw_hand_keypoints(res_img, hand_keypoints, (bbox[0], bbox[1])) #print("Left") #print(hand_keypoints) cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (255, 255, 255), 1)
# NOTE(review): fragment — `cap` and `pose_detector` are created before this
# chunk, and the chunk is truncated mid-statement at the end (the inner `if`
# body is missing).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
while True:
    # get video frame
    ret, img = cap.read()
    # NOTE(review): bug candidate — cvtColor runs before the `ret` check and
    # will raise if the grab failed (img is None); `gray` is also unused in
    # the visible code.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    if not ret:
        print("Failed to capture image")
        break
    person_pose_array, _ = pose_detector(img)
    data = draw_person_pose(img, person_pose_array)
    res_img = cv2.addWeighted(img, 0.6, data, 0.4, 0)
    frame_width = int(cap.get(3))   # CAP_PROP_FRAME_WIDTH
    frame_height = int(cap.get(4))  # CAP_PROP_FRAME_HEIGHT
    #cv2.imshow("result", res_img)
    # NOTE(review): bug candidate — a new VideoWriter is opened on every
    # iteration of the outer loop, truncating 'outpy1.avi' each time.
    out = cv2.VideoWriter('outpy1.avi',
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                          (frame_width, frame_height))
    while (True):
        ret, frame = cap.read()
        if ret == True:
            # Write the frame into the file 'output.avi'
def main(cap, im_scale=2, view_results=False):
    """Run body+hand pose detection over a video and record finger point pairs.

    For every frame, detects people on a downscaled copy, crops each hand,
    and stores the (keypoint 5, keypoint 8) pair — rescaled to the original
    video resolution — into a VideoLabelFile keyed by frame id.

    Parameters
    ----------
    cap : project video wrapper — assumed to expose .read(), .eof,
          .frame_idx(), .video_fname and .info(); NOT a raw cv2.VideoCapture
          (its .read() returns a single image) — TODO confirm.
    im_scale : downscale factor applied before inference.
    view_results : show a debug window when True; otherwise print dots.
    """
    debug_i = 0
    fps_timer_arr = [0] * 16  # ring buffer of recent per-frame durations
    fps = 0
    # load model
    # NOTE(review): GPU id is hard-coded here, unlike the argparse-driven
    # scripts elsewhere in this file.
    pose_device = 0
    pose_model_dir = '../../Chainer_Realtime_Multi-Person_Pose_Estimation/models'
    pose_detector = PoseDetector("posenet",
                                 f"{pose_model_dir}/coco_posenet.npz",
                                 device=pose_device)
    hand_detector = HandDetector("handnet",
                                 f"{pose_model_dir}/handnet.npz",
                                 device=pose_device)
    # cv2.namedWindow('display', flags=(cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE))
    if view_results:
        cv2.namedWindow('display')
    video_label_file = VideoLabelFile(cap.video_fname,
                                      fname_add='pre_points_pose')
    labels_current = defaultdict(lambda: [])  # frame id -> list of point pairs
    # NOTE(review): loaded but never used in this function
    labels_all_previous = video_label_file.load_previous()
    im_input = cap.read()
    im_input_shape = im_input.shape[0:2]
    first_run = True
    while (not cap.eof):
        fps_time_begin = time.perf_counter()
        debug_i += 1
        im_input = cap.read()
        current_frame_id = cap.frame_idx()
        # print(cap.info())
        # downscale for inference speed
        im_pose = cv2.resize(im_input, (round(im_input_shape[1] / im_scale),
                                        round(im_input_shape[0] / im_scale)))
        if first_run:
            print(
                f"Video size {im_input.shape} -> Model input size {im_pose.shape}"
            )
            first_run = False
        ##########################################
        person_pose_array, _ = pose_detector(im_pose)
        im_display = cv2.addWeighted(
            im_pose, 0.6, draw_person_pose(im_pose, person_pose_array), 0.4, 0)
        for person_pose in person_pose_array:
            unit_length = pose_detector.get_unit_length(person_pose)
            # arr = np.array([a for a in person_pose if a is not None])
            # if arr.any():
            #     arr[:, 0:2] *= im_scale
            #     labels_current[current_frame_id].append(['pre_person_pose', arr.tolist()])
            # hands estimation
            hands = pose_detector.crop_hands(im_pose, person_pose, unit_length)
            if hands["left"] is not None:
                hand_img = hands["left"]["img"]
                bbox = hands["left"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="left")
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (255, 255, 255), 1)
                if hand_keypoints[5] and hand_keypoints[8]:
                    # keypoints 5 and 8 — rescale the pair from the cropped,
                    # downscaled coordinates back to the original video frame
                    f_points = np.array(
                        [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points +
                                np.array([bbox[0], bbox[1]])) * im_scale
                    #f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)
            if hands["right"] is not None:
                hand_img = hands["right"]["img"]
                bbox = hands["right"]["bbox"]
                hand_keypoints = hand_detector(hand_img, hand_type="right")
                im_display = draw_hand_keypoints(im_display, hand_keypoints,
                                                 (bbox[0], bbox[1]))
                cv2.rectangle(im_display, (bbox[0], bbox[1]),
                              (bbox[2], bbox[3]), (255, 255, 255), 1)
                if hand_keypoints[5] and hand_keypoints[8]:
                    f_points = np.array(
                        [hand_keypoints[5][:2], hand_keypoints[8][:2]])
                    f_points = (f_points +
                                np.array([bbox[0], bbox[1]])) * im_scale
                    #f_points = tuple(map(tuple, f_points.astype(int)))
                    f_points = f_points.astype(int).tolist()
                    labels_current[current_frame_id].append(f_points)
        #############################################
        # draw the recorded pairs (scaled back down for the preview window)
        for l in labels_current[current_frame_id]:
            cv2.circle(im_display, (round(l[0][0] / im_scale),
                                    round(l[0][1] / im_scale)), 10,
                       (255, 0, 0), 2)
            cv2.circle(im_display, (round(l[1][0] / im_scale),
                                    round(l[1][1] / im_scale)), 10,
                       (0, 255, 0), 2)
        cv2.putText(im_display,
                    f"frame {int(current_frame_id)}, fps: {int(fps)}.",
                    (10, im_display.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 255), 2)
        if view_results:
            #cv2.imshow('display', im_display)
            cv2.imshow('display', im_pose)
        else:
            print(".", end="")  # progress indicator in headless mode
            sys.stdout.flush()
        # labels_current[current_frame_id].append
        #############################################
        ## KEYBOARD
        k = cv2.waitKey(5)
        if k == 27:  # esc
            break
        elif k == ord('c'):
            # drop into the debugger on demand
            import ipdb
            ipdb.set_trace()
        # ipdb.set_trace()
        # pdb.set_trace()
        fps_timer_arr[debug_i % 16] = time.perf_counter() - fps_time_begin
        fps = int(len(fps_timer_arr) * 1 / sum(fps_timer_arr))
    print(". ")
    # cap.release()
    video_label_file.save_current_labels(labels_current,
                                         append_previous=False,
                                         custom_lists=True)
    if view_results:
        cv2.destroyAllWindows()
import argparse

import chainer
import cv2  # fix: the original used cv2.VideoCapture without importing cv2

from pose_detector import PoseDetector, draw_person_pose

chainer.using_config('enable_backprop', False)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Pose detector')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # load model
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz",
                                 device=args.gpu)

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    try:
        while True:
            # get video frame
            ret, img = cap.read()
            if not ret:
                print("Failed to capture image")
                break
            person_pose_array, _ = pose_detector(img)
            # blend the pose overlay (40%) onto the frame (60%)
            res_img = cv2.addWeighted(
                img, 0.6, draw_person_pose(img, person_pose_array), 0.4, 0)
            cv2.imshow("result", res_img)
            cv2.waitKey(1)
    finally:
        # fix: the original never released the camera or closed the window
        cap.release()
        cv2.destroyAllWindows()
def estimate_pose(img_path, gpu=-1):
    """Detect body and hand keypoints for the first person in an image.

    The annotated image is written to result.png; keypoints are printed to
    stdout.

    Parameters
    ----------
    img_path : path to the input image file.
    gpu : GPU id; a negative value runs on CPU.

    Raises
    ------
    FileNotFoundError : when the image cannot be read.
    """
    # load model
    print("Loading pose detection model...")
    pose_detector = PoseDetector("posenet", "models/coco_posenet.npz",
                                 device=gpu)
    print("Loading hand detection model...")
    hand_detector = HandDetector("handnet", "models/handnet.npz", device=gpu)

    # read image
    img = cv2.imread(img_path)
    if img is None:
        # fix: cv2.imread silently returns None for unreadable paths; the
        # original then failed opaquely inside the detector
        raise FileNotFoundError("Could not read image: " + str(img_path))

    # inference
    print("Estimating pose...")
    person_pose_array, _ = pose_detector(img)
    # blend the pose overlay (40%) onto the image (60%)
    res_img = cv2.addWeighted(img, 0.6,
                              draw_person_pose(img, person_pose_array), 0.4, 0)

    # only the first detected person is processed — the original used a
    # has_detected flag with `continue` to the same effect
    for person_pose in person_pose_array[:1]:
        print("Body:", person_pose)
        unit_length = pose_detector.get_unit_length(person_pose)

        # hands estimation
        print("Estimating hands keypoints...")
        hands = pose_detector.crop_hands(img, person_pose, unit_length)
        if hands["left"] is not None:
            hand_img = hands["left"]["img"]
            bbox = hands["left"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="left")
            print("Left hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints,
                                          (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 1)
        if hands["right"] is not None:
            hand_img = hands["right"]["img"]
            bbox = hands["right"]["bbox"]
            hand_keypoints = hand_detector(hand_img, hand_type="right")
            print("Right hand: ", print_arr(hand_keypoints))
            res_img = draw_hand_keypoints(res_img, hand_keypoints,
                                          (bbox[0], bbox[1]))
            cv2.rectangle(res_img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                          (255, 255, 255), 1)

    print('Saving result into result.png...')
    cv2.imwrite('result.png', res_img)