def get_face(img_queue, box_queue): ''' Get face from image queue. this function is used for multiprocessing. introduce mark_detector to detect landmarks ''' mark_detector = MarkDetector() while True: image = img_queue.get() boxes = mark_detector.extract_cnn_facebox(image) box_queue.put(boxes) # the boxes of faces.
def webcam_main(): print("Camera sensor warming up...") vs = cv2.VideoCapture(0) time.sleep(2.0) mark_detector = MarkDetector() # loop over the frames from the video stream while True: _, frame = vs.read() start = cv2.getTickCount() frame = imutils.resize(frame, width=750, height=750) frame = cv2.flip(frame, 1) faceboxes = mark_detector.extract_cnn_facebox(frame) if faceboxes is not None: for facebox in faceboxes: # Detect landmarks from image of 64X64 with grayscale. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] # cv2.rectangle(frame, (facebox[0], facebox[1]), (facebox[2], facebox[3]), (0, 255, 0), 2) face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY) face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1) land_start_time = time.time() marks = mark_detector.detect_marks_keras(face_img0) # marks *= 255 marks *= facebox[2] - facebox[0] marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Draw Predicted Landmarks mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2) fps_time = (cv2.getTickCount() - start)/cv2.getTickFrequency() cv2.putText(frame, '%.1ffps'%(1/fps_time) , (frame.shape[1]-65,15), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0,255,0)) # show the frame cv2.imshow("Frame", frame) # writer.write(frame) key = cv2.waitKey(1) & 0xFF # if the `q` key was pressed, break from the loop if key == ord("q"): break # do a bit of cleanup cv2.destroyAllWindows() vs.stop()
def Image_Database_Generator(): mark_detector = MarkDetector() video_capture = cv2.VideoCapture(file) name = input("Enter student id:") directory = os.path.join(facepath, name) if not os.path.exists(facepath): os.makedirs(facepath, exist_ok='True') if not os.path.exists(directory): try: os.makedirs(directory, exist_ok='True') except OSError as e: if e.errno != errno.EEXIST: print('invalid student id or access denied') return number_of_images = 0 MAX_NUMBER_OF_IMAGES = 50 count = 0 count = 0 while number_of_images < MAX_NUMBER_OF_IMAGES: ret, frame = video_capture.read() count += 1 if ret == False: break if count % 5 != 0: continue frame = cv2.flip(frame, 1) height, width, _ = frame.shape frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) original_frame = frame.copy() height, width = frame.shape[:2] shape_predictor = dlib.shape_predictor( "shape_predictor_68_face_landmarks.dat") face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. x1 = max(facebox[0] - 0, 0) x2 = min(facebox[2] + 0, width) y1 = max(facebox[1] - 0, 0) y2 = min(facebox[3] + 0, height) face = frame[y1:y2, x1:x2] face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Show preview. if showit: frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2) cv2.imshow("Preview", frame) face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align(original_frame, frame_gray, face) cv2.imwrite( os.path.join(directory, str(name) + '_' + str(number_of_images) + '.jpg'), face_aligned) number_of_images += 1 print(number_of_images) if cv2.waitKey(10) == 27: break print('Thank you') video_capture.release() cv2.destroyAllWindows()
def Camera_Capture(): mark_detector = MarkDetector() name = input("Enter student id:") directory = os.path.join(facepath, name) if not os.path.exists(facepath): os.makedirs(facepath, exist_ok = 'True') if not os.path.exists(directory): try: os.makedirs(directory, exist_ok = 'True') except OSError as e: if e.errno != errno.EEXIST: print('invalid student id or access denied') return poses=['frontal','right','left','up','down'] file=0 cap = cv2.VideoCapture(file) ret, sample_frame = cap.read() i = 0 count = 0 if ret==False: return # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] images_saved_per_pose=0 number_of_images = 0 shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH) while i<5: saveit = False # Read frame, crop it, flip it, suits your needs. ret, frame = cap.read() if ret is False: break if count % 5 !=0: # skip 5 frames count+=1 continue if images_saved_per_pose==IMAGE_PER_POSE: i+=1 images_saved_per_pose=0 # If frame comes from webcam, flip it so it looks like a mirror. if file == 0: frame = cv2.flip(frame, 2) original_frame=frame.copy() frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. x1=max(facebox[0]-0,0) x2=min(facebox[2]+0,width) y1=max(facebox[1]-0,0) y2=min(facebox[3]+0,height) face = frame[y1: y2,x1:x2] face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # print(steady_pose[0][0]) # if steady_pose[0][0]>0.1: # print('right') # else: # if steady_pose[0][0]<-0.1: # print('left') # if steady_pose[0][1]>0.1: # print('down') # else: # if steady_pose[0][1]<-0.1: # print('up') # print(steady_pose[0]) if i==0: if abs(steady_pose[0][0])<ANGLE_THRESHOLD and abs(steady_pose[0][1])<ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==1: if steady_pose[0][0]>ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==2: if steady_pose[0][0]<-ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==3: if steady_pose[0][1]<-ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==4: if steady_pose[0][1]>ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True # Show preview. if i>=5: print ('Thank you') break frame = cv2.putText(frame,poses[i] +' : '+ str(images_saved_per_pose)+'/'+str(IMAGE_PER_POSE),(10,50),cv2.FONT_HERSHEY_SIMPLEX,2,(255,255,255),1,cv2.LINE_AA) frame = cv2.rectangle(frame, (x1,y1), (x2,y2),(255,255,0),2) cv2.imshow("Preview", frame) if saveit: face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align(original_frame, frame_gray, face) cv2.imwrite(os.path.join(directory, str(name)+'_'+str(number_of_images)+'.jpg'), face_aligned) print(images_saved_per_pose) number_of_images+=1 if cv2.waitKey(10) == 27: break cap.release() cv2.destroyAllWindows()
def Test_Video(): # model=load_model(model_path) with open(model_json, 'r') as json_file: loaded_json = json_file.read() model = model_from_json(loaded_json) model.load_weights(model_weights) trainX = [] ids = [] for filename in os.listdir(embedfolder): if filename.endswith('.npz'): f = filename.split('.npz') foldername = f[0] data = np.load(os.path.join(embedfolder, filename)) trainX.append(data['arr_0']) idc = [foldername] * data['arr_0'].shape[0] ids.append(idc) trainX = np.concatenate(trainX, axis=0) ids = np.concatenate(ids, axis=0) grid = pickle.load(open(classifierfilename, 'rb')) # get target mapping en = TolerantLabelEncoder(ignore_unknown=True) en.fit(ids) video_capture = cv2.VideoCapture(file) count = 0 mark_detector = MarkDetector() while True: ret, frame = video_capture.read() count += 1 if ret == False: break if count % 5 == 0: frame = cv2.flip(frame, 1) height, width = frame.shape[:2] frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # if len(faces)> 0: x1 = max(facebox[0] - 0, 0) x2 = min(facebox[2] + 0, width) y1 = max(facebox[1] - 0, 0) y2 = min(facebox[3] + 0, height) face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align(frame, frame_gray, face) embedding = model.predict( prewhiten(face_aligned).reshape(-1, FACE_WIDTH, FACE_HEIGHT, 3)) cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) yhat_test = grid.predict(embedding) yhat_prob = grid.predict_proba(embedding) id = np.argmax(yhat_prob) # print(yhat_prob[0][id]) if yhat_prob[0][id] < th: print(yhat_prob[0][id], 'unknown') else: print(yhat_prob[0][id], en.inverse_transform(id)) cv2.imshow('Video', frame) if cv2.waitKey(10) == 27: break video_capture.release() cv2.destroyAllWindows()
def webcam_main(): print("Camera sensor warming up...") cv2.namedWindow('face landmarks', cv2.WINDOW_NORMAL) frame = cv2.imread(IMAGE_PATH) time.sleep(2.0) mark_detector = MarkDetector(current_model, CNN_INPUT_SIZE) if current_model.split(".")[-1] == "pb": run_model = 0 elif current_model.split(".")[-1] == "hdf5" or current_model.split( ".")[-1] == "h5": run_model = 1 else: print("input model format error !") return faceboxes = mark_detector.extract_cnn_facebox(frame) print(faceboxes) if faceboxes is not None: facebox = faceboxes[0] # Detect landmarks from image of 64X64 with grayscale. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] cv2.rectangle(frame, (facebox[0], facebox[1]), (facebox[2], facebox[3]), (0, 255, 0), 2) face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY) face_img0 = face_img.reshape(1, CNN_INPUT_SIZE, CNN_INPUT_SIZE, 1) if run_model == 1: marks = mark_detector.detect_marks_keras(face_img0) else: marks = mark_detector.detect_marks_tensor(face_img0, 'input_2:0', 'output/BiasAdd:0') # marks *= 255 marks *= facebox[2] - facebox[0] marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Draw Predicted Landmarks mark_detector.draw_marks(frame, marks, color=(255, 255, 255), thick=2) # loop over the frames from the video stream while True: start = cv2.getTickCount() # frame = cv2.resize(frame, (750,750)) # frame = cv2.flip(frame, 1) # fps_time = (cv2.getTickCount() - start)/cv2.getTickFrequency() # cv2.putText(frame, '%.1ffps'%(1/fps_time), (frame.shape[1]-65,15), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0,255,0)) # show the frame cv2.imshow("face landmarks", frame) # writer.write(frame) key = cv2.waitKey(1) # if the `q` key was pressed, break from the loop if key == ord("q") or key == 0xFF: break # do a bit of cleanup cv2.destroyAllWindows()
count = 0 import numpy as np frame_got = True import json with open('/private/var/py/AttitudeDetection/head_pose_tensors.json', encoding='utf-8') as f: data = json.loads(f.read()) while frame_got: frame_got, frame = cam.read() print('Frame', frame) facebox = mark_detector.extract_cnn_facebox(frame) print('Facebox', facebox) height, width = frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # cv2.line(frame, (50, 30), (450, 35), (255, 0, 0), thickness=5) face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) # marks = mark_detector.detect_marks(face_img) # print('Marks', marks) for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): print('=' * 30) print(v)
class PoseHeadNode(object): def __init__(self): # subscribe rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb) rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback) # publish self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo, \ queue_size=10) #data self.humans = None self.color_img = None self.init() def init(self): self.detector = RetinaFace('./model/R50', 0, 0, 'net3') self.mark_detector = MarkDetector() self.pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] sample_img = None #if self.color_img is None: # print("None") # time.sleep(0.2) # #height, width, _ = self.color_img.shape self.pose_estimator = PoseEstimator(img_size=(480, 640)) def pose_cb(self, data): if self.color_img is None: return h, w = self.color_img.shape[:2] # ros topic to Person instance humans = [] for p_idx, person in enumerate(data.persons): human = Human([]) for body_part in person.body_part: part = BodyPart('', body_part.part_id, body_part.x, body_part.y, body_part.confidence) human.body_parts[body_part.part_id] = part humans.append(human) self.humans = humans #self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False) def draw_pose(self, img, humans, parts=[4, 7]): if img is None or humans is None: #print("Cannot draw pose on {} image and {} humans".format(img, humans)) return None image_h, image_w, _ = img.shape if parts is None or len(parts) == 0: return TfPoseEstimator.draw_humans(img, humans, imgcopy=False) else: for human in humans: for part in parts: if part in human.body_parts.keys(): body_part = human.body_parts[part] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) img = cv2.circle(img, coord, 2, (0, 255, 0)) return img def color_callback( self, data ): # Need semaphore to protect self.color_img, because it is also used by is_waving_hand function #print("Having color image") #cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding) #print(cv_image) decoded = np.frombuffer(data.data, np.uint8) img = np.reshape(decoded, (480, 640, 3)) #print("COLOR_CALLBACK", img.shape) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) self.color_img = img def get_top_vertice_human(self, human, img_w, img_h): sorted_id = sorted(human["pose"].body_parts.keys()) body_part = human["pose"].body_parts[sorted_id[0]] return (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) def is_gazing(self, face_direction, human, img): def get_angle_direct_with_peakhand(face_direction, peak): d0, d1 = face_direction peak_direction = (peak[0] - d0[0], peak[1] - d0[1]) face_direct = (d1[0] - d0[0], d1[1] - d0[1]) d_peak_direction = np.sqrt(peak_direction[0]**2 + peak_direction[1]**2) d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2) return (peak_direction[0]*face_direct[0] + \ peak_direction[1]*face_direct[1])/(d_peak_direction*\ d_face_direct) thresh = 0.1 #0.5 #cos(PI/3) image_h, image_w, _ = img.shape if 4 in human.body_parts.keys(): body_part = human.body_parts[4] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) print("angle left = ", get_angle_direct_with_peakhand(face_direction, coord)) return get_angle_direct_with_peakhand(face_direction, coord) > thresh if 7 in human.body_parts.keys(): body_part = human.body_parts[7] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) print("angle right = ", get_angle_direct_with_peakhand(face_direction, coord)) return get_angle_direct_with_peakhand(face_direction, coord) > thresh return False def get_gazing_status(self, face_directions, face_coords, humans, img): def get_dist_to_humans(human_coords, coord): result = [] for _coord in human_coords: result.append(np.sqrt((coord[0]-_coord[0])**2) + \ (coord[1]-_coord[1])**2) return result image_h, image_w, _ = img.shape gazing_status = [] human_coords = [] for human in humans: sorted_id = sorted(human.body_parts.keys()) body_part = human.body_parts[sorted_id[0]] coord = (int(body_part.x * image_w + 0.5), \ int(body_part.y * image_h + 0.5)) human_coords.append(coord) for fdirection, fcoord in zip(face_directions, face_coords): dists = get_dist_to_humans(human_coords, fcoord) idx = np.argmin(dists) gazing_status.append(self.is_gazing(fdirection, humans[idx], img)) del human_coords[idx] return gazing_status def publish_headpose_info(self, viz=True): thresh = 0.8 time.sleep(0.1) if self.color_img is None or self.humans is None: print("Color img None") return im_shape = self.color_img.shape #scales = [1024, 1980] scales = [480 * 2, 640 * 2] target_size = scales[0] max_size = scales[1] im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) im_scale = float(target_size) / float(im_size_min) thresh = 0.8 if np.round(im_scale * im_size_max) > max_size: im_scale = float(max_size) / float(im_size_max) scales = [im_scale] frame = copy.deepcopy(self.color_img) t1 = time.time() faceboxes = self.mark_detector.extract_cnn_facebox(frame) mess = "Not detect pose" face_directions = [] face_coords = [] if faceboxes is not None and len(faceboxes) > 0: if isinstance(faceboxes[1], int): faceboxes = [faceboxes] for facebox in faceboxes: #facebox = facebox.astype(np.int) # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_coords.append((int((facebox[0]+facebox[2])/2), \ int((facebox[1]+facebox[3])/2))) face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = self.mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = self.pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, self.pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. face_direction = self.pose_estimator.draw_annotation_box( frame, pose[0], pose[1], color=(255, 128, 128)) face_directions.append(face_direction) # Uncomment following line to draw stabile pose annotation on frame. t2 = time.time() mess = round(1 / (t2 - t1), 2) #self.pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. #self.pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1]) gazing_status = self.get_gazing_status(face_directions, face_coords, \ copy.deepcopy(self.humans), self.color_img) msg = GazingInfo() msg.is_gazing = [] msg.coords = [] for coord, gazestatus in zip(face_coords, gazing_status): _coord = Coord2D() _coord.x = coord[0] _coord.y = coord[1] msg.coords.append(_coord) msg.is_gazing.append(gazestatus) self.pub_headpose_info.publish(msg) print(gazing_status) cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), \ cv2.FONT_HERSHEY_SIMPLEX,0.75, (0, 255, 0), thickness=2) # Show preview. if viz: frame = self.draw_pose(frame, self.humans) if frame is None: return cv2.imshow("Preview", frame) cv2.waitKey(1)
def main(): """MAIN""" frame = cv2.imread('pose.jpg') # Video source from webcam or video file. #video_src = 0 #cam = cv2.VideoCapture(video_src) #_, sample_frame = cam.read() sample_frame = frame # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. #img_queue = Queue() #box_queue = Queue() #img_queue.put(sample_frame) #box_process = Process(target=get_face, args=( # mark_detector, img_queue, box_queue,)) #box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] #while True: # Read frame, crop it, flip it, suits your needs. #frame_got, frame = cam.read() #if frame_got is False: # break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. #if video_src == 0: # frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. #img_queue.put(frame) # Get face from box queue. #facebox = box_queue.get() facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. pose_estimator.draw_annotation_box( frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imwrite('pose-preview.jpg', frame) #cv2.imshow("Preview", frame) #if cv2.waitKey(10) == 27: # break # Clean up the multiprocessing process. #box_process.terminate() #box_process.join() cv2.destroyAllWindows()
def process(input_key): if exists('pretrain/' + input_key): return # input_key = data['keys'] input_file = get_video_file(input_key) print('Input file', input_file) stream = cv2.VideoCapture(input_file) mark_detector = MarkDetector() pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] frame = None count = 0 frame_got = True target_folder = 'pretrain/{key}'.format(key=input_key) if not exists(target_folder): makedirs(target_folder) while frame_got: frame_got, frame = stream.read() if frame_got: # print('Frame', frame) facebox = mark_detector.extract_cnn_facebox(frame) # print('Facebox', facebox) if facebox: height, width = frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (input_size, input_size)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # print('Marks', marks) for mark in marks: cv2.circle(frame, tuple(mark), 3, (255, 0, 0)) # print('Marks Length', len(marks)) pose = pose_estimator.solve_pose_by_68_points(marks) # print('Pose', pose) pose = [pose[0].tolist(), pose[1].tolist()] with open( '{target_folder}/vecs.{count}.txt'.format( count=count, target_folder=target_folder), 'w') as f: f.write(json.dumps(pose)) with open( '{target_folder}/marks.{count}.txt'.format( count=count, target_folder=target_folder), 'w') as f: f.write(json.dumps(marks.tolist())) cv2.imwrite( '{target_folder}/image.{count}.png'.format( count=count, target_folder=target_folder), frame) count += 1
def cameraCap(self): self.uId = self.userId.get() if self.uId != '': if self.outputLabel != None: self.outputLabel.destroy() self.outputLabel = Label(self.framePic, text="Here We Start") self.outputLabel.config(font=("Courier", 44)) self.outputLabel.place(x=400, y=100) mark_detector = MarkDetector() name = self.uId directory = os.path.join(facepath, name) if not os.path.exists(facepath): os.makedirs(facepath, exist_ok='True') if not os.path.exists(directory): try: os.makedirs(directory, exist_ok='True') except OSError as e: if e.errno != errno.EEXIST: print('invalid student id or access denied') return poses = ['frontal', 'right', 'left', 'up', 'down'] file = 0 ret, sample_frame = self.cap.read() i = 0 count = 0 if ret == False: return # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] images_saved_per_pose = 0 number_of_images = 0 shape_predictor = dlib.shape_predictor( "shape_predictor_68_face_landmarks.dat") face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH) while i < 5: saveit = False # Read frame, crop it, flip it, suits your needs. ret, frame = self.cap.read() if ret is False: break if count % 5 != 0: # skip 5 frames count += 1 continue if images_saved_per_pose == IMAGE_PER_POSE: i += 1 images_saved_per_pose = 0 # If frame comes from webcam, flip it so it looks like a mirror. if file == 0: frame = cv2.flip(frame, 2) original_frame = frame.copy() frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. x1 = max(facebox[0] - 0, 0) x2 = min(facebox[2] + 0, width) y1 = max(facebox[1] - 0, 0) y2 = min(facebox[3] + 0, height) face = frame[y1:y2, x1:x2] face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # print(steady_pose[0][0]) # if steady_pose[0][0]>0.1: # print('right') # else: # if steady_pose[0][0]<-0.1: # print('left') # if steady_pose[0][1]>0.1: # print('down') # else: # if steady_pose[0][1]<-0.1: # print('up') # print(steady_pose[0]) if i == 0: if abs(steady_pose[0][0]) < ANGLE_THRESHOLD and abs( steady_pose[0][1]) < ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 1: if steady_pose[0][0] > ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 2: if steady_pose[0][0] < -ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 3: if steady_pose[0][1] < -ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True if i == 4: if steady_pose[0][1] > ANGLE_THRESHOLD: images_saved_per_pose += 1 saveit = True # Show preview. if i >= 5: print('Thank you') if self.outputLabel != None: self.outputLabel.destroy() self.outputLabel = Label(self.framePic, text="Thanks") self.outputLabel.config(font=("Courier", 44)) self.outputLabel.place(x=400, y=100) break frame = cv2.putText( frame, poses[i] + ' : ' + str(images_saved_per_pose) + '/' + str(IMAGE_PER_POSE), (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 1, cv2.LINE_AA) frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2) frame = imutils.resize(frame, width=300, height=300) # OpenCV represents images in BGR order; however PIL # represents images in RGB order, so we need to swap # the channels, then convert to PIL and ImageTk format image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) image = Image.fromarray(image) image = ImageTk.PhotoImage(image) # if the panel is not None, we need to initialize it if self.panel is None: self.panel = Label(self.framePic, image=image) self.panel.image = image self.panel.pack(side=LEFT, padx=0, pady=0) print("Done") # otherwise, simply update the panel else: self.panel.configure(image=image) self.panel.image = image if saveit: face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align( original_frame, frame_gray, face) cv2.imwrite( os.path.join( directory, str(name) + '_' + str(number_of_images) + '.jpg'), face_aligned) print(images_saved_per_pose) number_of_images += 1 self.cap.release() else: if self.outputLabel != None: self.outputLabel.destroy() self.outputLabel = Label(self.framePic, text="Please Enter a Valid Id") self.outputLabel.config(font=("Courier", 44)) self.outputLabel.place(x=400, y=100)