def __init__(self, width, height, source_img, landmarks_model_path,
             model3d_path, source_seg=None):
    self.w = width
    self.h = height
    self.detector = dlib.get_frontal_face_detector()
    self.landmarks_model = dlib.shape_predictor(landmarks_model_path)
    self.mean3DShape, self.blendshapes, self.mesh, self.idxs3D, self.idxs2D = \
        utils.load3DFaceModel(model3d_path)
    self.projectionModel = models.OrthographicProjectionBlendshapes(
        self.blendshapes.shape[0])

    landmarks_arr = self.calcLandmarks(source_img)
    if landmarks_arr:
        landmarks = landmarks_arr[0]
    else:
        raise Exception("Couldn't find a face in the source image!")

    textureCoords = self.calcTextureCoords(landmarks)
    self.renderer = face_renderer.FaceRenderer(self.w, self.h, source_img,
                                               textureCoords, self.mesh,
                                               source_seg)
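
# --- Hedged usage sketch (not part of the original file) ---------------------
# Shows how the constructor above might be called. The enclosing class name
# ("FaceSwapper") and the file paths are assumptions for illustration; only
# the constructor signature is taken from the code above.
#
#   import cv2
#   source_img = cv2.imread("source_face.jpg")
#   swapper = FaceSwapper(640, 480, source_img,
#                         landmarks_model_path="models/shape_predictor_68_face_landmarks.dat",
#                         model3d_path="models/candide.npz")
# ------------------------------------------------------------------------------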
def getFaceTextureCoords(img, mean3DShape, blendshapes, idxs2D, idxs3D,
                         detector, predictor, maxImgSizeForDetection=320):
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    keypoints = getFaceKeypoints(img, detector, predictor,
                                 maxImgSizeForDetection)[0]
    modelParams = projectionModel.getInitialParameters(mean3DShape[:, idxs3D],
                                                       keypoints[:, idxs2D])
    modelParams = NonLinearLeastSquares.GaussNewton(
        modelParams, projectionModel.residual, projectionModel.jacobian,
        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
         keypoints[:, idxs2D]),
        verbose=0)
    textureCoords = projectionModel.fun([mean3DShape, blendshapes],
                                        modelParams)

    return textureCoords
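
# --- Hedged usage sketch (not part of the original file) ---------------------
# Shows how getFaceTextureCoords above might be called; the model and image
# paths are placeholders, the rest follows the signature defined above.
#
#   detector = dlib.get_frontal_face_detector()
#   predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
#   mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel("candide.npz")
#   textureImg = cv2.imread("face.jpg")
#   textureCoords = getFaceTextureCoords(textureImg, mean3DShape, blendshapes,
#                                        idxs2D, idxs3D, detector, predictor)
# ------------------------------------------------------------------------------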
def getFaceTextureCoords(img, mean3DShape, blendshapes, idxs2D, idxs3D,
                         openpose):
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    arr, arr2, output_image = openpose.forward(img, False)
    for shape2D in arr2:
        keypoints = shape2D[:, :2].T
        modelParams = projectionModel.getInitialParameters(
            mean3DShape[:, idxs3D], keypoints[:, idxs2D])
        modelParams = NonLinearLeastSquares.GaussNewton(
            modelParams, projectionModel.residual, projectionModel.jacobian,
            ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
             keypoints[:, idxs2D]),
            verbose=0)

    textureCoords = projectionModel.fun([mean3DShape, blendshapes],
                                        modelParams)
    return textureCoords
def process_video(self, in_filename, out_filename, face_filename, keep_audio=True):
    # extract the audio clip from the source video
    if keep_audio:
        clip = VideoFileClip(in_filename)
        clip.audio.write_audiofile("./temp/src_audio.mp3", verbose=False)

    predictor_path = "./models/shape_predictor_68_face_landmarks.dat"
    # predictor_path = "./models/shape_predictor_81_face_landmarks.dat"

    # the smaller this value gets the faster the detection will work
    # if it is too small, the user's face might not be detected
    maxImageSizeForDetection = 320

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)
    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "./models/candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    # open the source video
    vidcap = cv2.VideoCapture(in_filename)

    # get some parameters from the input video
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(vidcap.get(cv2.CAP_PROP_FPS))
    frames_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

    # create a video writer for the output
    res_filename = "./output/" + out_filename
    vidwriter = cv2.VideoWriter(res_filename,
                                cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                                fps, (width, height))

    cameraImg = vidcap.read()[1]
    textureImg = cv2.imread(face_filename)
    textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape,
                                               blendshapes, idxs2D, idxs3D,
                                               detector, predictor)
    renderer = FaceRendering.FaceRenderer(cameraImg, textureImg, textureCoords,
                                          mesh)

    destShapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
    destShape = destShapes2D[0]
    modelParams = projectionModel.getInitialParameters(
        mean3DShape[:, idxs3D], destShape[:, idxs2D])
    # 3D model parameter optimization
    modelParams = NonLinearLeastSquares.GaussNewton(
        modelParams, projectionModel.residual, projectionModel.jacobian,
        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
         destShape[:, idxs2D]),
        verbose=0)
    # rendering the model to an image
    destShape3D = utils.getShape3D(mean3DShape, blendshapes, modelParams)
    destRenderedImg = renderer.render(destShape3D)

    self.progressBar.setRange(0, frames_count - 1)

    # iterate over the frames and apply the face swap
    for i in tqdm(range(frames_count - 1)):
        success, cameraImg = vidcap.read()
        self.progressBar.setValue(i + 1)
        if not success:
            # no frames left => break
            break

        shapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
        newImg = cameraImg
        try:
            if shapes2D is not None:
                for shape2D in shapes2D:
                    # 3D model parameter initialization
                    modelParams = projectionModel.getInitialParameters(
                        mean3DShape[:, idxs3D], shape2D[:, idxs2D])
                    # 3D model parameter optimization
                    modelParams = NonLinearLeastSquares.GaussNewton(
                        modelParams, projectionModel.residual,
                        projectionModel.jacobian,
                        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                         shape2D[:, idxs2D]),
                        verbose=0)
                    # rendering the model to an image
                    shape3D = utils.getShape3D(mean3DShape, blendshapes,
                                               modelParams)
                    renderedImg = renderer.render(shape3D)
                    # blending of the rendered face with the image
                    mask = np.copy(renderedImg[:, :, 0])
                    mask1 = np.copy(destRenderedImg[:, :, 0])
                    renderedImg = ImageProcessing.colorTransfer(
                        cameraImg, renderedImg, mask)
                    # newImg = ImageProcessing.blendImages(renderedImg, cameraImg, mask)
                    newImg = ImageProcessing.blendImages0(
                        renderedImg, cameraImg, mask, mask1)
        except:
            pass
        vidwriter.write(newImg)

    # release video capture and writer
    vidcap.release()
    vidwriter.release()
    renderer.release()

    # apply the extracted audio clip to the generated video
    if keep_audio:
        video = VideoFileClip("./output/proc_video.avi")
        # read the audio back from where it was written above
        video.write_videofile(out_filename,
                              audio="./temp/src_audio.mp3",
                              progress_bar=False,
                              verbose=False)
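
# --- Hedged usage sketch (not part of the original file) ---------------------
# process_video above appears to be a method of a GUI class (it drives
# self.progressBar). Assuming an instance called `app`, a call might look like
# the following; the file names are placeholders. Note that the method writes
# its intermediate result to ./output/<out_filename>, and the audio merge step
# expects that file to be ./output/proc_video.avi.
#
#   app.process_video(in_filename="./input/src_video.mp4",
#                     out_filename="proc_video.avi",
#                     face_filename="./input/face.jpg",
#                     keep_audio=True)
# ------------------------------------------------------------------------------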
# if it is too small, the user's face might not be detected
maxImageSizeForDetection = 320

# detector that finds faces in the frames captured by the camera
detector = dlib.get_frontal_face_detector()
# predictor that locates the key facial landmarks, such as the corners of the
# mouth and eyes and the tip of the nose
predictor = dlib.shape_predictor(predictor_path)

# candide = 3D face model source
# mean3DShape : list of vertices corresponding to the neutral state of the face
# blendshapes : deformations that can be added on top of the neutral face,
#               e.g. a smile or raised eyebrows; derived from the Animation
#               Units defined in Candide
# mesh : the original mesh provided by Candide as a list of faces
# idxs3D, idxs2D : indices of corresponding points between the Candide model
#                  (idxs3D) and the face alignment point set (idxs2D)
mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
    "../candide.npz")
projectionModel = models.OrthographicProjectionBlendshapes(blendshapes.shape[0])


def movement_detection():
    ret, frame1 = cv2.VideoCapture(VIDEO_CAPTURE_CAM_NUM).read()
    frame1 = cv2.resize(frame1, (100, 50))
    prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    hsv = np.zeros_like(frame1)
    hsv[..., 1] = 255

    mov_check_cap = cv2.VideoCapture(VIDEO_CAPTURE_CAM_NUM)
    while True:
        ret, frame2 = mov_check_cap.read()
        frame2 = cv2.resize(frame2, (100, 50))
        next = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
        flow = cv2.calcOpticalFlowFarneback(prvs, next, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
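
# --- Hedged sketch (not part of the original file) ----------------------------
# movement_detection() is truncated above right after the Farneback flow
# magnitude is computed. One common way to turn that magnitude into a yes/no
# movement decision is to threshold its mean; the threshold value and the
# variable names below are assumptions for illustration.
#
#       MOVEMENT_THRESHOLD = 1.0  # tune for the camera and scene
#       if np.mean(mag) > MOVEMENT_THRESHOLD:
#           print("movement detected")
#       prvs = next  # carry the current frame over as the previous frame
# ------------------------------------------------------------------------------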
def mix_video():
    data = {}
    if os.path.getsize('global.pickle') > 0:
        with open('global.pickle', 'rb') as pf:
            data = pickle.load(pf)
    else:
        win32api.MessageBox(
            0,
            'The values were not passed correctly. Please start again from the beginning.',
            'warning', 0x00001000)
        sys.exit(0)

    backgroundvideo = "testvideo1"
    genderNum = data['gender']
    backgroundvideo = data['backgroundvideo']
    userfolder = data['userfolder']
    personnumber = data['personnumber']
    backgroundvideo_name = data['backgroundvideo_name']
    print('gender:', genderNum, 'vid_dir:', backgroundvideo,
          'user_dir:', userfolder, 'person#:', personnumber,
          'vid_name:', backgroundvideo_name)

    user_shape_data = {}
    with open("input/" + userfolder + "/" + 'userdata.pickle', 'rb') as pf:
        user_shape_data = pickle.load(pf)

    print("Press T to draw the keypoints and the 3D model")  # overlay the face-compositing process on the result video
    print("Press R to start recording to a video file")  # recording

    # download and extract shape_predictor_68_face_landmarks.dat from:
    # http://sourceforge.net/projects/dclib/files/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2
    # keypoint detection model
    predictor_path = "data/shape_predictor_68_face_landmarks.dat"

    # the smaller the image, the faster detection runs;
    # if it is too small, faces may not be detected
    maxImageSizeForDetection = 320

    # load the face detector
    detector = dlib.get_frontal_face_detector()
    # detector = dlib.cnn_face_detection_model_v1("data/mmod_human_face_detector.dat")
    predictor = dlib.shape_predictor(predictor_path)
    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "data/candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    modelParams = None
    lockedTranslation = False
    drawOverlay = False
    writer = None

    # combinations: VC: Video+Cam | CI: Cam+Image | CC: Cam+Cam | VI: Video+Image
    cap_background = cv2.VideoCapture(backgroundvideo)  # Video for background
    # cap_background = cv2.VideoCapture("input/" + backgroundvideo + ".mp4")  # Video for background
    # cap_background = cv2.VideoCapture(0)  # WebCAM for background
    cameraImg = cap_background.read()[1]

    # keep several images of the user's face in memory (0~9)
    textureImgs = []
    for i in range(0, 9):
        textureImgs.append(
            cv2.imread("input/" + userfolder + "/" + str(i) + ".jpg"))  # Image for face
        # textureImgs.append(cv2.imread("input/user_test1_images/" + str(i) + ".jpg"))  # Image for face

    if not os.path.isdir('output'):
        os.mkdir('output')
    output_video_name = 'output/' + backgroundvideo + '_' + \
        datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + '.avi'
    writer = cv2.VideoWriter(output_video_name, cv2.VideoWriter_fourcc(*'XVID'),
                             25, (cameraImg.shape[1], cameraImg.shape[0]), True)

    modelParams = np.zeros(20)
    startTime = time.time()

    full_shapes2D_csv = selectGender.reading_csv(backgroundvideo_name + "_annotation")
    # full_shapes2D_csv = selectGender.reading_csv("testvideo1_annotation")
    for framecnt, shapes2D_csv in enumerate(full_shapes2D_csv):
        print("frame number:", framecnt)
        # read the frame image from the background video
        cap_background.set(cv2.CAP_PROP_POS_FRAMES, framecnt)
        ret, cameraImg = cap_background.read()
        try:
            background_user_shapes2D = utils.getFaceKeypoints(
                cameraImg, detector, predictor, maxImageSizeForDetection)
            if shapes2D_csv is not None:
                # break out of the loop when the video ends
                if not ret:
                    break

                # textureImg, user_face_angle, user_shapes2D, textureCoords = user_shape_data[utils.getFaceAngle(background_user_shapes2D)]
                # fetch the data for the matching angle from the stored user face info
                background_face_angle = utils.getFaceAngle(
                    background_user_shapes2D)
                print("user_face_angle: {}".format(background_face_angle))
                textureImg = textureImgs[background_face_angle]
                user_shapes2D = utils.getFaceKeypoints(
                    textureImg, detector, predictor, maxImageSizeForDetection)
                textureCoords = utils.getFaceTextureCoords_v2(
                    textureImg, mean3DShape, blendshapes, idxs2D, idxs3D,
                    user_shapes2D, predictor)
                renderer = FaceRendering.FaceRenderer(cameraImg, textureImg,
                                                      textureCoords, mesh)

                for shape2D in background_user_shapes2D:  # [shapes2D_csv]:
                    # 3D model parameter initialization (to build the 3D model
                    # from the face detected in the video)
                    modelParams = projectionModel.getInitialParameters(
                        mean3DShape[:, idxs3D], shape2D[:, idxs2D])

                    # 3D model parameter optimization (minimizes the distance
                    # between the keypoints of the background face and the
                    # input face)
                    modelParams = NonLinearLeastSquares.GaussNewton(
                        modelParams, projectionModel.residual,
                        projectionModel.jacobian,
                        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                         shape2D[:, idxs2D]),
                        verbose=0)

                    # use the model above to turn the input face into a 3D
                    # object aligned with the position of the background face
                    shape3D = utils.getShape3D(mean3DShape, blendshapes,
                                               modelParams)
                    renderedImg = renderer.render(shape3D)

                    # composite the input face onto the background video
                    # (using color transfer and image blending)
                    mask = np.copy(renderedImg[:, :, 0])
                    renderedImg = ImageProcessing.colorTransfer(
                        cameraImg, renderedImg, mask)
                    cameraImg = ImageProcessing.blendImages(
                        renderedImg, cameraImg, mask)

                    # draw the 3D mesh and keypoints on top of the frame
                    if drawOverlay:
                        drawPoints(cameraImg, shape2D.T)  # green
                        drawProjectedShape(cameraImg,
                                           [mean3DShape, blendshapes],
                                           projectionModel, mesh, modelParams,
                                           lockedTranslation)

                writer.write(cameraImg)
                # show the face-swapped frame
                cv2.imshow('image', cameraImg)
        except:
            pass

    # print the elapsed time in seconds
    writer.release()
    endTime = time.time() - startTime
    print("endTime: ", endTime)
def guide_facechange():
    def tmp():
        print("")

    t = threading.Timer(GUIDE_SHOW_TIME, tmp)
    t.start()

    print("Press T to draw the keypoints and the 3D model")
    print("Press R to start recording to a video file")

    # loading the keypoint detection model, the image and the 3D model
    predictor_path = "../shape_predictor_68_face_landmarks.dat"
    image_name = "../data/jolie.jpg"

    # the smaller this value gets the faster the detection will work
    # if it is too small, the user's face might not be detected
    maxImageSizeForDetection = 320

    # detector that finds faces in the frames captured by the camera
    detector = dlib.get_frontal_face_detector()
    # predictor that locates the key facial landmarks, such as the corners of
    # the mouth and eyes and the tip of the nose
    predictor = dlib.shape_predictor(predictor_path)

    # candide = 3D face model source
    # mean3DShape : list of vertices corresponding to the neutral state of the face
    # blendshapes : deformations that can be added on top of the neutral face,
    #               e.g. a smile or raised eyebrows; derived from the Animation
    #               Units defined in Candide
    # mesh : the original mesh provided by Candide as a list of faces
    # idxs3D, idxs2D : indices of corresponding points between the Candide
    #                  model (idxs3D) and the face alignment point set (idxs2D)
    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "../candide.npz")
    #
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    modelParams = None
    lockedTranslation = False
    drawOverlay = False
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture("../ROEM 2014 Spring SUZY 320p.mp4")
    writer = None
    cameraImg = cap.read()[1]

    # image whose face will be swapped onto the video
    textureImg = cv2.VideoCapture(0).read()[1]  # cv2.imread(image_name)

    print("ad video shape : \t\t", cameraImg.shape[1], cameraImg.shape[0])
    print("camera capture shape : ", textureImg.shape[1], textureImg.shape[0])

    ###### face detection with guide
    cap_guide_cam = cv2.VideoCapture(0)
    if not cap_guide_cam.isOpened():
        print("Unable to read camera feed")
    frame_width = int(cap_guide_cam.get(3))
    frame_height = int(cap_guide_cam.get(4))

    str = "match your face"
    str2 = "O"
    str3 = "ATTENTION"
    while True:
        ret, frame = cap_guide_cam.read()
        frame_org = frame
        cv2.putText(frame, str, (int(frame_width / 3), int(frame_height / 6)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 600),
                    (0, 0, 0), int(frame_width / 300))
        cv2.putText(frame, str2, (int(frame_width / 3), int(frame_width / 2)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 60),
                    (0, 0, 255), int(frame_width / 300))
        cv2.putText(frame, str3,
                    (int((frame_width * 2) / 3), int((frame_height * 2) / 3)),
                    cv2.FONT_HERSHEY_SIMPLEX, int(frame_width / 650),
                    (0, 0, 0), int(frame_width / 300))
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        # run face detection on the first capture taken from the camera
        dets = detector(frame_org, 1)
        if len(dets) > 0:
            print("detected")
            break
        else:
            print("now detecting")
            if not t.isAlive():
                video()
                break

    # reconstruct the captured face in 3D and composite it onto the video
    textureCoords = utils.getFaceTextureCoords(textureImg, mean3DShape,
                                               blendshapes, idxs2D, idxs3D,
                                               detector, predictor)
    # render the captured face data onto the face in the video
    renderer = FaceRendering.FaceRenderer(cameraImg, textureImg, textureCoords,
                                          mesh)

    doProcess = False
    meanTime = [[0] * 4 for i in range(4)]
    while True:
        # grab a frame from the video
        cameraImg = cap.read()[1]
        shapes2D = utils.getFaceKeypoints(cameraImg, detector, predictor,
                                          maxImageSizeForDetection)
        doProcess = not doProcess
        if doProcess is not True:
            continue
        else:
            if shapes2D is not None:
                for shape2D in shapes2D:
                    start = timeit.default_timer()
                    # 3D model parameter initialization
                    modelParams = projectionModel.getInitialParameters(
                        mean3DShape[:, idxs3D], shape2D[:, idxs2D])
                    stop = timeit.default_timer()
                    meanTime[0][0] += stop - start
                    meanTime[0][1] += 1
                    # print(1, float(meanTime[0][0]/meanTime[0][1]))

                    start = timeit.default_timer()
                    # 3D model parameter optimization
                    modelParams = NonLinearLeastSquares.GaussNewton(
                        modelParams, projectionModel.residual,
                        projectionModel.jacobian,
                        ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                         shape2D[:, idxs2D]),
                        verbose=0)
                    stop = timeit.default_timer()
                    meanTime[1][0] += stop - start
                    meanTime[1][1] += 1
                    # print(2, float(meanTime[1][0]/meanTime[1][1]))

                    start = timeit.default_timer()
                    # rendering the model to an image
                    # refine
                    shape3D = utils.getShape3D(mean3DShape, blendshapes,
                                               modelParams)
                    renderedImg = renderer.render(shape3D)
                    stop = timeit.default_timer()
                    meanTime[2][0] += stop - start
                    meanTime[2][1] += 1
                    # print(3, float(meanTime[2][0]/meanTime[2][1]))

                    start = timeit.default_timer()
                    # blending of the rendered face with the image
                    mask = np.copy(renderedImg[:, :, 0])
                    renderedImg = ImageProcessing.colorTransfer(
                        cameraImg, renderedImg, mask)
                    cameraImg = ImageProcessing.blendImages(
                        renderedImg, cameraImg, mask)
                    stop = timeit.default_timer()
                    meanTime[3][0] += stop - start
                    meanTime[3][1] += 1
                    # print(4, float(meanTime[3][0] / meanTime[3][1]))

                    # drawing of the mesh and keypoints
                    # toggled with 't'; face points are drawn
                    if drawOverlay:
                        drawPoints(cameraImg, shape2D.T)
                        drawProjectedShape(cameraImg,
                                           [mean3DShape, blendshapes],
                                           projectionModel, mesh, modelParams,
                                           lockedTranslation)

            if writer is not None:
                writer.write(cameraImg)

            cv2.namedWindow("image", cv2.WND_PROP_FULLSCREEN)
            cv2.setWindowProperty("image", cv2.WND_PROP_FULLSCREEN, 1)
            cv2.imshow('image', cameraImg)

            key = cv2.waitKey(1)
            if key == 27 or key == ord('q'):
                break
            if key == ord('t'):
                drawOverlay = not drawOverlay
            if key == ord('r'):
                cv2.destroyAllWindows()
                video()
            if key == ord('w'):
                if writer is None:
                    print("Starting video writer")
                    writer = cv2.VideoWriter(
                        "../out.avi",
                        cv2.VideoWriter_fourcc('X', 'V', 'I', 'D'), 13,
                        (cameraImg.shape[1], cameraImg.shape[0]))
                    if writer.isOpened():
                        print("Writer successfully opened")
                    else:
                        writer = None
                        print("Writer opening failed")
                else:
                    print("Stopping video writer")
                    writer.release()
                    writer = None

    cap.release()
    cap_intro_vid.release()
    cap_guide_cam.release()
    cv2.destroyAllWindows()
def main():
    os.makedirs('original', exist_ok=True)
    os.makedirs('landmarks', exist_ok=True)

    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    files = []
    for fl_type in ['png', 'jpg']:
        files.extend(glob.glob(args.Directory + '/*.' + fl_type))
    print(len(files))

    count = 0
    imgParams = {}
    for fl in files:
        frame = cv2.imread(fl)
        print(fl)
        frame_resize = cv2.resize(frame, None, fx=1 / DOWNSAMPLE_RATIO,
                                  fy=1 / DOWNSAMPLE_RATIO)
        gray = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY)
        faces = detector(gray, 1)
        black_image = np.zeros(frame.shape, np.uint8)
        t = time.time()

        # proceed only if there is a face detected
        shapes2D = getFaceKeypoints(frame, detector, predictor)
        if shapes2D is None:
            continue
        if len(shapes2D) == 1:
            # 3D model parameter initialization
            modelParams = projectionModel.getInitialParameters(
                mean3DShape[:, idxs3D], shapes2D[0][:, idxs2D])
            # 3D model parameter optimization
            modelParams = NonLinearLeastSquares.GaussNewton(
                modelParams, projectionModel.residual,
                projectionModel.jacobian,
                ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                 shapes2D[0][:, idxs2D]),
                verbose=0)
            drawProjectedShape(black_image, [mean3DShape, blendshapes],
                               projectionModel, mesh, modelParams,
                               lockedTranslation)

            # save the resulting frame and its model parameters
            imgParams[fl] = modelParams
            count += 1
            print(count)
            cv2.imwrite("original/{}.png".format(count), frame)
            cv2.imwrite("landmarks/{}.png".format(count), black_image)
            print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))
        else:
            print("No face detected")
def main():
    os.makedirs('original', exist_ok=True)
    os.makedirs('landmarks', exist_ok=True)

    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    cap = cv2.VideoCapture(args.filename)
    fps = video.FPS().start()

    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if frame is None:
            break
        frame_resize = cv2.resize(frame, None, fx=1 / DOWNSAMPLE_RATIO,
                                  fy=1 / DOWNSAMPLE_RATIO)
        gray = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY)
        faces = detector(gray, 1)
        black_image = np.zeros(frame.shape, np.uint8)
        t = time.time()

        # proceed only if there is a face detected
        shapes2D = getFaceKeypoints(frame, detector, predictor)
        if shapes2D is None:
            continue
        if len(shapes2D) == 1:
            # 3D model parameter initialization
            modelParams = projectionModel.getInitialParameters(
                mean3DShape[:, idxs3D], shapes2D[0][:, idxs2D])
            # 3D model parameter optimization
            modelParams = NonLinearLeastSquares.GaussNewton(
                modelParams, projectionModel.residual,
                projectionModel.jacobian,
                ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
                 shapes2D[0][:, idxs2D]),
                verbose=0)
            drawProjectedShape(black_image, [mean3DShape, blendshapes],
                               projectionModel, mesh, modelParams,
                               lockedTranslation)

            # save the resulting frame
            count += 1
            print(count)
            cv2.imwrite("original/{}.png".format(count), frame)
            cv2.imwrite("landmarks/{}.png".format(count), black_image)
            fps.update()
            print('[INFO] elapsed time: {:.2f}'.format(time.time() - t))

            if count == args.number:  # stop once the requested number of frames is reached
                break
            elif cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            print("No face detected")

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    cap.release()
    cv2.destroyAllWindows()
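
# --- Hedged sketch (not part of the original file) ----------------------------
# This main() relies on a module-level argparse namespace (args.filename and
# args.number), plus a module-level dlib detector/predictor. The flag names
# below are assumptions for illustration.
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--filename", type=str, help="input video file")
#   parser.add_argument("--number", type=int, default=400,
#                       help="number of frames to export")
#   args = parser.parse_args()
#   main()
# ------------------------------------------------------------------------------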
class Visualizer:
    BREAK_KEY_LABELS = "q(Q) or Escape"
    BREAK_KEYS = {ord('q'), ord('Q'), 27}

    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "../candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])
    modelParams = None
    lockedTranslation = False
    drawOverlay = False
    # cap = cv2.VideoCapture('/dev/video0')
    writer = None

    image_name = "../data/AK-2.jpeg"
    textureImg = cv2.imread(image_name)

    # loading the keypoint detection model, the image and the 3D model
    predictor_path = "../shape_predictor_68_face_landmarks.dat"
    # image_name = "../data/jolie.jpg"
    image_name = "../data/AK-2.jpeg"

    # the smaller this value gets the faster the detection will work
    # if it is too small, the user's face might not be detected
    maxImageSizeForDetection = 320

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(predictor_path)

    def __init__(self, args):
        # self.cameraImg = self.input_stream.read()[1]
        self.frame_processor = FrameProcessor(args)
        self.display = not args.no_show
        self.print_perf_stats = args.perf_stats

        self.frame_time = 0
        self.frame_start_time = 0
        self.fps = 0
        self.frame_num = 0
        self.frame_count = -1

        self.frame_timeout = 0 if args.timelapse else 1

        # self.eye_brow_obj = EyeBrows()
        self.eye_brow_right = cv2.imread(
            "images/eyebrows/e5.png")  # , self.eye_brow_obj.get_image()
        self.eye_brow_left = cv2.flip(self.eye_brow_right, 1)
        # self.frame_dict = {}
        # print(type(self.eye_brow_right))
        # print(self.eye_brow_right.shape)
        # print(self.eye_brow_right)
        # self.eye_brow_right = cv2.imread(r"D:\.openvino\fd\Images\eyebrows\e1 (5)_.png")
        # self.eye_brow_right = self.eye_brow_right
        # _, self.eye_brow_right = cv2.threshold(self.eye_brow_right, 250, 0, cv2.THRESH_BINARY)
        # self.eye_brow_right = cv2.flip(self.eye_brow_left, 1)
        # self.eye_brow_left = cv2.cvtColor(self.eye_brow_left, cv2.COLOR_RGB2HSV)

    def fliped(self, frame):
        return cv2.flip(frame, 1)

    def get_distance(self, b, a):
        print(b, a)
        distance = abs(b - a)
        # time.sleep(10)
        distance = map(int, distance)
        print(distance)
        return tuple(distance)

    def get_angle(self, b, a):
        c = np.array([a[0], b[1]])
        ba = a - b
        bc = c - b
        cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) *
                                         np.linalg.norm(bc))
        angle = np.degrees(np.arccos(cosine_angle))
        angle = angle if b[1] >= a[1] else 360 - angle
        # print(angle, '----------------------------------angle--')
        # time.sleep(10)
        return angle

    def rotateImage(self, image, angle, frame='left'):
        angle = angle if angle > 0 else 360 + angle
        # print(angle)
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        result = cv2.warpAffine(image, rot_mat, image.shape[1::-1],
                                flags=cv2.INTER_LINEAR)
        # cv2.imshow(frame, result)
        # cv2.waitKey(100)
        return result

    def update_fps(self):
        now = time.time()
        self.frame_time = now - self.frame_start_time
        self.fps = 1.0 / self.frame_time
        self.frame_start_time = now

    def draw_text_with_background(self, frame, text, origin,
                                  font=cv2.FONT_HERSHEY_SIMPLEX, scale=1.0,
                                  color=(0, 0, 0), thickness=1,
                                  bgcolor=(255, 255, 255)):
        text_size, baseline = cv2.getTextSize(text, font, scale, thickness)
        cv2.rectangle(
            frame, tuple((origin + (0, baseline)).astype(int)),
            tuple((origin + (text_size[0], -text_size[1])).astype(int)),
            bgcolor, cv2.FILLED)
        cv2.putText(frame, text, tuple(origin.astype(int)), font, scale,
                    color, thickness)
        return text_size, baseline

    def draw_detection_roi(self, frame, roi, identity):
        # Draw face ROI border
        cv2.rectangle(frame, tuple(roi.position),
                      tuple(roi.position + roi.size), (0, 220, 0), 2)

    def draw_detection_keypoints(self, frame, roi, landmarks, head_pose):
        keypoints = [
            landmarks.one,
            # landmarks.two,
            landmarks.three,
            landmarks.four,
            # landmarks.five,
            landmarks.six
        ]
        print('.', end='')
        for point in keypoints:
            # print("point------", point, roi.position, roi.size)
            center = roi.position + roi.size * point
            # print("center------", center)
            cv2.circle(frame, tuple(center.astype(int)), 2, (0, 255, 255), 2)

    def draw_face_mesh(self, frame, roi, landmarks, head_pose):
        # print('draw_face_mesh-------------------')
        if self.shapes2D is not None:
            for shape2D in self.shapes2D:
                # list of [[x1, x2, x3, ..., xn], [y1, y2, y3, ..., yn]]
                print(shape2D[0], len(shape2D[0]))

                # 3D model parameter initialization
                modelParams = self.projectionModel.getInitialParameters(
                    self.mean3DShape[:, self.idxs3D],
                    shape2D[:, self.idxs2D])

                # 3D model parameter optimization
                modelParams = NonLinearLeastSquares.GaussNewton(
                    modelParams, self.projectionModel.residual,
                    self.projectionModel.jacobian,
                    ([
                        self.mean3DShape[:, self.idxs3D],
                        self.blendshapes[:, :, self.idxs3D]
                    ], shape2D[:, self.idxs2D]),
                    verbose=0)

                # rendering the model to an image
                shape3D = utils.getShape3D(self.mean3DShape,
                                           self.blendshapes, modelParams)
                renderedImg = self.renderer.render(shape3D)

                # blending of the rendered face with the image
                mask = np.copy(renderedImg[:, :, 0])
                renderedImg = ImageProcessing.colorTransfer(
                    frame, renderedImg, mask)
                # apply rendered image on frame
                frame = ImageProcessing.blendImages(renderedImg, frame, mask)
                # cv2.imshow('frame', frame)

                # drawing of the mesh and keypoints
                if self.drawOverlay:
                    drawPoints(frame, shape2D.T)
                    drawProjectedShape(frame,
                                       [self.mean3DShape, self.blendshapes],
                                       self.projectionModel, self.mesh,
                                       modelParams, self.lockedTranslation)

        cv2.imshow('Face recognition demo', frame)
        if self.writer is not None:
            # output_stream.write(frame)
            self.writer.write(frame)

    def draw_eye_brows(self, frame, roi, landmarks, head_pose):
        # angle_up_down, angle_head_tilt, angle_left_right = map(lambda x: x // 20 * 20, map(int, self.get_head_pose_angles(frame, roi, head_pose)))
        angle_up_down, angle_head_tilt, angle_left_right = \
            self.get_head_pose_angles(frame, roi, head_pose)
        angle_para = {
            'angle_up_down': angle_up_down,
            'angle_head_tilt': angle_head_tilt,
            'angle_left_right': angle_left_right
        }

        # center point for each eye
        center_r = roi.position + roi.size * landmarks.five
        center_l = roi.position + roi.size * landmarks.two
        # print('-----------------------------------------', 'center_r', center_r)
        # height, width, channels = img.shape
        # print(f'{angle_up_down}_{angle_head_tilt}_{angle_left_right}')
        # try:
        #     self.eye_brow_left, self.eye_brow_right = self.frame_dict[f'{angle_up_down}_{angle_head_tilt}_{angle_left_right}']
        #     print("-----hurra------")
        # except Exception as e:
        #     print('e--', e)
        #     self.eye_brow_left, self.eye_brow_right = self.eye_brow_obj.update_layout(angle=angle_para)
        #     self.frame_dict[f'{angle_up_down}_{angle_head_tilt}_{angle_left_right}'] = (self.eye_brow_left, self.eye_brow_right)

        # manual resize image of eye
        # for distance
        center_r1 = roi.size * landmarks.four
        center_r2 = roi.size * landmarks.six
        center_l1 = roi.size * landmarks.one
        center_l2 = roi.size * landmarks.three
        # eye_brow_right = cv2.resize(self.eye_brow_right, self.get_distance(center_l1, center_l2))
        # eye_brow_left = cv2.resize(self.eye_brow_left, self.get_distance(center_r1, center_r2))

        # auto resizing
        eye_brow_right = cv2.resize(
            self.eye_brow_right, (int(roi.size[1] / 4), int(roi.size[0] / 4)))
        eye_brow_left = cv2.resize(
            self.eye_brow_left, (int(roi.size[1] / 4), int(roi.size[0] / 4)))
        # rotate images of eye brow
        # eye_brow_right = self.rotateImage(eye_brow_right, self.get_angle(landmarks.three, landmarks.six), frame="right")
        # eye_brow_left = self.rotateImage(eye_brow_left, self.get_angle(landmarks.one, landmarks.three))
        eye_brow_right = self.rotateImage(eye_brow_right, -angle_head_tilt,
                                          frame="right")
        eye_brow_left = self.rotateImage(eye_brow_left, -angle_head_tilt)
        # eye_brow_left = cv2.cvtColor(eye_brow_left, cv2.COLOR_BGR2RGB)

        # height, width, channels = img.shape
        height, width, channels_f = frame.shape
        rows, cols, channels_r = eye_brow_right.shape
        rows, cols, channels_l = eye_brow_left.shape
        # eye_brow_left = cv2.resize(eye_brow_left, (width, height))
        # frame = cv2.addWeighted(frame, 0, eye_brow_left, 1, 0)

        p2, p1 = int(center_r[0] - cols / 2), int(center_r[1] - rows / 2)  # 0, 250
        frame_eb_r = cv2.addWeighted(frame[p1:p1 + rows, p2:p2 + cols], 1,
                                     eye_brow_right, 1, 0)
        # frame[250:250+rows, 0:0+cols] = eye_brow_right
        frame[p1:p1 + rows, p2:p2 + cols] = frame_eb_r

        p2, p1 = int(center_l[0] - cols / 2), int(center_l[1] - rows / 2)  # 0, 250
        frame_eb_r_l = cv2.addWeighted(frame[p1:p1 + rows, p2:p2 + cols], 1,
                                       eye_brow_left, 1, 0)
        # frame[250:250+rows, 0:0+cols] = eye_brow_right
        # cv2.circle(frame, (int(center_l[0] - cols / 2), int(center_l[1] - rows / 3)), 2, (0, 255, 255), 2)
        frame[p1:p1 + rows, p2:p2 + cols] = frame_eb_r_l
        # print('channels_f,channels_l,channels_r', channels_f, channels_l, channels_r)
        # cv2.imshow('dsfdas', self.eye_brow_right)

    def get_head_pose_angles(self, frame, roi, head_pose):
        angle_p_fc, angle_r_fc, angle_y_fc = [
            next(iter(obj))[0] for obj in head_pose
        ]
        # for obj in head_pose:
        #     for j in obj:
        #         print(j, end='')
        # angle_up_down, angle_head_tilt, angle_left_right = angle_p_fc, angle_r_fc, angle_y_fc
        # print(angle_p_fc, angle_r_fc, angle_y_fc)
        return angle_p_fc, angle_r_fc, angle_y_fc

    def draw_detections(self, frame, detections):
        # print("******" * 50)
        for roi, landmarks, head_pose, identity in zip(*detections):
            # self.draw_detection_roi(frame, roi, identity)
            # self.draw_detection_keypoints(frame, roi, landmarks, head_pose)
            try:
                self.draw_eye_brows(frame, roi, landmarks, head_pose)
                self.draw_face_mesh(frame, roi, landmarks, head_pose)
            except Exception as ex:
                print(ex)
            # self.draw_detection_head_pose(frame, roi, head_pose)

    def display_interactive_window(self, frame):
        # frame = cv2.flip(frame, 1)
        color = (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX
        text_scale = 0.5
        text = "Press '%s' key to exit" % (self.BREAK_KEY_LABELS)
        thickness = 2
        text_size = cv2.getTextSize(text, font, text_scale, thickness)
        origin = np.array([frame.shape[-2] - text_size[0][0] - 10, 10])
        line_height = np.array([0, text_size[0][1]]) * 1.5
        cv2.putText(frame, text, tuple(origin.astype(int)), font, text_scale,
                    color, thickness)
        # height, width, channels = frame.shape
        # eye_brow_right = cv2.imread(r"D:\.openvino\fd\Eyebrows\e1 (6).png")
        # eye_brow_left = cv2.imread(r"D:\.openvino\fd\Eyebrows\eye_brow_left.png")
        # eye_brow_left = cv2.cvtColor(eye_brow_left, cv2.COLOR_BGR2RGB)
        # eye_brow_left = cv2.resize(eye_brow_left, (width, height))
        # frame = cv2.addWeighted(frame, 0, eye_brow_left, 1, 0)
        # rows, cols, channels = eye_brow_right.shape
        # eye_brow_right = cv2.addWeighted(frame[250:250+rows, 0:0+cols], 1, eye_brow_right, 1, 0)
        # frame[250:250+rows, 0:0+cols] = eye_brow_right
        # cv2.imshow('Face recognition demo', frame)

    def should_stop_display(self):
        # key = cv2.waitKey(self.frame_timeout) & 0xFF
        # return key in self.BREAK_KEYS
        key = cv2.waitKey(1)
        if key == 27 or key == ord('q'):
            return True  # break
        if key == ord('l'):
            self.drawOverlay = not self.drawOverlay
            return False
        if key == ord('r'):
            if self.writer is None:
                print("Starting video writer")
                # self.writer = cv2.VideoWriter("../out.avi", cv2.CV_FOURCC('X', 'V', 'I', 'D'), 25, (frame.shape[1], frame.shape[0]))
                self.writer = cv2.VideoWriter(
                    "../out.avi", cv2.VideoWriter.fourcc(*'MJPG'), 25,
                    (frame.shape[1], frame.shape[0]))
                if self.writer.isOpened():
                    print("Writer successfully opened")
                else:
                    self.writer = None
                    print("Writer opening failed")
            else:
                print("Stopping video writer")
                self.writer.release()
                self.writer = None

    def process(self, input_stream, output_stream):
        frame_rate = 10
        prev = 0
        self.input_stream = input_stream
        self.output_stream = output_stream
        # cameraImg = input_stream.read()[1]
        while input_stream.isOpened():
            time_elapsed = time.time() - prev
            has_frame, frame = input_stream.read()
            # print("##" * 100)
            # to be removed
            self.shapes2D = utils.getFaceKeypoints(
                frame, self.detector, self.predictor,
                self.maxImageSizeForDetection)
            # if time_elapsed > 1. / frame_rate:
            #     prev = time.time()
            # else:
            #     continue
            if not has_frame:
                break
            # frame = self.fliped(frame)  # manually added by SG to make mirror like effect
            # if self.input_crop is not None:
            #     frame = Visualizer.center_crop(frame, self.input_crop)
            detections = self.frame_processor.process(frame)
            # print("detections------------", detections)
            self.draw_detections(frame, detections)
            # self.draw_status(frame, detections)
            if output_stream:
                output_stream.write(frame)
            if self.display:
                self.display_interactive_window(frame)
                if self.should_stop_display():
                    break
            self.update_fps()
            self.frame_num += 1

    def run(self, args):
        input_stream = Visualizer.open_input_stream(args.input)
        if input_stream is None or not input_stream.isOpened():
            log.error("Cannot open input stream: %s" % args.input)
        fps = input_stream.get(cv2.CAP_PROP_FPS)
        frame_size = (int(input_stream.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(input_stream.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        self.frame_count = int(input_stream.get(cv2.CAP_PROP_FRAME_COUNT))
        log.info("Input stream info: %d x %d @ %.2f FPS" %
                 (frame_size[0], frame_size[1], fps))
        output_stream = Visualizer.open_output_stream(args.output, fps,
                                                      frame_size)

        cameraImg = input_stream.read()[1]
        self.textureCoords = utils.getFaceTextureCoords(
            self.textureImg, self.mean3DShape, self.blendshapes, self.idxs2D,
            self.idxs3D, self.detector, self.predictor)
        self.renderer = FaceRendering.FaceRenderer(cameraImg, self.textureImg,
                                                   self.textureCoords,
                                                   self.mesh)

        self.process(input_stream, output_stream)

        # Release resources
        if output_stream:
            output_stream.release()
        if input_stream:
            input_stream.release()
        cv2.destroyAllWindows()

    @staticmethod
    def open_input_stream(path):
        log.info("Reading input data from '%s'" % (path))
        stream = path
        try:
            stream = int(path)
        except ValueError:
            pass
        return cv2.VideoCapture(stream)

    @staticmethod
    def open_output_stream(path, fps, frame_size):
        print("path, fps, frame_size---", path, fps, frame_size)
        output_stream = None
        if path != "":
            # pass
            if not path.endswith('.avi'):
                log.warning("Output file extension is not 'avi'. "
                            "Some issues with output can occur, check logs.")
            log.info("Writing output to '%s'" % (path))
            output_stream = cv2.VideoWriter(path,
                                            cv2.VideoWriter.fourcc(*'MJPG'),
                                            fps, frame_size)
        return output_stream
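
# --- Hedged usage sketch (not part of the original file) ---------------------
# The Visualizer class above reads args.no_show, args.perf_stats and
# args.timelapse in __init__ and args.input / args.output in run(); the
# FrameProcessor it constructs will typically need further (model-path)
# arguments of its own. The values below are placeholders for illustration.
#
#   import argparse
#   args = argparse.Namespace(input="0", output="out.avi", no_show=False,
#                             perf_stats=False, timelapse=False)
#   visualizer = Visualizer(args)
#   visualizer.run(args)
# ------------------------------------------------------------------------------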
def main():
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = None
    if writer is None:
        print("Starting video writer")
        writer = cv2.VideoWriter("./out.mp4", fourcc, 30.0,
                                 (CROP_SIZE * 2, CROP_SIZE))
        if writer.isOpened():
            print("Writer successfully opened")
        else:
            writer = None
            print("Writer opening failed")
    else:
        print("Stopping video writer")
        writer.release()
        writer = None

    # TensorFlow
    graph = load_graph(args.frozen_model_file)
    image_tensor = graph.get_tensor_by_name('image_tensor:0')
    output_tensor = graph.get_tensor_by_name('generate_output/output:0')
    sess = tf.Session(graph=graph)

    # OpenCV
    # cap = cv2.VideoCapture(args.video_source)
    cap = cv2.VideoCapture(args.video_dir)
    fps = video.FPS().start()

    mean3DShape, blendshapes, mesh, idxs3D, idxs2D = utils.load3DFaceModel(
        "candide.npz")
    projectionModel = models.OrthographicProjectionBlendshapes(
        blendshapes.shape[0])

    while True:
        ret, frame = cap.read()
        if frame is None:
            break

        # resize image and detect face
        frame_resize = cv2.resize(frame, None, fx=1 / DOWNSAMPLE_RATIO,
                                  fy=1 / DOWNSAMPLE_RATIO)
        gray = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY)
        # get frame face label
        # faces = detector(gray, 1)
        black_image = np.zeros(frame.shape, np.uint8)

        # for face in faces:
        #     detected_landmarks = predictor(gray, face).parts()
        #     landmarks = [[p.x * DOWNSAMPLE_RATIO, p.y * DOWNSAMPLE_RATIO] for p in detected_landmarks]
        #
        #     jaw = reshape_for_polyline(landmarks[0:17])
        #     left_eyebrow = reshape_for_polyline(landmarks[22:27])
        #     right_eyebrow = reshape_for_polyline(landmarks[17:22])
        #     nose_bridge = reshape_for_polyline(landmarks[27:31])
        #     lower_nose = reshape_for_polyline(landmarks[30:35])
        #     left_eye = reshape_for_polyline(landmarks[42:48])
        #     right_eye = reshape_for_polyline(landmarks[36:42])
        #     outer_lip = reshape_for_polyline(landmarks[48:60])
        #     inner_lip = reshape_for_polyline(landmarks[60:68])
        #
        #     color = (255, 255, 255)
        #     thickness = 3
        #
        #     cv2.polylines(black_image, [jaw], False, color, thickness)
        #     cv2.polylines(black_image, [left_eyebrow], False, color, thickness)
        #     cv2.polylines(black_image, [right_eyebrow], False, color, thickness)
        #     cv2.polylines(black_image, [nose_bridge], False, color, thickness)
        #     cv2.polylines(black_image, [lower_nose], True, color, thickness)
        #     cv2.polylines(black_image, [left_eye], True, color, thickness)
        #     cv2.polylines(black_image, [right_eye], True, color, thickness)
        #     cv2.polylines(black_image, [outer_lip], True, color, thickness)
        #     cv2.polylines(black_image, [inner_lip], True, color, thickness)

        shapes2D = getFaceKeypoints(frame, detector, predictor)
        if shapes2D is None:
            continue

        # 3D model parameter initialization
        modelParams = projectionModel.getInitialParameters(
            mean3DShape[:, idxs3D], shapes2D[0][:, idxs2D])
        # 3D model parameter optimization
        modelParams = NonLinearLeastSquares.GaussNewton(
            modelParams, projectionModel.residual, projectionModel.jacobian,
            ([mean3DShape[:, idxs3D], blendshapes[:, :, idxs3D]],
             shapes2D[0][:, idxs2D]),
            verbose=0)
        drawProjectedShape(black_image, [mean3DShape, blendshapes],
                           projectionModel, mesh, modelParams,
                           lockedTranslation)

        # generate prediction
        combined_image = np.concatenate(
            [resize(black_image), resize(frame_resize)], axis=1)
        image_rgb = cv2.cvtColor(combined_image,
                                 cv2.COLOR_BGR2RGB)  # OpenCV uses BGR instead of RGB
        generated_image = sess.run(output_tensor,
                                   feed_dict={image_tensor: image_rgb})
        image_bgr = cv2.cvtColor(np.squeeze(generated_image),
                                 cv2.COLOR_RGB2BGR)
        image_normal = np.concatenate([resize(frame_resize), image_bgr],
                                      axis=1)
        image_landmark = np.concatenate([resize(black_image), image_bgr],
                                        axis=1)

        if args.display_landmark == 0:
            cv2.imshow('frame', image_normal)
        else:
            cv2.imshow('frame', image_landmark)

        if writer is not None:
            writer.write(image_normal)

        fps.update()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    if writer is not None:
        writer.release()
    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    sess.close()
    cap.release()
    cv2.destroyAllWindows()
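
# --- Hedged sketch (not part of the original file) ----------------------------
# This inference main() reads a module-level argparse namespace
# (args.frozen_model_file, args.video_dir, args.display_landmark). The flag
# names below are assumptions for illustration.
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument("--frozen-model-file", dest="frozen_model_file", type=str)
#   parser.add_argument("--video-dir", dest="video_dir", type=str)
#   parser.add_argument("--display-landmark", dest="display_landmark",
#                       type=int, default=0)
#   args = parser.parse_args()
#   main()
# ------------------------------------------------------------------------------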