def estimate_params(bfm_all, lms, lm_real, id_comps=30, exp_comps=20,
                    reg_a=10., reg_d=10., h=480., w=640.,
                    steps=10000, lr=.1, threshold=1., R_init=None):
    bfm_params, color, triangles = bfm_all

    # define parameters to be optimized
    alpha, delta = morph.sample_alpha_delta(id_comps=id_comps, exp_comps=exp_comps)
    alpha, delta = Variable(alpha, requires_grad=True), Variable(delta, requires_grad=True)
    rotation = Variable(torch.rand(3) * 2 - 1, requires_grad=True) \
        if R_init is None else Variable(R_init, requires_grad=True)
    translation = Variable(torch.cat((torch.rand(2) * 2 - 1, torch.tensor([-500.]))),
                           requires_grad=True)
    optimizer = torch.optim.Adam([alpha, delta, rotation, translation], lr=lr)

    losses = []
    print("Optimizing...")

    # optimize for the specified loss function
    for i in range(steps):
        optimizer.zero_grad()
        G = morph.compute_G(bfm_params, alpha=alpha, delta=delta)
        G_pinhole = pinhole.camera_model(G, rotation, translation, h=h, w=w)
        lm_pred = utils.get_landmarks(G_pinhole[:, :2], lms)
        loss = loss_fn(lm_pred, lm_real, alpha, delta, reg_a=reg_a, reg_d=reg_d)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        # stop if the decrease w.r.t. the previous loss is below the threshold
        if i > 0 and losses[-2] - losses[-1] < threshold:
            print(f"... stopping early at iteration {i}")
            break

    return alpha.detach(), delta.detach(), rotation.detach(), translation.detach(), losses
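# `loss_fn` is referenced above but not defined in this snippet. A minimal
# sketch consistent with its call signature, assuming the usual energy for a
# morphable-model landmark fit: a mean squared landmark term plus L2
# regularization on the identity (alpha) and expression (delta) coefficients.
def loss_fn(lm_pred, lm_real, alpha, delta, reg_a=10., reg_d=10.):
    loss_lan = torch.mean(torch.sum((lm_pred - lm_real) ** 2, dim=1))  # landmark alignment term
    loss_reg = reg_a * torch.sum(alpha ** 2) + reg_d * torch.sum(delta ** 2)  # keep latents near the prior
    return loss_lan + loss_reg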
def pinhole_demo(rotation=None, translation=None, save_fname="pinhole", idx=0):
    # assignment: section 3
    bfm = h5py.File(MODELS_PATH + BFM_FNAME, "r")
    bfm_params, color, triangles = utils.read_bfm(bfm)
    lms = utils.read_landmarks(MODELS_PATH + LM_FNAME)  # landmark annotations

    rotation = torch.tensor([0., 0., 0.]) if rotation is None else torch.tensor(rotation)
    translation = torch.tensor([0., 0., -500.]) if translation is None else torch.tensor(translation)

    G = morph.compute_G(bfm_params)
    G_transformed = pinhole.transform(G, rotation, translation)
    G_pinhole = pinhole.camera_model(G, rotation, translation)

    save_obj(OBJ_3D_PATH + save_fname + str(idx) + "_3d.obj", G_transformed, color, triangles)
    save_obj(OBJ_2D_PATH + save_fname + str(idx) + "_2d.obj", G_pinhole, color, triangles)

    print("Rendering...")
    img_2d = utils.get_image(G_pinhole, color, triangles)  # render img
    img_lm = utils.get_landmarks(G_pinhole[:, :2], lms)  # landmark coords

    utils.show_face(img_2d)
    utils.flip_y()
    plt.savefig(PINHOLE_PATH + save_fname + str(idx) + ".pdf")
    utils.show_landmarks(img_lm, indices=True)  # overlays landmarks on image
    plt.savefig(PINHOLE_PATH + save_fname + str(idx) + "_lm.pdf")
    plt.close()
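# `utils.flip_y` is assumed to simply invert the y-axis of the current
# matplotlib axes: the pinhole projection uses y-up coordinates while images
# are displayed y-down. A one-line sketch of that assumption:
def flip_y():
    plt.gca().invert_yaxis()  # show the projected face right side up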
def cascade_detector(frame_orig, detector):
    if detector not in ["haar", "lbp"]:
        raise ValueError("Invalid cascade detector")
    face_detector = haar_detector if detector == "haar" else lbp_detector

    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_detector.detectMultiScale(gray_frame, scaleFactor=1.32, minNeighbors=5)

    for face in faces:
        landmarks_coord = utils.get_landmarks(gray_frame, frame, utils.bb_to_rect(face),
                                              annotate=args["landmarks"])
        if args["delaunay_triangulation"]:
            utils.annotate_delaunay_triangulation(frame, landmarks_coord, line_thickness=2)
        if args["region_of_interest"]:
            utils.annotate_ROI(frame, landmarks_coord)
        if args["bounding_box"]:
            x, y, w, h = face
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
    return frame
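# `utils.bb_to_rect` is not shown here. dlib's shape predictor expects a
# dlib.rectangle rather than OpenCV's (x, y, w, h) box, so a plausible
# implementation is a direct corner conversion:
def bb_to_rect(bb):
    x, y, w, h = bb
    return dlib.rectangle(int(x), int(y), int(x + w), int(y + h))  # left, top, right, bottom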
def multiple_demo(load_fnames, save_fname="multi", reg_a=10., reg_d=10.):
    # assignment: section 6
    bfm = h5py.File(MODELS_PATH + BFM_FNAME, "r")
    bfm_params, color, triangles = utils.read_bfm(bfm)
    lms = utils.read_landmarks(MODELS_PATH + LM_FNAME)  # landmark annotations

    N = len(load_fnames)  # number of images to be loaded
    imgs_real = [utils.read_image(IMAGE_PATH + fname) for fname in load_fnames]  # load all images
    hs = [np.size(img, 0) for img in imgs_real]  # store all heights
    ws = [np.size(img, 1) for img in imgs_real]  # store all widths
    lms_real = [torch.from_numpy(detect_landmark(img)) for img in imgs_real]  # detect all ground-truth landmarks
    lms_real_flip = [utils.flip_ycoords(lms_real[i], H=hs[i]) for i in range(N)]  # flip y axis

    alpha, deltas, rotations, translations, loss = multiple.estimate_params(
        (bfm_params, color, triangles), lms, lms_real_flip,
        hs=hs, ws=ws, reg_a=reg_a, reg_d=reg_d)
    utils.save_loss(loss, save_fname=save_fname + "_loss.pdf")

    # save results for each image (alpha is the same for each img)
    for i in range(N):
        print(load_fnames[i] + ":")  # print stats for each image
        utils.print_stats(alpha, deltas[i], rotations[i], translations[i])

        G = morph.compute_G(bfm_params, alpha=alpha, delta=deltas[i])
        G_transformed = pinhole.transform(G, rotations[i], translations[i])
        G_pinhole = pinhole.camera_model(G, rotations[i], translations[i], h=hs[i], w=ws[i])
        color = texture.get_color(imgs_real[i], G_pinhole[:, :2])

        print("Rendering...")
        img_pred = utils.get_image(G_pinhole, color, triangles, h=hs[i], w=ws[i])
        utils.show_face(img_pred)
        utils.flip_y()
        plt.savefig(PINHOLE_PATH + save_fname + str(i) + ".pdf")
        plt.close()

        save_obj(OBJ_3D_PATH + save_fname + str(i) + "_3d.obj", G_transformed, color, triangles)
        save_obj(OBJ_2D_PATH + save_fname + str(i) + "_2d.obj", G_pinhole, color, triangles)

        lm_pred_flip = utils.get_landmarks(G_pinhole[:, :2], lms)
        lm_pred = utils.flip_ycoords(lm_pred_flip, H=hs[i])
        utils.show_face(imgs_real[i], white_background=False)
        utils.show_landmarks(lms_real[i], indices=False, label="ground-truth")
        try:
            utils.show_landmarks(lm_pred, indices=False, label="model")
        except TypeError:
            print("... unable to show predicted landmarks")
        plt.savefig(PINHOLE_PATH + save_fname + str(i) + "_lm.pdf")
        plt.close()
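# Example invocation (filenames are hypothetical): a single identity vector
# alpha is fitted jointly across several photos of the same person, while
# each photo gets its own expression and pose.
# multiple_demo(["yke_neutral.jpeg", "yke_smile.jpeg"], save_fname="multi")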
def texture_demo(load_fname="yke_neutral.jpeg", save_fname="texture", reg_a=10., reg_d=10., idx=0):
    # assignment: section 5
    bfm = h5py.File(MODELS_PATH + BFM_FNAME, "r")
    bfm_params, color, triangles = utils.read_bfm(bfm)
    lms = utils.read_landmarks(MODELS_PATH + LM_FNAME)  # landmark annotations

    img_real = utils.read_image(IMAGE_PATH + load_fname)  # load image of face we want to reconstruct
    h, w, _ = np.shape(img_real)
    lm_real = torch.from_numpy(detect_landmark(img_real))  # detect ground-truth landmarks
    lm_real_flip = utils.flip_ycoords(lm_real, H=h)  # flip y axis because img is upside down compared to pinhole output

    alpha, delta, rotation, translation, loss = latent.estimate_params(
        (bfm_params, color, triangles), lms, lm_real_flip, h=h, w=w, reg_a=reg_a, reg_d=reg_d)
    utils.print_stats(alpha, delta, rotation, translation)  # latent params statistics
    utils.save_loss(loss, save_fname=save_fname + str(idx) + "_loss.pdf")

    G = morph.compute_G(bfm_params, alpha=alpha, delta=delta)
    G_pinhole = pinhole.camera_model(G, rotation, translation, h=h, w=w)
    color = texture.get_color(img_real, G_pinhole[:, :2])  # obtain vertex colors from provided image

    save_obj(OBJ_3D_PATH + save_fname + str(idx) + "_3d.obj", G, color, triangles)
    save_obj(OBJ_2D_PATH + save_fname + str(idx) + "_2d.obj", G_pinhole, color, triangles)

    print("Rendering...")
    img_pred = utils.get_image(G_pinhole, color, triangles, h=h, w=w)
    utils.show_face(img_pred)
    utils.flip_y()
    plt.savefig(PINHOLE_PATH + save_fname + str(idx) + ".pdf")
    plt.close()

    lm_pred_flip = utils.get_landmarks(G_pinhole[:, :2], lms)
    lm_pred = utils.flip_ycoords(lm_pred_flip, H=h)
    utils.show_face(img_real, white_background=False)
    utils.show_landmarks(lm_real, indices=False, label="ground-truth")
    try:
        utils.show_landmarks(lm_pred, indices=False, label="model")
    except TypeError:
        print("... unable to show predicted landmarks")
    plt.savefig(PINHOLE_PATH + save_fname + str(idx) + "_lm.pdf")
    plt.close()
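# `texture.get_color` samples one color per projected vertex from the source
# photo. A minimal sketch assuming bilinear interpolation over the four
# neighboring pixels; the function name matches the call above, but the
# body, the clipping, and the [0, 1] output range are assumptions:
def get_color(img, uv):
    h, w, _ = img.shape
    xy = uv.detach().numpy()
    x = np.clip(xy[:, 0], 0, w - 2)
    y = np.clip(xy[:, 1], 0, h - 2)
    x0, y0 = np.floor(x).astype(int), np.floor(y).astype(int)
    fx, fy = (x - x0)[:, None], (y - y0)[:, None]
    # weight the four surrounding pixels by their proximity to the sample point
    c = (img[y0, x0] * (1 - fx) * (1 - fy) + img[y0, x0 + 1] * fx * (1 - fy)
         + img[y0 + 1, x0] * (1 - fx) * fy + img[y0 + 1, x0 + 1] * fx * fy)
    return c / 255.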
def dlib_detector(frame_orig):
    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = hog_detector(gray_frame)

    for face in faces:
        landmarks_coord = utils.get_landmarks(gray_frame, frame, face, annotate=args["landmarks"])
        if args["delaunay_triangulation"]:
            utils.annotate_delaunay_triangulation(frame, landmarks_coord, line_thickness=2)
        if args["region_of_interest"]:
            utils.annotate_ROI(frame, landmarks_coord)
        if args["bounding_box"]:
            x, y, w, h = rect_to_bb(face)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
    return frame
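# `rect_to_bb` is the inverse of `bb_to_rect`: it unpacks a dlib.rectangle
# into OpenCV's (x, y, w, h) form. imutils ships this helper as
# imutils.face_utils.rect_to_bb; an equivalent inline version:
def rect_to_bb(rect):
    x, y = rect.left(), rect.top()
    return (x, y, rect.right() - x, rect.bottom() - y)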
def build_from_directory(self):
    print("Extracting KeyLandmarks Distances...")
    kl_distances = []
    for dir_ in os.listdir(self.path):
        if dir_ in self.classes:
            print(f"processing {dir_} images...")
            for f in os.listdir(self.path + dir_ + "/"):
                if dir_ + "/" + f not in self.img_to_exclude:
                    gray_img = cv2.imread(self.path + dir_ + "/" + f, 0)  # read as grayscale
                    gray_img = cv2.resize(gray_img, (96, 96))
                    faces = face_detector(gray_img)
                    landmarks_coord = utils.get_landmarks(gray_img, gray_img, faces[0])  # assumes a face was found
                    key_landmarks_coords = utils.get_keylandmarks_coords(landmarks_coord)
                    all_kl_dists = utils.get_keylandmarks_distances(key_landmarks_coords)
                    kl_distances.append(all_kl_dists)
    return np.array(kl_distances)
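# `utils.get_keylandmarks_distances` is not shown. Since the builder emits a
# fixed-length feature vector per face, a plausible implementation is the
# pairwise Euclidean distance between every pair of key landmarks:
def get_keylandmarks_distances(coords):
    coords = np.asarray(coords, dtype=float)
    dists = []
    for i in range(len(coords)):
        for j in range(i + 1, len(coords)):
            dists.append(np.linalg.norm(coords[i] - coords[j]))  # distance between landmarks i and j
    return dists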
def dnn_detector(frame_orig):
    frame = frame_orig.copy()
    frame_height = frame.shape[0]
    frame_width = frame.shape[1]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
    net.setInput(blob)
    detections = net.forward()

    bboxes = []
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)
            bboxes.append([x1, y1, x2, y2])
            face = [x1, y1, x2 - x1, y2 - y1]

            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            landmarks_coord = utils.get_landmarks(gray_frame, frame, utils.bb_to_rect(face),
                                                  annotate=args["landmarks"])
            if args["delaunay_triangulation"]:
                utils.annotate_delaunay_triangulation(frame, landmarks_coord, line_thickness=2)
            if args["region_of_interest"]:
                utils.annotate_ROI(frame, landmarks_coord)
            if args["bounding_box"]:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
    return frame
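# `net` and `conf_threshold` are module-level here. The blob parameters
# (300x300 input, BGR mean [104, 117, 123]) match OpenCV's SSD ResNet-10
# face detector, so a plausible setup is (file names are assumptions):
# net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
#                                "res10_300x300_ssd_iter_140000.caffemodel")
# conf_threshold = 0.7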
def main(args):
    # Get landmark target points
    target_lmks = []
    for target in args.targets:
        print("Extracting landmarks from target ", target)
        target_img = im2np(target)
        target = Tensor(detect_landmark(target_img))
        if args.plotting:
            plt.imshow(target_img)
            lmks = target.detach().numpy()
            plt.scatter(lmks[:, 0], lmks[:, 1])
            plt.show()
        target_lmks.append(torch_norm(target * -1))

    # Get full pipeline model
    print("Init pipeline model for rendering")
    pipeline = Pipeline(
        renderer3D=Render3DPipe(
            basis=get_face_basis(h5py.File(args.prior, 'r'), args.size_id, args.size_exp),
            transform=FaceTransform(),
        ),
        rendererUV=RenderUVPipe(
            camera=Camera(args.fov, args.aratio, args.near_far),
            normalizer=UVNormalizer(),
        ),
        lmksPipe=LandmarkPipe(landmarks=get_landmarks(args.landmarks)))

    # Init loss module
    print("Constructing full loss end-to-end pipeline")
    loss = FitLoss(pipeline=pipeline,
                   L_lan=LandmarkLoss(),
                   L_reg=RegularizationLoss(*args.reg))

    # Init random latent variables
    print("Init latent variables")

    def init_latent(size):
        return Variable(trand(size) * 2 - 1, requires_grad=True)

    def set_latent(val):
        return Variable(Tensor(np.array(val)), requires_grad=True)

    alpha = init_latent(args.size_id)
    deltas = []
    transforms = []
    for _ in range(len(args.targets)):
        deltas.append(init_latent(args.size_exp))
        transforms.append(
            (init_latent(3) if args.omega is None else set_latent(args.omega),
             init_latent(3) if args.t is None else set_latent(args.t)))

    # Init optimizer
    optim = Adam([alpha] + deltas + [i for transform in transforms for i in transform], lr=args.lr)

    # Fit latent parameters
    print("Starting to fit latent parameters")
    if args.plotting:
        for delta, transform, target in zip(deltas, transforms, target_lmks):
            _ = loss((alpha, delta), transform, target)
            plot_status(loss.pred.detach().numpy(), target.detach().numpy(),
                        title="Initial Setting")

    epoch_bar = tqdm(range(args.epochs))
    for epoch in epoch_bar:
        # Reset gradients
        optim.zero_grad()

        err_tot = 0
        for delta, transform, target in zip(deltas, transforms, target_lmks):
            # Compute loss
            err = loss((alpha, delta), transform, target)
            # Backpropagate loss
            err.backward()
            err_tot += err.item()

        # Update estimate of latent variables
        optim.step()

        # Display results
        epoch_bar.set_description("err: %.3f" % (err_tot / len(args.targets)))

    if args.plotting:
        for delta, transform, target in zip(deltas, transforms, target_lmks):
            _ = loss((alpha, delta), transform, target)
            plot_status(loss.pred.detach().numpy(), target.detach().numpy(),
                        title="Final Setting")

    with open(args.output, "wb") as f:
        pickle.dump(((alpha, deltas), transforms), f)
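# `plot_status` is assumed to be a small scatter-plot helper that overlays
# the predicted landmarks on the target ones; a sketch of that assumption:
def plot_status(pred, target, title=""):
    plt.scatter(target[:, 0], target[:, 1], label="target")
    plt.scatter(pred[:, 0], pred[:, 1], label="prediction")
    plt.title(title)
    plt.legend()
    plt.show()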
def main(unused_argv):
    """MAIN"""
    est_config = tf.estimator.RunConfig(
        save_checkpoints_steps=5000,  # Save checkpoints every 5000 steps.
        keep_checkpoint_max=10,  # Retain the 10 most recent checkpoints.
        save_summary_steps=100,
    )

    exporter = tf.estimator.BestExporter(
        serving_input_receiver_fn=model._serving_input_receiver_fn,
        exports_to_keep=5)

    train_spec = tf.estimator.TrainSpec(input_fn=model._train_input_fn,
                                        max_steps=TRAIN_MAX_STEPS)
    eval_spec = tf.estimator.EvalSpec(input_fn=model._eval_input_fn,
                                      steps=1000,
                                      throttle_secs=15 * 60,
                                      exporters=exporter)
    estimator = tf.estimator.Estimator(model_fn=model.cnn_model_fn,
                                       model_dir=MODEL_DIR,
                                       config=est_config)

    # Choose mode between train, eval, predict and export
    mode_dict = {
        'train': tf.estimator.ModeKeys.TRAIN,
        'eval': tf.estimator.ModeKeys.EVAL,
        'predict': tf.estimator.ModeKeys.PREDICT
    }
    mode = 'export'  # 'export' is not an Estimator ModeKey; it is matched as a plain string below

    if mode == mode_dict['train']:
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    elif mode == mode_dict['eval']:
        evaluation = estimator.evaluate(input_fn=model._eval_input_fn)
        tf.print(evaluation)
    elif mode == "export":
        estimator.export_saved_model('%s/saved_model' % EXPORT_DIR,
                                     model._serving_input_receiver_fn)
    elif mode == mode_dict['predict']:
        predictions = estimator.predict(input_fn=model._predict_input_fn,
                                        yield_single_examples=False)
        for result in predictions:
            filename = result['name'][0].decode('ASCII')
            print("Evaluating %s" % filename)
            img = result['image']
            heatmaps = result['heatmap']
            pts = get_landmarks(heatmaps[0][-1])
            print("Landmark Points: %s" % str(pts))
            for i, heatmap in enumerate(heatmaps):
                heatmap = np.sum(heatmap[0], axis=2)
                # normalize the summed heatmap to [0, 1] for display
                heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min())
                heatmap = cv2.resize(heatmap, (256, 256))
                cv2.imshow("%d" % i, heatmap)
            for pt in pts:
                cv2.circle(img[0], (int(pt[1]), int(pt[0])), 2, (0, 255, 0), -1, cv2.LINE_AA)
            cv2.imshow('result', img[0])
            cv2.waitKey(0)
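# `get_landmarks` here decodes a stacked heatmap tensor into (row, col)
# points; note the circles above are drawn at (pt[1], pt[0]). A minimal
# sketch assuming one heatmap channel per landmark and a per-channel argmax:
def get_landmarks(heatmap):
    pts = []
    for c in range(heatmap.shape[-1]):
        idx = np.argmax(heatmap[..., c])  # hottest pixel of channel c
        pts.append(np.unravel_index(idx, heatmap[..., c].shape))  # (row, col)
    return pts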
def load_images(images_dir=IMAGES_DIR):  # reconstructed header; IMAGES_DIR assumed module-level
    image_paths = glob(images_dir + "*.jpg")
    return image_paths


if __name__ == "__main__":
    image_paths = load_images()
    predict_fn = load_model_predictor()

    for img_path in tqdm(image_paths):
        img = cv2.imread(img_path)
        img = cv2.resize(img, (256, 256))

        predictions = predict_fn({"image": img, "name": img_path})
        heatmaps = predictions['heatmap']
        pts = get_landmarks(heatmaps[-1][0])

        for pt in pts:
            cv2.circle(img, (int(pt[1]), int(pt[0])), 2, (0, 255, 0), -1, cv2.LINE_AA)

        for heatmap in heatmaps:
            heatmap = np.sum(heatmap[0], axis=2)
            # normalize the summed heatmap to [0, 1] for display
            heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min())
            heatmap = cv2.resize(heatmap, (256, 256))
            cv2.imshow("hmap", heatmap)

        cv2.imshow("result", img)
        cv2.waitKey(0)
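# `load_model_predictor` is not defined in this fragment. Under TF 1.x it is
# typically a contrib predictor wrapped around the SavedModel exported by
# main() above; a sketch of that assumption (EXPORT_DIR as in the exporter):
def load_model_predictor():
    from tensorflow.contrib import predictor
    export_base = EXPORT_DIR + "/saved_model"
    latest = max(glob(export_base + "/*"))  # timestamped export dirs sort lexicographically
    return predictor.from_saved_model(latest)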
def estimate_params(bfm_all, lms, lms_real, id_comps=30, exp_comps=20,
                    reg_a=10., reg_d=10., hs=480., ws=640.,
                    steps=10000, lr=.1, threshold=1.):
    bfm_params, color, triangles = bfm_all

    # accept both a single image and a list of images
    lms_real = lms_real if isinstance(lms_real, list) else [lms_real]
    hs = hs if isinstance(hs, list) else [hs]
    ws = ws if isinstance(ws, list) else [ws]
    N = len(lms_real)

    # define parameters to be optimized: one shared alpha, per-image deltas and poses
    deltas = []
    for i in range(N):
        alpha, delta = morph.sample_alpha_delta(id_comps=id_comps, exp_comps=exp_comps)
        deltas.append(delta)
    alpha = Variable(alpha, requires_grad=True)
    deltas = [Variable(delta, requires_grad=True) for delta in deltas]
    rotations = [Variable(torch.rand(3) * 2 - 1, requires_grad=True) for i in range(N)]
    translations = [Variable(torch.cat((torch.rand(2) * 2 - 1, torch.tensor([-500.]))),
                             requires_grad=True) for i in range(N)]
    optimizer = torch.optim.Adam([alpha] + deltas + rotations + translations, lr=lr)

    losses = []
    print("Optimizing...")

    # optimize for the specified loss function
    for i in range(steps):
        optimizer.zero_grad()
        Gs = [morph.compute_G(bfm_params, alpha=alpha, delta=delta) for delta in deltas]
        Gs_pinhole = [pinhole.camera_model(Gs[i], rotations[i], translations[i], h=hs[i], w=ws[i])
                      for i in range(N)]
        lms_pred = [utils.get_landmarks(G_pinhole[:, :2], lms) for G_pinhole in Gs_pinhole]
        loss = loss_fn(lms_pred, lms_real, alpha, deltas, reg_a=reg_a, reg_d=reg_d)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        # stop if the decrease w.r.t. the previous loss is below the threshold
        if i > 0 and losses[-2] - losses[-1] < threshold:
            print(f"... stopping early at iteration {i}")
            break

    return alpha.detach(), [delta.detach() for delta in deltas], \
        [rotation.detach() for rotation in rotations], \
        [translation.detach() for translation in translations], \
        losses
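# As in the single-image fit, `loss_fn` is referenced but not defined here.
# A sketch under the same assumptions, summing the landmark term over all
# images and regularizing the shared alpha once and each delta separately:
def loss_fn(lms_pred, lms_real, alpha, deltas, reg_a=10., reg_d=10.):
    loss_lan = sum(torch.mean(torch.sum((p - r) ** 2, dim=1))
                   for p, r in zip(lms_pred, lms_real))
    loss_reg = reg_a * torch.sum(alpha ** 2) + reg_d * sum(torch.sum(d ** 2) for d in deltas)
    return loss_lan + loss_reg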
while cap.isOpened():
    success, image = cap.read()
    if not success:
        print("Ignoring empty camera frame.")
        # If loading a video, use 'break' instead of 'continue'.
        continue

    # Flip the image horizontally for a later selfie-view display
    image = cv2.flip(image, 1)

    # run pose detection on the RGB version of the frame
    results = pose_detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # If a pose was found, stream the arm movement and draw the annotations.
    if results.pose_landmarks is not None:
        kpts = get_landmarks(results.pose_landmarks)
        lft, rgt = calc_arm_movement(kpts)
        mesg = json.dumps({'x': lft, 'y': rgt})
        s.sendall(mesg.encode())

        # Draw the pose annotations on the image.
        mp_drawing.draw_landmarks(image, results.pose_landmarks,
                                  mp.solutions.pose.POSE_CONNECTIONS)

    cv2.imshow('MediaPipe Pose Recognition', image)
    if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
        break

cap.release()
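# `s` is an already-connected socket that streams the arm-movement values as
# JSON. A minimal setup sketch (host and port are assumptions):
# import socket
# s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# s.connect(("127.0.0.1", 9000))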