def __init__(self, mode='gpu'):
    # load model
    checkpoint_fp = os.path.join(baseFolder, 'models/phase1_wpdc_vdc_v2.pth.tar')
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    self.model = getattr(mobilenet_v1, 'mobilenet_1')(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = self.model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    self.model.load_state_dict(model_dict)
    self.mode = mode
    if self.mode == 'gpu':
        cudnn.benchmark = True
        self.model = self.model.cuda()
    self.model.eval()

    # load dlib model for face detection and landmark for face cropping
    dlib_landmark_model = os.path.join(baseFolder, 'models/shape_predictor_68_face_landmarks.dat')
    self.face_regressor = dlib.shape_predictor(dlib_landmark_model)
    self.face_detector = dlib.get_frontal_face_detector()
    self.tri = sio.loadmat(os.path.join(baseFolder, 'visualize/tri.mat'))['tri']
    self.transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
def extract_param(checkpoint_fp, root='', filelists=None, arch='mobilenet_1', num_classes=62,
                  device_ids=[0], batch_size=128, num_workers=4):
    map_location = {f'cuda:{i}': 'cuda:0' for i in range(8)}
    checkpoint = torch.load(checkpoint_fp, map_location=map_location)['state_dict']
    torch.cuda.set_device(device_ids[0])
    model = getattr(mobilenet_v1, arch)(num_classes=num_classes)
    model = nn.DataParallel(model, device_ids=device_ids).cuda()
    model.load_state_dict(checkpoint)

    dataset = DDFATestDataset(filelists=filelists, root=root,
                              transform=transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)]))
    data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

    cudnn.benchmark = True
    model.eval()

    end = time.time()
    outputs = []
    with torch.no_grad():
        for _, inputs in enumerate(data_loader):
            inputs = inputs.cuda()
            output = model(inputs)
            for i in range(output.shape[0]):
                param_prediction = output[i].cpu().numpy().flatten()
                outputs.append(param_prediction)
        outputs = np.array(outputs, dtype=np.float32)

    print(f'Extracting params takes {time.time() - end: .3f}s')
    return outputs
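# Hedged usage sketch (not part of the original sources): how extract_param might be called to dump the
# 62-dim 3DMM parameters for an evaluation list. The checkpoint path is the one used elsewhere in this
# file; the root/filelist paths are placeholders chosen only for illustration.
if __name__ == '__main__':
    params = extract_param(checkpoint_fp='models/phase1_wpdc_vdc.pth.tar',
                           root='test.data/crops',            # assumed image root
                           filelists='test.data/crops.list',  # assumed list file, one image path per line
                           batch_size=128)
    np.save('params.npy', params)  # one 62-dim parameter vector per image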
def get_landmark(path):
    device = torch.device("cuda")
    cap = cv2.VideoCapture(path)
    cudnn.benchmark = True
    smooth_flag = False
    ret, frame = cap.read()
    frame = resize_frame(frame, cfg.resize_size)
    height, width, _ = frame.shape
    face_detect = detector.SFDDetector(device, cfg.rect_model_path)
    face_landmark = FAN_landmark.FANLandmarks(device, cfg.landmark_model_path, cfg.detect_type)
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    model = mobilenet_v1.mobilenet(cfg.checkpoint_fp, num_classes=240)
    landmark_list = []
    frame_list = []
    while ret:
        frame = resize_frame(frame, cfg.resize_size)
        bbox = face_detect.extract(frame, cfg.thresh)
        if len(bbox) > 0:
            frame, landmarks = face_landmark.extract([frame, bbox])
            roi = roi_box_from_landmark(landmarks[0])
            img = crop_img(frame, roi)
            img = cv2.resize(img, dsize=(cfg.STD_SIZE, cfg.STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input_data = transform(img).unsqueeze(0)
            with torch.no_grad():
                param = model(input_data.cuda())
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            pts68 = predict_68pts(param, roi, img.shape[0])
            if smooth_flag:
                show_frame, landmark_3d = get_smooth_data(frame, pts68.T, frame_list, landmark_list, cfg.list_size)
            else:
                show_frame = frame
                landmark_3d = pts68.T
            for x, y in landmark_3d[:, :2]:
                cv2.circle(show_frame, (int(x), int(y)), 1, (255, 255, 0))
            cv2.imshow('3ddfa', show_frame)
            cv2.waitKey(1)
        else:
            print("cannot find face")
            # read the next frame before continuing, otherwise the loop would spin on the same frame forever
            ret, frame = cap.read()
            continue
        ret, frame = cap.read()
    cap.release()
    cv2.destroyAllWindows()
def extract_param(checkpoint_fp, root='', filelists=None, num_classes=62,
                  device_ids=[0], batch_size=1, num_workers=0):
    map_location = {f'cuda:{i}': 'cuda:0' for i in range(8)}
    # checkpoint = torch.load(checkpoint_fp, map_location=map_location)['state_dict']
    torch.cuda.set_device(device_ids[0])
    model = PRN(checkpoint_fp)
    # model = nn.DataParallel(model, device_ids=device_ids).cuda()
    # model.load_state_dict(checkpoint)

    dataset = DDFATestDataset(filelists=filelists, root=root,
                              transform=transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)]))
    data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

    cudnn.benchmark = True
    # model.eval()

    end = time.time()
    outputs = []
    with torch.no_grad():
        for _, inputs in enumerate(data_loader):
            inputs = inputs.cuda()
            # Get the output landmarks
            pos = model.net_forward(inputs)
            out = pos.cpu().detach().numpy()
            pos = np.squeeze(out)
            cropped_pos = pos * 255
            pos = cropped_pos.transpose(1, 2, 0)
            if pos is None:
                continue
            # print(pos.shape)
            output = model.get_landmarks(pos)
            # print(output.shape)
            outputs.append(output)
        outputs = np.array(outputs, dtype=np.float32)

    print("outputs", outputs.shape)
    print(f'Extracting params takes {time.time() - end: .3f}s')
    return outputs
def get_inputs(files: List[str]) -> Tuple[torch.Tensor, List[np.ndarray]]:
    """Get inputs.

    Parameters
    ----------
    files : List[str]
        List of frames to process.

    Returns
    -------
    inputs : torch.Tensor
        Frames cropped to the face, stacked along the batch dimension.
    imgs_roi : List[np.ndarray]
        Face rectangles (regions of interest), one per frame.
    """
    face_reg, face_det = get_face_model()
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    imgs_ori = [cv2.imread(file) for file in files]
    imgs_rect = [face_det(img)[0] for img in imgs_ori]
    imgs_pts = [face_reg(imgs_ori[i], imgs_rect[i]).parts() for i in range(len(imgs_ori))]
    imgs_pts = [np.array([[pt.x, pt.y] for pt in imgs_pts[i]]).T for i in range(len(imgs_ori))]
    imgs_roi = [parse_roi_box_from_landmark(pts) for pts in imgs_pts]
    imgs_cropped = [crop_img(imgs_ori[i], imgs_roi[i]) for i in range(len(imgs_ori))]
    imgs_new = [cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LINEAR) for img in imgs_cropped]
    inputs = [transform(img).unsqueeze(0) for img in imgs_new]
    inputs = torch.cat(inputs, dim=0)
    return inputs, imgs_roi
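# Hedged usage sketch (assumption, not original code): run the batched crops from get_inputs through the
# 62-parameter regressor loaded elsewhere in this file and recover 68 landmarks per frame.
# `model` and `frame_files` are placeholders for the loaded mobilenet_1 regressor and a list of image paths.
def get_landmarks_for_files(model, frame_files):
    inputs, rois = get_inputs(frame_files)
    with torch.no_grad():
        params = model(inputs.cuda()).cpu().numpy()
    # predict_68pts maps each 62-dim parameter vector back into the original image via its roi box
    return [predict_68pts(params[i].flatten().astype(np.float32), rois[i]) for i in range(len(rois))]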
def getFaceTextureCoords(textImag):
    checkpoint_fp = 'models/MobDenseNet.pth.tar'
    arch = 'densemobilenetv4_19'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(MobDenseNet, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    cudnn.benchmark = True
    model = model.cuda()
    model.eval()

    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    img_ori = textImag
    rects = face_detector(img_ori, 1)
    for rect in rects:
        # - use detected face bbox
        bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
        roi_box = parse_roi_box_from_bbox(bbox)
        img = crop_img(img_ori, roi_box)
        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            input = input.cuda()
            param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
        # 68 pts
        pts68 = predict_68pts(param, roi_box)
        return pts68[[0, 1], :]
def detect_pose(img_ori, target, model):
    STD_SIZE = 120
    pose_list = []
    for rect in target:
        roi_box = rect[0:4]
        # print(roi_box)
        try:
            img = crop_img(img_ori, roi_box)
        except Exception as e:
            print(e)
            continue
        shape_img = img.shape
        if shape_img[0] < 100 or shape_img[1] < 100:
            continue
        if len(img) == 0:
            return pose_list
        try:
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        except Exception as e:
            print(e)
            continue
        transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
        pts68 = predict_68pts(param, roi_box)
        P, pose, pose_angle = parse_pose(param)
        # if shape_img[0] > 100 and shape_img[1] > 100 and np.abs(pose_angle[0]) <= 60 \
        #         and np.abs(pose_angle[1]) <= 60 and np.abs(pose_angle[2]) <= 60:
        #     print(shape_img)
        #     plt.imshow(img)
        #     plt.show()
        pose_list.append(pose_angle)
    return pose_list
def r3fa_landmark(image, rect, model, predictor, imgScale):
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    Ps = []  # Camera matrix collection
    pts_res = []
    faceRectangle = rectangle(int(rect.left() / imgScale), int(rect.top() / imgScale),
                              int(rect.right() / imgScale), int(rect.bottom() / imgScale))
    # - use landmark for cropping
    pts = predictor(image, faceRectangle).parts()
    pts = np.array([[pt.x, pt.y] for pt in pts]).T
    roi_box = parse_roi_box_from_landmark(pts)
    # # - use detected face bbox
    # bbox = [rects.left(), rects.top(), rects.right(), rects.bottom()]
    # roi_box = parse_roi_box_from_bbox(bbox)
    img = crop_img(image, roi_box)
    # forward: one step
    img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
    # 68 pts
    pts68 = predict_68pts(param, roi_box)
    pts_res.append(pts68)
    P, pose = parse_pose(param)
    Ps.append(P)
    # # draw the landmarks
    # for indx in range(68):
    #     pos = (pts68[0, indx], pts68[1, indx])
    #     cv2.circle(img, pos, 3, color=(255, 255, 255), thickness=-1)
    # img_ori = plot_pose_box(img, Ps, pts_res)
    return Ps, pts68[[0, 1], :]
def main():
    parse_args()  # parse global args

    # logging setup
    logging.basicConfig(
        format='[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode=args.log_mode),
            logging.StreamHandler()
        ])
    print_args(args)  # print args

    # step 1: define the model structure
    model = getattr(mobilenet_v1, args.arch)(num_classes=args.num_classes)
    torch.cuda.set_device(args.devices_id[0])  # fix bug for `ERROR: all tensors must be on devices[0]`
    model = nn.DataParallel(model, device_ids=args.devices_id).cuda()  # -> GPU

    # step 2: optimization: loss and optimization method
    # criterion = nn.MSELoss(size_average=args.size_average).cuda()
    if args.loss.lower() == 'wpdc':
        print(args.opt_style)
        criterion = WPDCLoss(opt_style=args.opt_style).cuda()
        logging.info('Use WPDC Loss')
    elif args.loss.lower() == 'vdc':
        criterion = VDCLoss(opt_style=args.opt_style).cuda()
        logging.info('Use VDC Loss')
    elif args.loss.lower() == 'pdc':
        criterion = nn.MSELoss(size_average=args.size_average).cuda()
        logging.info('Use PDC loss')
    else:
        raise Exception(f'Unknown Loss {args.loss}')
    optimizer = torch.optim.SGD(model.parameters(), lr=args.base_lr, momentum=args.momentum,
                                weight_decay=args.weight_decay, nesterov=True)

    # step 2.1: resume
    if args.resume:
        if Path(args.resume).is_file():
            logging.info(f'=> loading checkpoint {args.resume}')
            checkpoint = torch.load(args.resume, map_location=lambda storage, loc: storage)['state_dict']
            # checkpoint = torch.load(args.resume)['state_dict']
            model.load_state_dict(checkpoint)
        else:
            logging.info(f'=> no checkpoint found at {args.resume}')

    # step 3: data
    normalize = NormalizeGjz(mean=127.5, std=128)  # may need optimization
    train_dataset = DDFADataset(root=args.root, filelists=args.filelists_train, param_fp=args.param_fp_train,
                                transform=transforms.Compose([ToTensorGjz(), normalize]))
    val_dataset = DDFADataset(root=args.root, filelists=args.filelists_val, param_fp=args.param_fp_val,
                              transform=transforms.Compose([ToTensorGjz(), normalize]))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.workers,
                              shuffle=True, pin_memory=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=args.val_batch_size, num_workers=args.workers,
                            shuffle=False, pin_memory=True)

    # step 4: run
    cudnn.benchmark = True
    if args.test_initial:
        logging.info('Testing from initial')
        validate(val_loader, model, criterion, args.start_epoch)

    for epoch in range(args.start_epoch, args.epochs + 1):
        # adjust learning rate
        adjust_learning_rate(optimizer, epoch, args.milestones)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        filename = f'{args.snapshot}_checkpoint_epoch_{epoch}.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                # 'optimizer': optimizer.state_dict()
            },
            filename)
        validate(val_loader, model, criterion, epoch)
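# Hedged sketch (assumption): the training loop above calls adjust_learning_rate(optimizer, epoch, milestones),
# whose body is not shown here. A typical milestone step-decay would look like the following; the base_lr
# default and the 0.1 decay factor are assumptions for illustration, not values taken from the original code.
def adjust_learning_rate_example(optimizer, epoch, milestones, base_lr=0.01, gamma=0.1):
    # count how many milestones have been passed and scale the base learning rate accordingly
    n_passed = sum(1 for m in milestones if epoch >= m)
    lr = base_lr * (gamma ** n_passed)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr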
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    # 2. parse images list
    img_list = listdir(args.img_prefix)
    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                                   flip_input=False, device=args.mode)
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    for img_idx, img_fp in enumerate(tqdm(img_list)):
        img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))
        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        # the face alignment model uses RGB as input; the result is a tuple with landmarks and boxes
        preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
        try:
            pts_2d_68 = preds[0]
        except:
            continue
        roi_box = parse_roi_box_from_landmark(pts_2d_68.T)
        img = crop_img(img_ori, roi_box)
        # import pdb; pdb.set_trace()
        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)
        if args.dump_2d_img:
            wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
            colors = get_colors(img_ori, vertices)
            # aligned_param = get_aligned_param(param)
            # vertices_aligned = predict_dense(aligned_param, roi_box)
            # h, w, c = 120, 120, 3
            h, w, c = img_ori.shape
            img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
            cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
        del img_ori
        del img
        if args.dump_2d_img:
            del img_2d  # img_2d only exists when the 2d rendering branch ran
model_dict = model.state_dict()
for k in checkpoint.keys():
    model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
    cudnn.benchmark = True
    model = model.cuda()
model.eval()

# 2. face detection function
face_detector = dlib.get_frontal_face_detector()

# 3. detection
tri = sio.loadmat('visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
print('len(folder) :', len(folder))
for item in tqdm(folder):
    try:
        img_ori = cv2.imread(str(folder[item]))
        rects = face_detector(img_ori, 1)
        if len(rects) != 0:
            for rect in rects:
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
model_dict = model.state_dict()
for k in checkpoint.keys():
    model_dict[k.replace('module.', '')] = checkpoint[k]
model.load_state_dict(model_dict)
if args.mode == 'gpu':
    cudnn.benchmark = True
    model = model.cuda()
model.eval()

# 2. face detection function
face_detector = dlib.get_frontal_face_detector()

# 3. detection
tri = sio.loadmat('visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
for item in folder:
    img_ori = cv2.imread(folder[item])
    rects = face_detector(img_ori, 1)
    if len(rects) != 0:
        for rect in rects:
            bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
            roi_box = parse_roi_box_from_bbox(bbox)
            img = crop_img(img_ori, roi_box)
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    app = RenderPipeline(**cfg)
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('tri_refine.mat')['tri']
    tri = _to_ctype(tri).astype(np.int32)  # for type compatibility
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    last_frame_lmks = []
    vc = cv2.VideoCapture("C:\\Users\\zh13\\Videos\\TrackingTest.mov")
    success, frame = vc.read()
    while success:
        roi_box = []
        # run the detector only when there are no landmarks from the previous frame to track from
        if len(last_frame_lmks) == 0:
            rects = face_detector(frame, 1)
            for rect in rects:
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box.append(parse_roi_box_from_bbox(bbox))
        else:
            for lmk in last_frame_lmks:
                roi_box.append(parse_roi_box_from_landmark(lmk))

        this_frame_lmk = []
        params = []
        for box in roi_box:
            img_to_net = crop_img(frame, box)
            img_to_net = cv2.resize(img_to_net, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img_to_net).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            params.append(param)
            this_frame_lmk.append(predict_68pts(param, box))
        last_frame_lmks = this_frame_lmk

        if args.render_mesh:
            for box, param in zip(roi_box, params):
                vertices = predict_dense(param, box)
                frame = app(_to_ctype(vertices.T), tri, _to_ctype(frame.astype(np.float32) / 255.))
        else:
            for lmk in last_frame_lmks:
                for p in lmk.T:
                    cv2.circle(frame,
                               (int(round(p[0] * draw_multiplier)), int(round(p[1] * draw_multiplier))),
                               draw_multiplier, (255, 0, 0), 1, cv2.LINE_AA, draw_shiftbits)
        cv2.imshow("3ddfa video demo", frame)
        cv2.waitKey(1)
        success, frame = vc.read()
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load pre-trained uv-gan model
    if args.uvgan:
        if args.checkpoint_uv_gan == "":
            print("Specify the path to checkpoint uv_gan")
            exit()
        uvgan = infer_uv_gan.UV_GAN(args.checkpoint_uv_gan)

    # 3. load dlib models for face detection and landmarks used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 4. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []
        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            if args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                p, offset, alpha_shp, alpha_exp = _parse_param(param)
                vertices = (u + w_shp @ alpha_shp + w_exp @ alpha_exp).reshape(3, -1, order='F') + offset
                vertices = vertices.T
                tri = tri.T - 1
                print('Dump obj with sampled texture to {}'.format(wfp))
                unwraps = create_unwraps(vertices)
                h, w = args.height, args.width
                tcoords = process_uv(unwraps[:, :2], h, w)
                texture = render_colors(tcoords, tri, colors, h, w, c=3).astype('uint8')
                scaled_tcoords = scale_tcoords(tcoords)
                if args.uvgan:
                    texture = uvgan.infer(texture)
                else:
                    texture = cv2.cvtColor(texture, cv2.COLOR_BGR2RGB)
                vertices, colors, uv_coords = vertices.astype(np.float32).copy(), \
                    colors.astype(np.float32).copy(), scaled_tcoords.astype(np.float32).copy()
                write_obj_with_colors_texture(wfp, vertices, colors, tri, texture * 255.0, uv_coords)
            ind += 1
def classify(model, inputs):
    in_img = inputs['photo']
    img_ori = np.array(in_img)
    img_fp = 'samples/test1.jpg'
    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    # print(transform)
    rects = face_detector(img_ori, 1)
    pts_res = []
    Ps = []  # Camera matrix collection
    poses = []  # pose collection, [todo: validate it]
    vertices_lst = []  # store multiple face vertices
    ind = 0
    suffix = get_suffix(img_fp)
    for rect in rects:
        # - use detected face bbox
        bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
        roi_box = parse_roi_box_from_bbox(bbox)
        img = crop_img(img_ori, roi_box)
        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        print(input)
        with torch.no_grad():
            if mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
        # 68 pts
        pts68 = predict_68pts(param, roi_box)
        # two-step for more accurate bbox to crop face
        if bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            pts68 = predict_68pts(param, roi_box)
        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)
        vertices = predict_dense(param, roi_box)
        vertices_lst.append(vertices)
        ind += 1

    pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)
    output = pncc_feature[:, :, ::-1]
    print(type(output))
    pilImg = transforms.ToPILImage()(np.uint8(output))
    return {"image": pilImg}
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib models for face detection and landmarks used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []
        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            try:
                lines = open(rect_fp).read().strip().split('\n')[1:]
            except FileNotFoundError:
                print('Cannot load bbox file')
                continue
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            # dense face 3d vertices
            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
            if args.dump_ply:
                dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_vertex:
                dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if args.dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if args.dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
                paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=args.paf_size)
                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if args.dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if args.dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if args.dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri - 1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if args.dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if args.dump_res:
            draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib models for face detection and landmarks used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    files = sorted(glob.glob(os.path.join(args.folder, '*.jpg')) + glob.glob(os.path.join(args.folder, '*.png')))
    p_bar = tqdm(total=len(files))
    for img_fp in files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []
        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input_ = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input_ = input_.cuda()
                param = model(input_)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input_ = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input_ = input_.cuda()
                    param = model(input_)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

        # take the first detected face, rotate it 180 degrees about the y axis, and rescale coordinates to [-1, 1]
        points = np.array(pts_res)[0].T
        rotated = eulerAnglesToRotationMatrix(np.array([0., np.pi, 0.]))
        points = points.dot(rotated)
        scaler = MinMaxScaler(feature_range=(-1., 1))
        scaled_points = scaler.fit_transform(points)
        points = scaled_points
        f_name = img_fp.replace(args.folder + '/', '').replace('.png', '').replace('.jpg', '')
        np.save('./results/{}.npy'.format(f_name), points.reshape(-1))
        if args.plot:
            plot_face(points, img_fp)
            if args.show_flg:
                plt.show()
            else:
                plt.savefig('./results/{}.png'.format(f_name))
        p_bar.update(1)
def test_video(args):
    start_time = time.time()
    x = 1  # displays the frame rate every 1 second
    counter = 0

    # 1. load pre-trained model
    # checkpoint_fp = 'models/phase1_wpdc_vdc_v2.pth.tar'
    # arch = 'mobilenet_1'
    checkpoint_fp = 'models/MobDenseNet.pth.tar'
    arch = 'mobdensenet_v1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(MobDenseNet, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib models for face detection and landmarks used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri'] - 1
    tri_pts68 = sio.loadmat('visualize/pats68_tri.mat')['tri']
    textureImg = cv2.imread(image_name)
    cameraImg = cap.read()[1]
    # textureCoords = df.getFaceTextureCoords(textureImg)
    # drawface = Drawing3DFace.Draw3DFace(cameraImg, textureImg, textureCoords, tri_pts68.T)
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    while True:
        # get a frame
        img_ori = cap.read()[1]
        imgScale = 1
        scaledImg = img_ori
        if max(img_ori.shape) > maxImgSizeForDetection:
            imgScale = maxImgSizeForDetection / float(max(img_ori.shape))
            scaledImg = cv2.resize(img_ori, (int(img_ori.shape[1] * imgScale), int(img_ori.shape[0] * imgScale)))
        rects = face_detector(scaledImg, 1)
        Ps = []  # Camera matrix collection
        poses = []  # pose collection
        pts_res = []
        # suffix = get_suffix(img_ori)
        for rect in rects:
            if args.dlib_landmark:
                faceRectangle = rectangle(int(rect.left() / imgScale), int(rect.top() / imgScale),
                                          int(rect.right() / imgScale), int(rect.bottom() / imgScale))
                # - use landmark for cropping
                pts = face_regressor(img_ori, faceRectangle).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                bbox = [int(rect.left() / imgScale), int(rect.top() / imgScale),
                        int(rect.right() / imgScale), int(rect.bottom() / imgScale)]
                roi_box = parse_roi_box_from_bbox(bbox)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # df.triDelaunay(pts68)
            densePts = predict_dense(param, roi_box)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)
            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            pts = []
            # draw landmarks (cast to int pixel coordinates for cv2.circle)
            for indx in range(68):
                pos = (int(round(pts68[0, indx])), int(round(pts68[1, indx])))
                pts.append(pos)
                cv2.circle(img_ori, pos, 3, color=(255, 255, 255), thickness=-1)

        # draw pose box
        if args.dump_pose:
            img_ori = plot_pose_box(img_ori, Ps, pts_res)
        # draw face mesh
        if args.dump_2D_face_mesh:
            img_ori = df.drawMesh(img_ori, densePts.T, tri.T)
        if args.dump_3D_face_mesh:
            pass
            # img = drawface.render(pts68)
        cv2.imshow("faceDetector", img_ori)
        counter += 1
        if (time.time() - start_time) > x:
            print("FPS: ", counter / (time.time() - start_time))
            counter = 0
            start_time = time.time()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def run(use_gpu=False, use_two_step_bbox_init=False, is_dump_to_ply=False, is_dump_vertex=False,
        is_dump_pts=False, is_dump_roi_box=False, is_dump_paf=False, is_dump_obj=False,
        is_dump_pose=False, is_dump_depth=False, is_dump_pncc=False, is_dump_res=False,
        show_landmarks_fig=False, paf_size=3):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if use_gpu:
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib models for face detection and landmarks used for face cropping
    dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
    face_regressor = dlib.shape_predictor(dlib_landmark_model)
    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    def process_one_frame(frame):
        if frame is None:
            print("No frame, check your camera")
            return
        img_ori = frame
        img_fp = "camera.png"  # arbitrary filename, it is not actually used later
        rects = face_detector(img_ori, 1)
        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for i in range(len(rects)):
            if i != 0:
                break  # only process the first detected face
            rect = rects[i]
            # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
            # - use landmark for cropping
            pts = face_regressor(img_ori, rect).parts()
            pts = np.array([[pt.x, pt.y] for pt in pts]).T
            roi_box = parse_roi_box_from_landmark(pts)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if use_gpu:
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # two-step for more accurate bbox to crop face
            if use_two_step_bbox_init:
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if use_gpu:
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)
            # dense face 3d vertices
            vertices = predict_dense(param, roi_box)
            vertices_lst.append(vertices)
            if is_dump_to_ply:
                dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if is_dump_vertex:
                dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
            if is_dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if is_dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if is_dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
                paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=paf_size)
                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if is_dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if is_dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if is_dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri - 1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if is_dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if is_dump_res:
            draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=show_landmarks_fig)

        # the line below replaces the original image with a blank canvas before drawing
        img_ori = np.ones_like(img_ori) * 255
        img_pose = plot_pose_box(img_ori, Ps, pts_res)
        draw_landmarks_opencv(img_pose, pts_res)
        cv2.imshow("pose", img_pose)
        # img_pncc = cpncc(img_ori, vertices_lst, tri - 1)
        # depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)
        # cv2.imwrite("temp.png", img_pose)
        # cv2.imshow("pncc", img_pncc)
        # cv2.imshow("depths", depths_img)

    cap = cv2.VideoCapture(0)
    while cv2.waitKey(1):
        ret, frame = cap.read()
        process_one_frame(frame)
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    # 2. parse images list
    with open(args.img_list) as f:
        img_list = [x.strip() for x in f.readlines()]
    landmark_list = []

    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    if not os.path.exists(args.save_lmk_dir):
        os.mkdir(args.save_lmk_dir)

    for img_idx, img_fp in enumerate(tqdm(img_list)):
        img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))
        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        # the face alignment model uses RGB as input (the result is a tuple with landmarks and boxes),
        # while cv2 reads images in BGR order, hence the channel flip
        preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
        pts_2d_68 = preds[0]
        pts_2d_5 = get_5lmk_from_68lmk(pts_2d_68)
        landmark_list.append(pts_2d_5)
        roi_box = parse_roi_box_from_landmark(pts_2d_68.T)  # determine the roi region from the 68 landmarks

        img = crop_img(img_ori, roi_box)
        # import pdb; pdb.set_trace()
        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)  # face pose (R, t) plus shape and expression coefficients
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)  # pts68 now holds the 68 3D landmark coordinates

        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)

        if args.dump_2d_img:
            # 2d rendering of the face region
            wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
            colors = get_colors(img_ori, vertices)
            # aligned_param = get_aligned_param(param)
            # vertices_aligned = predict_dense(aligned_param, roi_box)
            # h, w, c = 120, 120, 3
            h, w, c = img_ori.shape
            img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
            cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
        if args.dump_param:
            split = img_fp.split('/')
            save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
            this_param = param * param_std + param_mean
            this_param = np.concatenate((this_param, roi_box))
            this_param.tofile(save_name, sep=' ')

    if args.dump_lmk:
        # save the 5-point landmarks for all processed images
        save_path = os.path.join(args.save_lmk_dir, 'realign_lmk')
        with open(save_path, 'w') as f:
            for idx, (fname, land) in enumerate(zip(img_list, landmark_list)):
                # f.write('{} {} {} {}')
                land = land.astype(int)
                land_str = ' '.join([str(x) for x in land])
                msg = f'{fname} {idx} {land_str}\n'
                f.write(msg)
def get_landmark_2d(root, image_path):
    # 0. read image
    img_ori = cv2.imread(os.path.join(root, image_path))

    # 1. load pre-trained model
    checkpoint_fp = 'models/MobDenseNet.pth.tar'
    arch = 'mobdensenet_v1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(MobDenseNet, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib models for face detection and landmarks used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri'] - 1
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    imgScale = 1
    scaledImg = img_ori
    if max(img_ori.shape) > maxImgSizeForDetection:
        imgScale = maxImgSizeForDetection / float(max(img_ori.shape))
        scaledImg = cv2.resize(img_ori, (int(img_ori.shape[1] * imgScale), int(img_ori.shape[0] * imgScale)))
    rects = face_detector(scaledImg, 1)
    for rect in rects:
        if args.dlib_landmark:
            faceRectangle = rectangle(int(rect.left() / imgScale), int(rect.top() / imgScale),
                                      int(rect.right() / imgScale), int(rect.bottom() / imgScale))
            # - use landmark for cropping
            pts = face_regressor(img_ori, faceRectangle).parts()
            pts = np.array([[pt.x, pt.y] for pt in pts]).T
            roi_box = parse_roi_box_from_landmark(pts)
        else:
            bbox = [int(rect.left() / imgScale), int(rect.top() / imgScale),
                    int(rect.right() / imgScale), int(rect.bottom() / imgScale)]
            roi_box = parse_roi_box_from_bbox(bbox)
        img = crop_img(img_ori, roi_box)
        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
        # 68 pts
        pts68 = predict_68pts(param, roi_box)
        return pts68
def run(image):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    model.eval()

    # 2. load dlib models for face detection and landmarks used for face cropping
    dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
    face_regressor = dlib.shape_predictor(dlib_landmark_model)
    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    img_ori = cv2.imread(image)
    # img_ori = img_ori[:, :, [2, 1, 0]]
    rects = face_detector(img_ori, 1)
    # if len(rects) == 0:
    #     rects = dlib.rectangles()
    #     rect_fp = img_fp + '.bbox'
    #     lines = open(rect_fp).read().strip().split('\n')[1:]
    #     for l in lines:
    #         l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
    #         rect = dlib.rectangle(l, r, t, b)
    #         rects.append(rect)
    pts_res = []
    Ps = []  # Camera matrix collection
    poses = []  # pose collection, [todo: validate it]
    vertices_lst = []  # store multiple face vertices
    ind = 0
    suffix = get_suffix(image)
    for rect in rects:
        # whether use dlib landmark to crop image, if not, use only face bbox to calc roi bbox for cropping
        # - use landmark for cropping
        pts = face_regressor(img_ori, rect).parts()
        pts = np.array([[pt.x, pt.y] for pt in pts]).T
        roi_box = parse_roi_box_from_landmark(pts)
        img = crop_img(img_ori, roi_box)
        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
        # 68 pts
        pts68 = predict_68pts(param, roi_box)
        # two-step for more accurate bbox to crop face
        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)
        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)
        vertices_lst.append(vertices)
        wfp = './output/obj_{}.obj'.format(image.split('/')[-1])
        colors = get_colors(img_ori, vertices)
        write_obj_with_colors(wfp, vertices, tri, colors)
        print('Dump obj with sampled texture to {}'.format(wfp))
        ind += 1
def getPoses(img_ori):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    cudnn.benchmark = True
    model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')

    # the face alignment model uses RGB as input; the result is a tuple with landmarks and boxes
    preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
    pts_2d_68 = preds[0]
    roi_box = parse_roi_box_from_landmark(pts_2d_68.T)
    img = crop_img(img_ori, roi_box)
    # import pdb; pdb.set_trace()
    # forward: one step
    img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

    # 68 pts
    pts68 = predict_68pts(param, roi_box)

    # second pass with a roi box refined from the predicted landmarks
    roi_box = parse_roi_box_from_landmark(pts68)
    img_step2 = crop_img(img_ori, roi_box)
    img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img_step2).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
    P, pose = parse_pose(param)

    # dense face 3d vertices
    vertices = predict_dense(param, roi_box)
    colors = get_colors(img_ori, vertices)
    # aligned_param = get_aligned_param(param)
    # vertices_aligned = predict_dense(aligned_param, roi_box)
    # h, w, c = 120, 120, 3
    h, w, c = img_ori.shape
    img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
    img_2d = img_2d[:, :, ::-1]
    return img_2d, pose
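# Hedged usage sketch (not original code): wrap getPoses for a single image and save the rendered face.
# The example path reuses 'samples/test1.jpg' from the classify snippet above purely as an illustration;
# getPoses itself loads its checkpoint from 'models/phase1_wpdc_vdc.pth.tar' as shown above.
def demo_pose(image_path='samples/test1.jpg'):
    img = cv2.imread(image_path)
    rendered, pose = getPoses(img)
    print('estimated pose:', pose)
    cv2.imwrite('pose_render.jpg', rendered)
    return pose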
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) + 10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    # 2. parse images list and landmarks
    lmk_file = args.lmk_file
    ts = time.time()
    rank_land, rank_img_list, start, end = parse_quality_list_part(lmk_file, args.world_size, args.rank, args.resume_idx)
    print('parse land file in {:.3f} seconds'.format(time.time() - ts))

    # for batch processing
    print('World size {}, rank {}, start from {}, end with {}'.format(args.world_size, args.rank, start, end))
    dataset = McDataset(rank_img_list, rank_land, transform=transform, std_size=STD_SIZE)
    dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=2, pin_memory=True)

    for img_idx, (inputs, ori_imgs, img_fps, roi_boxes) in enumerate(tqdm(dataloader)):
        # forward: one step
        with torch.no_grad():
            if args.mode == 'gpu':
                inputs = inputs.cuda()
            params = model(inputs)
            params = params.cpu().numpy()

        roi_boxes = roi_boxes.numpy()
        outputs_roi_boxes = roi_boxes
        if args.bbox_init == 'two':
            step_two_ori_imgs = []
            step_two_roi_boxes = []
            ori_imgs = ori_imgs.numpy()
            for ii in range(params.shape[0]):
                # 68 pts
                pts68 = predict_68pts(params[ii], roi_boxes[ii])
                # two-step for more accurate bbox to crop face
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(ori_imgs[ii], roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                # input = transform(img_step2).unsqueeze(0)
                step_two_ori_imgs.append(transform(img_step2))
                step_two_roi_boxes.append(roi_box)
            with torch.no_grad():
                step_two_ori_imgs = torch.stack(step_two_ori_imgs, dim=0)
                inputs = step_two_ori_imgs
                if args.mode == 'gpu':
                    inputs = inputs.cuda()
                params = model(inputs)
                params = params.cpu().numpy()
            outputs_roi_boxes = step_two_roi_boxes

        # dump results
        if args.dump_param:
            for img_fp, param, roi_box in zip(img_fps, params, outputs_roi_boxes):
                split = img_fp.split('/')
                save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
                this_param = param * param_std + param_mean
                this_param = np.concatenate((this_param, roi_box))
                this_param.tofile(save_name, sep=' ')