Example #1
def main(args):
    # 1. load the pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load the dlib models for face detection and for the landmarks used to crop faces
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            try:
                lines = open(rect_fp).read().strip().split('\n')[1:]
            except FileNotFoundError:
                print('Cannot load bbox file')
                continue
            for line in lines:
                l, r, t, b = [int(_) for _ in line.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # if dlib landmarks are enabled, use them to crop; otherwise compute the ROI from the detected face bbox alone
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)

                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            # dense face 3d vertices
            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
            if args.dump_ply:
                dump_to_ply(
                    vertices, tri,
                    '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_vertex:
                dump_vertex(
                    vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''),
                                                 ind))
            if args.dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if args.dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if args.dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''),
                                                 ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''),
                                                   ind)
                paf_feature = gen_img_paf(img_crop=img,
                                          param=param,
                                          kernel_size=args.paf_size)

                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if args.dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if args.dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if args.dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri-1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst,
                                           tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if args.dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst,
                                 tri - 1)  # cython version
            cv2.imwrite(
                wfp,
                pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if args.dump_res:
            draw_landmarks(img_ori,
                           pts_res,
                           wfp=img_fp.replace(suffix, '_3DDFA.jpg'),
                           show_flg=args.show_flg)
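A minimal command-line wrapper that could drive main() above might look like the sketch below; the flag names and defaults are assumptions inferred from the args attributes the function reads, not taken from the original script.

import argparse

def str2bool(v):
    # hypothetical helper for parsing boolean flags
    return str(v).lower() in ('true', '1', 'yes')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='3DDFA inference (hypothetical CLI)')
    parser.add_argument('-f', '--files', nargs='+', default=[], help='input image paths')
    parser.add_argument('--mode', default='cpu', choices=['cpu', 'gpu'])
    parser.add_argument('--bbox_init', default='one', choices=['one', 'two'])
    parser.add_argument('--dlib_bbox', default=True, type=str2bool)
    parser.add_argument('--dlib_landmark', default=True, type=str2bool)
    parser.add_argument('--paf_size', default=3, type=int)
    parser.add_argument('--show_flg', default=False, type=str2bool)
    for flag in ('dump_res', 'dump_ply', 'dump_vertex', 'dump_pts', 'dump_roi_box',
                 'dump_paf', 'dump_obj', 'dump_pose', 'dump_depth', 'dump_pncc'):
        parser.add_argument('--' + flag, default=False, type=str2bool)
    main(parser.parse_args())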
Example #2
def getPoses(img_ori):
    # 1. load the pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    cudnn.benchmark = True
    model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')

    # the face-alignment model expects RGB input (cv2 loads BGR); it returns the detected landmarks
    preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
    pts_2d_68 = preds[0]
    roi_box = parse_roi_box_from_landmark(pts_2d_68.T)

    img = crop_img(img_ori, roi_box)
    # import pdb; pdb.set_trace()

    # forward: one step
    img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

    # 68 pts
    pts68 = predict_68pts(param, roi_box)

    roi_box = parse_roi_box_from_landmark(pts68)
    img_step2 = crop_img(img_ori, roi_box)
    img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img_step2).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

    P, pose = parse_pose(param)        
    # dense face 3d vertices
    vertices = predict_dense(param, roi_box)
    colors = get_colors(img_ori, vertices)
    # aligned_param = get_aligned_param(param)
    # vertices_aligned = predict_dense(aligned_param, roi_box)
    # h, w, c = 120, 120, 3
    h, w, c = img_ori.shape
    img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
    img_2d = img_2d[:, :, ::-1]
        
    return img_2d, pose
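A possible way to call getPoses(), assuming a hypothetical input path; the image is read with OpenCV in BGR order, as the function expects.

import cv2

img = cv2.imread('samples/face.jpg')          # hypothetical input path (BGR, as cv2 loads it)
rendered, pose = getPoses(img)
print('estimated pose:', pose)
cv2.imwrite('samples/face_rendered.jpg', rendered)  # cast/clip the render if needed before saving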
Example #3
def main(args):
    # 1. load the pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    # 2. parse images list
    img_list = listdir(args.img_prefix)

    alignment_model = face_alignment.FaceAlignment(
        face_alignment.LandmarksType._2D, flip_input=False, device=args.mode)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    for img_idx, img_fp in enumerate(tqdm(img_list)):
        img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        # the face-alignment model expects RGB input (cv2 loads BGR); it returns the detected landmarks
        preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
        try:
            pts_2d_68 = preds[0]
        except (TypeError, IndexError):
            # skip images where no face was detected
            continue
        roi_box = parse_roi_box_from_landmark(pts_2d_68.T)

        img = crop_img(img_ori, roi_box)
        # import pdb; pdb.set_trace()

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2,
                                   dsize=(STD_SIZE, STD_SIZE),
                                   interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)

        if args.dump_2d_img:
            wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
            colors = get_colors(img_ori, vertices)
            # aligned_param = get_aligned_param(param)
            # vertices_aligned = predict_dense(aligned_param, roi_box)
            # h, w, c = 120, 120, 3
            h, w, c = img_ori.shape
            img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1],
                                    h, w)
            cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
        del img_ori
        del img
        if args.dump_2d_img:
            del img_2d
Example #4
def main(args):
    # 1. load the pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    # 2. parse images list 
    with open(args.img_list) as f:
        img_list = [x.strip() for x in f.readlines()]
    landmark_list = []

    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    if not os.path.exists(args.save_lmk_dir):
        os.mkdir(args.save_lmk_dir)

    for img_idx, img_fp in enumerate(tqdm(img_list)):
        img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        # the face-alignment model expects RGB input (cv2 loads images in BGR order); it returns the detected landmarks
        preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
        pts_2d_68 = preds[0]
        pts_2d_5 = get_5lmk_from_68lmk(pts_2d_68)
        landmark_list.append(pts_2d_5)
        roi_box = parse_roi_box_from_landmark(pts_2d_68.T)      # determine the ROI from the 68 landmarks

        img = crop_img(img_ori, roi_box)
        # import pdb; pdb.set_trace()

        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)                        # pose (R/t) plus shape and expression coefficients
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)                   # pts68 now holds the 68 3D landmark coordinates

        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)

        if args.dump_2d_img:                # 2D render of the face region
            wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
            colors = get_colors(img_ori, vertices)
            # aligned_param = get_aligned_param(param)
            # vertices_aligned = predict_dense(aligned_param, roi_box)
            # h, w, c = 120, 120, 3
            h, w, c = img_ori.shape
            img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
            cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
        if args.dump_param:
            split = img_fp.split('/')
            save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
            this_param = param * param_std + param_mean
            this_param = np.concatenate((this_param, roi_box))
            this_param.tofile(save_name, sep=' ')
    if args.dump_lmk:                   # save the collected 5-point landmarks
        save_path = os.path.join(args.save_lmk_dir, 'realign_lmk')
        with open(save_path, 'w') as f:
            for idx, (fname, land) in enumerate(zip(img_list, landmark_list)):
                # f.write('{} {} {} {}')
                land = land.astype(int)
                land_str = ' '.join([str(x) for x in land])
                msg = f'{fname} {idx} {land_str}\n'
                f.write(msg)
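The --dump_param branch above writes the 62 de-normalized parameters followed by the 4 roi_box values as space-separated text; below is a sketch for reading such a file back, splitting it according to the "12(pose) + 40(shape) + 10(expression)" comment.

import numpy as np

def load_dumped_param(path):
    # values: 62 de-normalized 3DMM parameters followed by 4 roi_box entries
    values = np.fromfile(path, sep=' ', dtype=np.float32)
    param, roi_box = values[:62], values[62:66]
    pose_p, alpha_shp, alpha_exp = param[:12], param[12:52], param[52:62]
    return pose_p, alpha_shp, alpha_exp, roi_box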
Example #5
input_shape = (300, 300)
image_array = load_image(image_path, input_shape)
prior_boxes = to_point_form(prior_boxes)
box_coordinates = prior_boxes[7010:7015, :]
plot_box_data(box_coordinates, image_array)
plt.imshow(image_array)
plt.show()

# ground truth
# ------------------------------------------------------------------
image_name, box_data = random.sample(ground_truth_data.items(), 1)[0]
print('Data sample: \n', box_data)
# image_path = dataset_manager.images_path + image_name
image_path = image_name
arg_to_class = dataset_manager.arg_to_class
colors = get_colors(len(class_names))
image_array = load_image(image_path, input_shape)
plot_box_data(box_data, image_array, arg_to_class, colors=colors)
plt.imshow(image_array)
plt.show()

# assigned boxes
assigned_boxes = assign_prior_boxes(prior_boxes,
                                    box_data,
                                    len(class_names),
                                    regress=False,
                                    overlap_threshold=.5)
positive_mask = assigned_boxes[:, 4] != 1
positive_boxes = assigned_boxes[positive_mask]
image_array = load_image(image_path, input_shape)
plot_box_data(positive_boxes, image_array, arg_to_class, colors=colors)
Example #6
    def forward(self, img_ori, saveFolder, imgName):
        rects = self.face_detector(img_ori, 1)
        if len(rects) == 0:
            return
        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = saveFolder

        rect = rects[0]
        # use dlib landmarks (rather than just the face bbox) to compute the crop ROI
        pts = self.face_regressor(img_ori, rect).parts()
        pts = np.array([[pt.x, pt.y] for pt in pts]).T  # detected landmarks, should record this
        roi_box = parse_roi_box_from_landmark(pts)
        img = crop_img(img_ori, roi_box)

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = self.transform(img).unsqueeze(0)
        with torch.no_grad():
            if self.mode == 'gpu':
                input = input.cuda()
            param = self.model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        #pts_res.append(pts68)
        pts_res.append(pts)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)
        vertices_lst.append(vertices)

        # save projected 3D points
        wfp = '{}_projected.txt'.format(os.path.join(saveFolder, imgName))
        np.savetxt(wfp, pts68, fmt='%.3f')
        #print('Save 68 3d landmarks to {}'.format(wfp))

        wfp = '{}_detected.txt'.format(os.path.join(saveFolder, imgName))
        np.savetxt(wfp, pts, fmt='%.3f')
        #print('Save 68 3d landmarks to {}'.format(wfp))

        # save obj file
        wfp = '{}.obj'.format(os.path.join(saveFolder, imgName))
        colors = get_colors(img_ori, vertices)
        write_obj_with_colors(wfp, vertices, self.tri, colors)
        #print('Dump obj with sampled texture to {}'.format(wfp))

        wfp = os.path.join(saveFolder, imgName) + '_depth.png'
        depths_img = cget_depths_image(img_ori, vertices_lst,
                                       self.tri - 1)  # cython version
        cv2.imwrite(wfp, depths_img)
        #print('Dump to {}'.format(wfp))

        draw_landmarks(img_ori,
                       pts_res,
                       wfp=os.path.join(saveFolder, imgName) + '_3DDFA.png',
                       show_flg=False)
Example #7
from datasets.data_manager import DataManager
from utils.boxes import create_prior_boxes
from utils.inference import plot_box_data
from utils.inference import get_colors
from utils.boxes import unregress_boxes
from utils.boxes import to_point_form
from utils.generator import ImageGenerator
import matplotlib.pyplot as plt

# parameters
dataset_name = 'VOC2012'
batch_size = 20
colors = get_colors(25)
prior_boxes = to_point_form(create_prior_boxes())

# loading training data
data_manager = DataManager(dataset_name, 'train')
train_data = data_manager.load_data()
arg_to_class = data_manager.arg_to_class
# loading validation data
val_data = DataManager(dataset_name, 'val').load_data()

# generating output
generator = ImageGenerator(train_data, val_data, prior_boxes, batch_size)
generated_data = next(generator.flow('train'))
transformed_image_batch = generated_data[0]['input_1']
generated_output = generated_data[1]['predictions']

for batch_arg, transformed_image in enumerate(transformed_image_batch):
    positive_mask = generated_output[batch_arg, :, 4] != 1
    regressed_boxes = generated_output[batch_arg]
Example #8
    def process_one_frame(frame):
        if frame is None:
            print("No frame, check your camera")
            return
        img_ori = frame
        img_fp = "camera.png"  # 随便给一个filename 反正之后用不到
        rects = face_detector(img_ori, 1)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        for i in range(len(rects)):
            if i != 0: break  # only process the first detected face
            rect = rects[i]
            # use dlib landmarks (rather than just the face bbox) to compute the crop ROI
            pts = face_regressor(img_ori, rect).parts()
            pts = np.array([[pt.x, pt.y] for pt in pts]).T
            roi_box = parse_roi_box_from_landmark(pts)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if use_gpu:
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # two-step for more accurate bbox to crop face
            if use_two_step_bbox_init:
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if use_gpu:
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)
            # dense face 3d vertices
            vertices = predict_dense(param, roi_box)
            vertices_lst.append(vertices)
            if is_dump_to_ply:
                dump_to_ply(
                    vertices, tri,
                    '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if is_dump_vertex:
                dump_vertex(
                    vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''),
                                                 ind))
            if is_dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if is_dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if is_dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''),
                                                 ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''),
                                                   ind)
                paf_feature = gen_img_paf(img_crop=img,
                                          param=param,
                                          kernel_size=paf_size)
                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if is_dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1
        if is_dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if is_dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri-1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst,
                                           tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if is_dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst,
                                 tri - 1)  # cython version
            cv2.imwrite(
                wfp,
                pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if is_dump_res:
            draw_landmarks(img_ori,
                           pts_res,
                           wfp=img_fp.replace(suffix, '_3DDFA.jpg'),
                           show_flg=show_landmarks_fig)

        # the next line blanks out the original image (white background)
        img_ori = np.ones_like(img_ori) * 255

        img_pose = plot_pose_box(img_ori, Ps, pts_res)
        draw_landmarks_opencv(img_pose, pts_res)
        cv2.imshow("pose", img_pose)
Example #9
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load pre-trained model uv-gan
    if args.uvgan:
        if args.checkpoint_uv_gan == "":
            print("Specify the path to checkpoint uv_gan")
            exit()
        uvgan = infer_uv_gan.UV_GAN(args.checkpoint_uv_gan)

    # 3. load the dlib models for face detection and for the landmarks used to crop faces
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 4. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for line in lines:
                l, r, t, b = [int(_) for _ in line.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # if dlib landmarks are enabled, use them to crop; otherwise compute the ROI from the detected face bbox alone
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)

                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)

            Ps.append(P)
            poses.append(pose)

            if args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)

                p, offset, alpha_shp, alpha_exp = _parse_param(param)

                vertices = (u + w_shp @ alpha_shp + w_exp @ alpha_exp).reshape(
                    3, -1, order='F') + offset
                vertices = vertices.T
                tri_local = tri.T - 1  # transposed, 0-indexed copy; avoid mutating the shared tri across faces
                print('Dump obj with sampled texture to {}'.format(wfp))
                unwraps = create_unwraps(vertices)
                h, w = args.height, args.width
                tcoords = process_uv(unwraps[:, :2], h, w)
                texture = render_colors(tcoords, tri_local, colors, h, w,
                                        c=3).astype('uint8')
                scaled_tcoords = scale_tcoords(tcoords)
                if args.uvgan:
                    texture = uvgan.infer(texture)
                else:
                    texture = cv2.cvtColor(texture, cv2.COLOR_BGR2RGB)
                vertices, colors, uv_coords = vertices.astype(
                    np.float32).copy(), colors.astype(
                        np.float32).copy(), scaled_tcoords.astype(
                            np.float32).copy()
                write_obj_with_colors_texture(wfp, vertices, colors, tri_local,
                                              texture * 255.0, uv_coords)

            ind += 1
Example #10
    print('param: ' + str(param))
    p, offset, alpha_shp, alpha_exp = _parse_param(param)
    print('alpha_exp: ' + str(alpha_exp))

    # 68 pts
    # bbox = [0, 0, 120, 120]
    # roi_box = parse_roi_box_from_bbox(bbox)
    pts68 = predict_68pts(param, roi_box)
    # print('pts68: ' + str(pts68))
    print('pts68.shape: ' + str(pts68.shape))

    P, pose = parse_pose(param)
    # print('P: ' + str(P))
    print('P.shape: ' + str(P.shape))
    print('pose: ' + str(pose))

    vertices = predict_dense(param, roi_box)
    # print('vertices: ' + str(vertices))
    print('vertices.shape: ' + str(vertices.shape))

    ensure_folder('result')
    suffix = get_suffix(filename)
    print('suffix: ' + suffix)
    tri = sio.loadmat('visualize/tri.mat')['tri']
    dump_to_ply(vertices, tri, '{}.ply'.format(filename.replace(suffix, '')))

    wfp = '{}.obj'.format(filename.replace(suffix, ''))
    colors = get_colors(img_ori, vertices)
    write_obj_with_colors(wfp, vertices, tri, colors)
    print('Dump obj with sampled texture to {}'.format(wfp))
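For reference, a rough sketch of how the 62-dimensional parameter vector used throughout these examples decomposes, following the "12(pose) + 40(shape) + 10(expression)" comment; the actual de-normalization is handled inside _parse_param and may differ.

pose_part = param[:12].reshape(3, -1)   # 3x4 pose block (rotation/scale plus translation offset)
alpha_shp_raw = param[12:52]            # 40 shape coefficients (network-normalized space)
alpha_exp_raw = param[52:62]            # 10 expression coefficients (network-normalized space)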
Example #11
def run(image):
    # 1. load the pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    model.eval()

    # 2. load the dlib models for face detection and for the landmarks used to crop faces
    dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
    face_regressor = dlib.shape_predictor(dlib_landmark_model)
    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    img_ori = cv2.imread(image)
    # img_ori = img_ori[:, :, [2, 1, 0]]
    rects = face_detector(img_ori, 1)

    # if len(rects) == 0:
    #     rects = dlib.rectangles()
    #     rect_fp = img_fp + '.bbox'
    #     lines = open(rect_fp).read().strip().split('\n')[1:]
    #     for l in lines:
    #         l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
    #         rect = dlib.rectangle(l, r, t, b)
    #         rects.append(rect)

    pts_res = []
    Ps = []  # Camera matrix collection
    poses = []  # pose collection, [todo: validate it]
    vertices_lst = []  # store multiple face vertices
    ind = 0
    suffix = get_suffix(image)
    for rect in rects:
        # use dlib landmarks (rather than just the face bbox) to compute the crop ROI
        pts = face_regressor(img_ori, rect).parts()
        pts = np.array([[pt.x, pt.y] for pt in pts]).T
        roi_box = parse_roi_box_from_landmark(pts)

        img = crop_img(img_ori, roi_box)

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        # two-step for more accurate bbox to crop face

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        vertices = predict_dense(param, roi_box)
        vertices_lst.append(vertices)

        # dense face 3d vertices
        wfp = './output/obj_{}.obj'.format(image.split('/')[-1])
        colors = get_colors(img_ori, vertices)
        write_obj_with_colors(wfp, vertices, tri, colors)
        print('Dump obj with sampled texture to {}'.format(wfp))
        ind += 1
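A minimal, hypothetical invocation of run(); the image path is an assumption, and ./output must exist because run() writes the .obj file there.

import os

if __name__ == '__main__':
    os.makedirs('./output', exist_ok=True)   # run() saves obj_<name>.obj into this folder
    run('samples/test.jpg')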