Example #1
    def __init__(self, mode='gpu'):
        # load model
        checkpoint_fp = os.path.join(baseFolder,
                                     'models/phase1_wpdc_vdc_v2.pth.tar')
        checkpoint = torch.load(
            checkpoint_fp,
            map_location=lambda storage, loc: storage)['state_dict']
        self.model = getattr(mobilenet_v1, 'mobilenet_1')(
            num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

        model_dict = self.model.state_dict()
        # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
        for k in checkpoint.keys():
            model_dict[k.replace('module.', '')] = checkpoint[k]
        self.model.load_state_dict(model_dict)
        self.mode = mode
        if self.mode == 'gpu':
            cudnn.benchmark = True
            self.model = self.model.cuda()
        self.model.eval()

        # load dlib models for face detection and landmark prediction used for face cropping
        dlib_landmark_model = os.path.join(
            baseFolder, 'models/shape_predictor_68_face_landmarks.dat')
        self.face_regressor = dlib.shape_predictor(dlib_landmark_model)
        self.face_detector = dlib.get_frontal_face_detector()

        self.tri = sio.loadmat(os.path.join(baseFolder,
                                            'visualize/tri.mat'))['tri']
        self.transform = transforms.Compose(
            [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
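The snippet above is the constructor of a wrapper class whose name is not shown in the listing. A minimal usage sketch, assuming the class is called FaceReconstructor (a placeholder name) and the model files exist under baseFolder:

# Hypothetical usage; FaceReconstructor stands in for the class this __init__ belongs to.
import cv2

reconstructor = FaceReconstructor(mode='gpu')
img = cv2.imread('samples/test1.jpg')                 # placeholder image path
rects = reconstructor.face_detector(img, 1)           # dlib face detection
shape = reconstructor.face_regressor(img, rects[0])   # 68 dlib landmarks used for cropping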
Example #2
def extract_param(checkpoint_fp, root='', filelists=None, arch='mobilenet_1', num_classes=62, device_ids=[0],
                  batch_size=128, num_workers=4):
    map_location = {f'cuda:{i}': 'cuda:0' for i in range(8)}
    checkpoint = torch.load(checkpoint_fp, map_location=map_location)['state_dict']
    torch.cuda.set_device(device_ids[0])
    model = getattr(mobilenet_v1, arch)(num_classes=num_classes)
    model = nn.DataParallel(model, device_ids=device_ids).cuda()
    model.load_state_dict(checkpoint)

    dataset = DDFATestDataset(filelists=filelists, root=root,
                              transform=transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)]))
    data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

    cudnn.benchmark = True
    model.eval()

    end = time.time()
    outputs = []
    with torch.no_grad():
        for _, inputs in enumerate(data_loader):
            inputs = inputs.cuda()
            output = model(inputs)

            for i in range(output.shape[0]):
                param_prediction = output[i].cpu().numpy().flatten()

                outputs.append(param_prediction)
        outputs = np.array(outputs, dtype=np.float32)

    print(f'Extracting params took {time.time() - end: .3f}s')
    return outputs
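A short usage sketch for extract_param; the checkpoint and file-list paths below are placeholders, not files guaranteed by this listing:

# Hypothetical paths; point them at your own checkpoint and test list.
params = extract_param(checkpoint_fp='models/phase1_wpdc_vdc.pth.tar',
                       root='test.data/AFLW2000-3D_crop',
                       filelists='test.data/AFLW2000-3D_crop.list',
                       arch='mobilenet_1',
                       batch_size=128)
print(params.shape)  # (num_images, 62): 12 pose + 40 shape + 10 expression parameters per image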
Example #3
def get_landmark(path):
    device = torch.device("cuda")
    cap = cv2.VideoCapture(path)

    cudnn.benchmark = True
    smooth_flag = False

    ret, frame = cap.read()
    frame = resize_frame(frame, cfg.resize_size)
    height, width, _ = frame.shape

    face_detect = detector.SFDDetector(device, cfg.rect_model_path)
    face_landmark = FAN_landmark.FANLandmarks(device, cfg.landmark_model_path,
                                              cfg.detect_type)
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    model = mobilenet_v1.mobilenet(cfg.checkpoint_fp, num_classes=240)

    landmark_list = []
    frame_list = []

    while ret:
        frame = resize_frame(frame, cfg.resize_size)
        bbox = face_detect.extract(frame, cfg.thresh)
        if len(bbox) > 0:
            frame, landmarks = face_landmark.extract([frame, bbox])
            roi = roi_box_from_landmark(landmarks[0])
            img = crop_img(frame, roi)
            img = cv2.resize(img,
                             dsize=(cfg.STD_SIZE, cfg.STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input_data = transform(img).unsqueeze(0)
            with torch.no_grad():
                param = model(input_data.cuda())
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)
            pts68 = predict_68pts(param, roi, img.shape[0])
            if smooth_flag:
                show_frame, landmark_3d = get_smooth_data(
                    frame, pts68.T, frame_list, landmark_list, cfg.list_size)
            else:
                show_frame = frame
                landmark_3d = pts68.T
            for x, y in landmark_3d[:, :2]:
                cv2.circle(show_frame, (int(x), int(y)), 1, (255, 255, 0))
            cv2.imshow('3ddfa', show_frame)
            cv2.waitKey(1)
        else:
            print("cannot find face")
            # read the next frame before continuing, otherwise the loop spins forever on the same frame
            ret, frame = cap.read()
            continue

        ret, frame = cap.read()
    cap.release()
    cv2.destroyAllWindows()
Example #4
def extract_param(checkpoint_fp, root='', filelists=None, num_classes=62, device_ids=[0],
                  batch_size=1, num_workers=0):
    map_location = {f'cuda:{i}': 'cuda:0' for i in range(8)}
    # checkpoint = torch.load(checkpoint_fp, map_location=map_location)['state_dict']
    torch.cuda.set_device(device_ids[0])
    model = PRN(checkpoint_fp)
    # model = nn.DataParallel(model, device_ids=device_ids).cuda()
    # model.load_state_dict(checkpoint)

    dataset = DDFATestDataset(filelists=filelists, root=root,
                              transform=transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)]))
    data_loader = data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)

    cudnn.benchmark = True
    # model.eval()

    end = time.time()
    outputs = []
    with torch.no_grad():
        for _, inputs in enumerate(data_loader):
            inputs = inputs.cuda()

            # Get the output landmarks
            pos = model.net_forward(inputs)

            out = pos.cpu().detach().numpy()
            pos = np.squeeze(out)
            cropped_pos = pos * 255
            pos = cropped_pos.transpose(1, 2, 0)

            if pos is None:
                continue

            # print(pos.shape)
            output = model.get_landmarks(pos)
            # print(output.shape)

            outputs.append(output)

        outputs = np.array(outputs, dtype=np.float32)
        print("outputs",outputs.shape)
    print(f'Extracting params take {time.time() - end: .3f}s')
    return outputs
Example #5
def get_inputs(files: List[str]) -> Tuple[torch.Tensor, List[np.ndarray]]:
    """Get Inputs

    Parameters
    ----------
    files: List[ str ]
           List of frames to process.

    Returns
    -------
    inputs  : torch.Tensor
              Frame cropped to the face.
    imgs_roi: List[ np.ndarray ]
              Frame face rectangles ( region of interest ).
    """
    face_reg, face_det = get_face_model()
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    imgs_ori = [cv2.imread(file) for file in files]
    imgs_rect = [face_det(img)[0] for img in imgs_ori]
    imgs_pts = [
        face_reg(imgs_ori[i], imgs_rect[i]).parts()
        for i in range(len(imgs_ori))
    ]
    imgs_pts = [
        np.array([[pt.x, pt.y] for pt in imgs_pts[i]]).T
        for i in range(len(imgs_ori))
    ]
    imgs_roi = [parse_roi_box_from_landmark(pts) for pts in imgs_pts]
    imgs_cropped = [
        crop_img(imgs_ori[i], imgs_roi[i]) for i in range(len(imgs_ori))
    ]
    imgs_new = [
        cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LINEAR)
        for img in imgs_cropped
    ]

    inputs = [transform(img).unsqueeze(0) for img in imgs_new]
    inputs = torch.cat(inputs, dim=0)

    return inputs, imgs_roi
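Sketch of how get_inputs might be called; the frame filenames are placeholders and each image is assumed to contain one detectable face:

inputs, rois = get_inputs(['frames/0001.png', 'frames/0002.png'])
print(inputs.shape)  # torch.Size([2, 3, 120, 120]): one 120x120 crop per frame
print(len(rois))     # one ROI box per frame, used later to map predictions back to image space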
Example #6
def getFaceTextureCoords(textImag):
    checkpoint_fp = 'models/MobDenseNet.pth.tar'
    arch = 'densemobilenetv4_19'
    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(MobDenseNet, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)
    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    cudnn.benchmark = True
    model = model.cuda()
    model.eval()
    face_detector = dlib.get_frontal_face_detector()
    # 3. forward
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    img_ori = textImag
    rects = face_detector(img_ori, 1)
    for rect in rects:
        # - use detected face bbox
        bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
        roi_box = parse_roi_box_from_bbox(bbox)
        img = crop_img(img_ori, roi_box)

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)
        return pts68[[0, 1], :]
Example #7
def detect_pose(img_ori, target, model):
    STD_SIZE = 120
    pose_list = []
    for rect in target:
        roi_box = rect[0:4]
        # print(roi_box)
        try:
            img = crop_img(img_ori, roi_box)
        except Exception as e:
            print(e)
            continue

        shape_img = img.shape
        if shape_img[0] < 100 or shape_img[1] < 100:
            continue
        if len(img) == 0:
            return pose_list
        try:
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
        except Exception as e:
            print(e)
            continue
        transform = transforms.Compose(
            [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        pts68 = predict_68pts(param, roi_box)
        P, pose, pose_angle = parse_pose(param)

        # if shape_img[0]>100 and shape_img[1]>100 and np.abs(pose_angle[0]) <= 60 and np.abs(pose_angle[1]) <= 60 and np.abs(pose_angle[2])<= 60:
        #   print(shape_img)
        #   plt.imshow(img)
        #   plt.show()
        pose_list.append(pose_angle)
    return pose_list
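A hedged usage sketch for detect_pose; it assumes each entry of target starts with the face box coordinates, as the slicing rect[0:4] above suggests:

# Hypothetical detections: [x1, y1, x2, y2, score] per face.
detections = [[120, 80, 360, 340, 0.99]]
angles = detect_pose(img_ori, detections, model)
for pose_angle in angles:
    print(pose_angle)  # the three rotation angles returned by parse_pose in this variant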
Example #8
def r3fa_landmark(image, rect, model, predictor, imgScale):

    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    Ps = []  # Camera matrix collection
    pts_res = []
    faceRectangle = rectangle(int(rect.left() / imgScale), int(rect.top() / imgScale), int(rect.right() / imgScale),
                              int(rect.bottom() / imgScale))

    # - use landmarks for cropping
    pts = predictor(image, faceRectangle).parts()
    pts = np.array([[pt.x, pt.y] for pt in pts]).T
    roi_box = parse_roi_box_from_landmark(pts)
    # # - use detected face bbox instead
    # bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
    # roi_box = parse_roi_box_from_bbox(bbox)
    img = crop_img(image, roi_box)

    # forward: one step
    img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
    # 68 pts
    pts68 = predict_68pts(param, roi_box)
    pts_res.append(pts68)
    P, pose = parse_pose(param)
    Ps.append(P)
    # # draw the landmarks
    # for indx in range(68):
    #     pos = (pts68[0, indx], pts68[1, indx])
    #     # pts.append(pos)
    #     cv2.circle(img, pos, 3, color=(255, 255, 255), thickness=-1)
    # img_ori = plot_pose_box(img, Ps, pts_res)
    return Ps, pts68[[0, 1], :]
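A possible invocation of r3fa_landmark, assuming a dlib shape predictor and a CUDA model are already loaded as in the other examples:

# Hypothetical setup; model and predictor follow the loading pattern shown elsewhere in this listing.
detector = dlib.get_frontal_face_detector()
rects = detector(frame, 1)
if len(rects) > 0:
    Ps, xy68 = r3fa_landmark(frame, rects[0], model, predictor, imgScale=1.0)
    # xy68 has shape (2, 68): the x/y image coordinates of the 68 landmarks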
Example #9
def main():
    parse_args()  # parse global args

    # logging setup
    logging.basicConfig(
        format=
        '[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode=args.log_mode),
            logging.StreamHandler()
        ])

    print_args(args)  # print args

    # step1: define the model structure
    model = getattr(mobilenet_v1, args.arch)(num_classes=args.num_classes)

    torch.cuda.set_device(
        args.devices_id[0]
    )  # fix bug for `ERROR: all tensors must be on devices[0]`

    model = nn.DataParallel(model, device_ids=args.devices_id).cuda()  # -> GPU

    # step2: optimization: loss and optimization method
    # criterion = nn.MSELoss(size_average=args.size_average).cuda()
    if args.loss.lower() == 'wpdc':
        print(args.opt_style)
        criterion = WPDCLoss(opt_style=args.opt_style).cuda()
        logging.info('Use WPDC Loss')
    elif args.loss.lower() == 'vdc':
        criterion = VDCLoss(opt_style=args.opt_style).cuda()
        logging.info('Use VDC Loss')
    elif args.loss.lower() == 'pdc':
        criterion = nn.MSELoss(size_average=args.size_average).cuda()
        logging.info('Use PDC loss')
    else:
        raise Exception(f'Unknown Loss {args.loss}')

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.base_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)
    # step 2.1 resume
    if args.resume:
        if Path(args.resume).is_file():
            logging.info(f'=> loading checkpoint {args.resume}')

            checkpoint = torch.load(
                args.resume,
                map_location=lambda storage, loc: storage)['state_dict']
            # checkpoint = torch.load(args.resume)['state_dict']
            model.load_state_dict(checkpoint)

        else:
            logging.info(f'=> no checkpoint found at {args.resume}')

    # step3: data
    normalize = NormalizeGjz(mean=127.5, std=128)  # may need optimization

    train_dataset = DDFADataset(root=args.root,
                                filelists=args.filelists_train,
                                param_fp=args.param_fp_train,
                                transform=transforms.Compose(
                                    [ToTensorGjz(), normalize]))
    val_dataset = DDFADataset(root=args.root,
                              filelists=args.filelists_val,
                              param_fp=args.param_fp_val,
                              transform=transforms.Compose(
                                  [ToTensorGjz(), normalize]))

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              pin_memory=True,
                              drop_last=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.val_batch_size,
                            num_workers=args.workers,
                            shuffle=False,
                            pin_memory=True)

    # step4: run
    cudnn.benchmark = True
    if args.test_initial:
        logging.info('Testing from initial')
        validate(val_loader, model, criterion, args.start_epoch)

    for epoch in range(args.start_epoch, args.epochs + 1):
        # adjust learning rate
        adjust_learning_rate(optimizer, epoch, args.milestones)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        filename = f'{args.snapshot}_checkpoint_epoch_{epoch}.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                # 'optimizer': optimizer.state_dict()
            },
            filename)

        validate(val_loader, model, criterion, epoch)
Example #10
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    # 2. parse images list
    img_list = listdir(args.img_prefix)

    alignment_model = face_alignment.FaceAlignment(
        face_alignment.LandmarksType._2D, flip_input=False, device=args.mode)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    for img_idx, img_fp in enumerate(tqdm(img_list)):
        img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        # the face alignment model takes RGB input; the result is a tuple of landmarks and boxes
        preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
        try:
            pts_2d_68 = preds[0]
        except (TypeError, IndexError):  # no face found by the alignment model
            continue
        roi_box = parse_roi_box_from_landmark(pts_2d_68.T)

        img = crop_img(img_ori, roi_box)
        # import pdb; pdb.set_trace()

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2,
                                   dsize=(STD_SIZE, STD_SIZE),
                                   interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)

        if args.dump_2d_img:
            wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
            colors = get_colors(img_ori, vertices)
            # aligned_param = get_aligned_param(param)
            # vertices_aligned = predict_dense(aligned_param, roi_box)
            # h, w, c = 120, 120, 3
            h, w, c = img_ori.shape
            img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1],
                                    h, w)
            cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
        del img_ori
        del img
        if args.dump_2d_img:
            del img_2d  # img_2d only exists when the 2D rendering branch above ran
Example #11
    model_dict = model.state_dict()
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. Face detection function
    face_detector = dlib.get_frontal_face_detector()

    # 3. Detection
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    print('len(folder) :', len(folder))

    for item in tqdm(folder):
        try:
            img_ori = cv2.imread(str(folder[item]))
            rects = face_detector(img_ori, 1)

            if len(rects) != 0:
                for rect in rects:
                    bbox = [
                        rect.left(),
                        rect.top(),
                        rect.right(),
                        rect.bottom()
Example #12
    model_dict = model.state_dict()
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. Face detection function
    face_detector = dlib.get_frontal_face_detector()

    # 3. Detection
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    for item in folder:
        img_ori = cv2.imread(folder[item])
        rects = face_detector(img_ori, 1)
        if len(rects) != 0:
        
            for rect in rects:
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)
                img = crop_img(img_ori, roi_box)
                img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input = transform(img).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
Example #13
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    app = RenderPipeline(**cfg)

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('tri_refine.mat')['tri']
    tri = _to_ctype(tri).astype(np.int32)  # for type compatibility
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    last_frame_lmks = []

    vc = cv2.VideoCapture("C:\\Users\\zh13\\Videos\\TrackingTest.mov")
    success, frame = vc.read()
    while success:
        roi_box = []

        if len(last_frame_lmks) == 0:
            rects = face_detector(frame, 1)
            for rect in rects:
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box.append(parse_roi_box_from_bbox(bbox))
        else:
            for lmk in last_frame_lmks:
                roi_box.append(parse_roi_box_from_landmark(lmk))

        this_frame_lmk = []
        params = []
        for box in roi_box:
            img_to_net = crop_img(frame, box)
            img_to_net = cv2.resize(img_to_net,
                                    dsize=(STD_SIZE, STD_SIZE),
                                    interpolation=cv2.INTER_LINEAR)
            input = transform(img_to_net).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)
                params.append(param)
            this_frame_lmk.append(predict_68pts(param, box))
        last_frame_lmks = this_frame_lmk

        if args.render_mesh:
            for box, param in zip(roi_box, params):
                vertices = predict_dense(param, box)
                frame = app(_to_ctype(vertices.T), tri,
                            _to_ctype(frame.astype(np.float32) / 255.))
        else:
            for lmk in last_frame_lmks:
                for p in lmk.T:
                    cv2.circle(frame, (int(round(p[0] * draw_multiplier)),
                                       int(round(p[1] * draw_multiplier))),
                               draw_multiplier, (255, 0, 0), 1, cv2.LINE_AA,
                               draw_shiftbits)
        cv2.imshow("3ddfa video demo", frame)
        cv2.waitKey(1)
        success, frame = vc.read()
Example #14
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load pre-trained model uv-gan
    if args.uvgan:
        if args.checkpoint_uv_gan == "":
            print("Specify the path to checkpoint uv_gan")
            exit()
        uvgan = infer_uv_gan.UV_GAN(args.checkpoint_uv_gan)

    # 3. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 4. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # whether to use dlib landmarks for cropping; if not, compute the ROI box from the face bbox alone
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)

                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)

            Ps.append(P)
            poses.append(pose)

            if args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)

                p, offset, alpha_shp, alpha_exp = _parse_param(param)

                vertices = (u + w_shp @ alpha_shp + w_exp @ alpha_exp).reshape(
                    3, -1, order='F') + offset
                vertices = vertices.T
                tri = tri.T - 1
                print('Dump obj with sampled texture to {}'.format(wfp))
                unwraps = create_unwraps(vertices)
                h, w = args.height, args.width
                tcoords = process_uv(unwraps[:, :2], h, w)
                texture = render_colors(tcoords, tri, colors, h, w,
                                        c=3).astype('uint8')
                scaled_tcoords = scale_tcoords(tcoords)
                if args.uvgan:
                    texture = uvgan.infer(texture)
                else:
                    texture = cv2.cvtColor(texture, cv2.COLOR_BGR2RGB)
                vertices, colors, uv_coords = vertices.astype(
                    np.float32).copy(), colors.astype(
                        np.float32).copy(), scaled_tcoords.astype(
                            np.float32).copy()
                write_obj_with_colors_texture(wfp, vertices, colors, tri,
                                              texture * 255.0, uv_coords)

            ind += 1
Example #15
def classify(model, inputs):
    in_img = inputs['photo']
    img_ori = np.array(in_img)
    img_fp = 'samples/test1.jpg'

    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    #print(transform)
    rects = face_detector(img_ori, 1)

    pts_res = []
    Ps = []  # Camera matrix collection
    poses = []  # pose collection, [todo: validate it]
    vertices_lst = []  # store multiple face vertices
    ind = 0
    suffix = get_suffix(img_fp)
    for rect in rects:
        # - use detected face bbox
        bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
        roi_box = parse_roi_box_from_bbox(bbox)

        img = crop_img(img_ori, roi_box)

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        print(input)
        with torch.no_grad():

            if mode == 'gpu':
                input = input.cuda()

            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        # two-step for more accurate bbox to crop face
        if bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2,
                                   dsize=(STD_SIZE, STD_SIZE),
                                   interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        vertices = predict_dense(param, roi_box)
        vertices_lst.append(vertices)
        ind += 1

    pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)
    output = pncc_feature[:, :, ::-1]
    print(type(output))
    pilImg = transforms.ToPILImage()(np.uint8(output))

    return {"image": pilImg}
Example #16
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    for img_fp in args.files:
        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            try:
                lines = open(rect_fp).read().strip().split('\n')[1:]
            except FileNotFoundError:
                print('Cannot load bbox file')
                continue
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # whether to use dlib landmarks for cropping; if not, compute the ROI box from the face bbox alone
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)

                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            # dense face 3d vertices
            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj:
                vertices = predict_dense(param, roi_box)
                vertices_lst.append(vertices)
            if args.dump_ply:
                dump_to_ply(
                    vertices, tri,
                    '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_vertex:
                dump_vertex(
                    vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''),
                                                 ind))
            if args.dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if args.dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if args.dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''),
                                                 ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''),
                                                   ind)
                paf_feature = gen_img_paf(img_crop=img,
                                          param=param,
                                          kernel_size=args.paf_size)

                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if args.dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if args.dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if args.dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri-1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst,
                                           tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if args.dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst,
                                 tri - 1)  # cython version
            cv2.imwrite(
                wfp,
                pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if args.dump_res:
            draw_landmarks(img_ori,
                           pts_res,
                           wfp=img_fp.replace(suffix, '_3DDFA.jpg'),
                           show_flg=args.show_flg)
Example #17
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    files = sorted(
        glob.glob(os.path.join(args.folder, '*.jpg')) +
        glob.glob(os.path.join(args.folder, '*.png')))
    p_bar = tqdm(total=len(files))
    for img_fp in files:

        img_ori = cv2.imread(img_fp)
        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            lines = open(rect_fp).read().strip().split('\n')[1:]
            for l in lines:
                l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # whether to use dlib landmarks for cropping; if not, compute the ROI box from the face bbox alone
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input_ = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input_ = input_.cuda()
                param = model(input_)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input_ = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input_ = input_.cuda()
                    param = model(input_)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)

                pts68 = predict_68pts(param, roi_box)

            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            points = np.array(pts_res)[0].T

            rotated = eulerAnglesToRotationMatrix(np.array([0., np.pi, 0.]))
            points = points.dot(rotated)

            scaler = MinMaxScaler(feature_range=(-1., 1))
            scaled_points = scaler.fit_transform(points)
            points = scaled_points

            f_name = img_fp.replace(args.folder + '/',
                                    '').replace('.png',
                                                '').replace('.jpg', '')
            np.save('./results/{}.npy'.format(f_name), points.reshape(-1))

            if args.plot:
                plot_face(points, img_fp)
                if args.show_flg:
                    plt.show()
                else:
                    plt.savefig('./results/{}.png'.format(f_name))

            p_bar.update(1)
Example #18
def test_video(args):
    start_time = time.time()
    x = 1  # displays the frame rate every 1 second
    counter = 0
    # 1. load pre-trained model
    # checkpoint_fp='models/phase1_wpdc_vdc_v2.pth.tar'
    # arch='mobilenet_1'
    checkpoint_fp = 'models/MobDenseNet.pth.tar'
    arch = 'mobdensenet_v1'
    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(MobDenseNet, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)
    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()
    # 2. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()
    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri'] - 1
    tri_pts68 = sio.loadmat('visualize/pats68_tri.mat')['tri']
    textureImg = cv2.imread(image_name)
    cameraImg = cap.read()[1]
    # textureCoords=df.getFaceTextureCoords(textureImg)
    # drawface=Drawing3DFace.Draw3DFace(cameraImg,textureImg,textureCoords,tri_pts68.T)
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    while True:
        # get a frame
        img_ori = cap.read()[1]
        imgScale = 1
        scaledImg = img_ori
        if max(img_ori.shape) > maxImgSizeForDetection:
            imgScale = maxImgSizeForDetection / float(max(img_ori.shape))
            scaledImg = cv2.resize(img_ori, (int(img_ori.shape[1] * imgScale),
                                             int(img_ori.shape[0] * imgScale)))
        rects = face_detector(scaledImg, 1)

        Ps = []  # Camera matrix collection
        poses = []  # pose collection
        pts_res = []
        # suffix=get_suffix(img_ori)
        for rect in rects:
            if args.dlib_landmark:
                faceRectangle = rectangle(int(rect.left() / imgScale),
                                          int(rect.top() / imgScale),
                                          int(rect.right() / imgScale),
                                          int(rect.bottom() / imgScale))

                # - use landmark for cropping
                pts = face_regressor(img_ori, faceRectangle).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                bbox = [
                    int(rect.left() / imgScale),
                    int(rect.top() / imgScale),
                    int(rect.right() / imgScale),
                    int(rect.bottom() / imgScale)
                ]
                roi_box = parse_roi_box_from_bbox(bbox)
        img = crop_img(img_ori, roi_box)
        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
        # 68 pts
        pts68 = predict_68pts(param, roi_box)
        # df.triDelaunay(pts68)
        densePts = predict_dense(param, roi_box)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)
        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2,
                                   dsize=(STD_SIZE, STD_SIZE),
                                   interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)
            pts68 = predict_68pts(param, roi_box)
        pts_res.append(pts68)
        pts = []
        #draw landmark
        for indx in range(68):
            pos = (pts68[0, indx], pts68[1, indx])
            pts.append(pos)
            cv2.circle(img_ori, pos, 3, color=(255, 255, 255), thickness=-1)
        ##draw pose box
        if args.dump_pose:
            img_ori = plot_pose_box(img_ori, Ps, pts_res)
        #draw face mesh
        if args.dump_2D_face_mesh:
            img_ori = df.drawMesh(img_ori, densePts.T, tri.T)
        if args.dump_3D_face_mesh:
            pass
            # img=drawface.render(pts68)
        cv2.imshow("faceDetector", img_ori)
        counter += 1
        if (time.time() - start_time) > x:
            print("FPS: ", counter / (time.time() - start_time))
            counter = 0
            start_time = time.time()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
Example #19
def run(use_gpu=False,
        use_two_step_bbox_init=False,
        is_dump_to_ply=False,
        is_dump_vertex=False,
        is_dump_pts=False,
        is_dump_roi_box=False,
        is_dump_paf=False,
        is_dump_obj=False,
        is_dump_pose=False,
        is_dump_depth=False,
        is_dump_pncc=False,
        is_dump_res=False,
        show_landmarks_fig=False,
        paf_size=3):

    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'
    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)
    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped from checkpoint keys
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if use_gpu:
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()
    # 2. load dlib model for face detection and landmark used for face cropping
    dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
    face_regressor = dlib.shape_predictor(dlib_landmark_model)
    face_detector = dlib.get_frontal_face_detector()
    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    def process_one_frame(frame):
        if frame is None:
            print("No frame, check your camera")
            return
        img_ori = frame
        img_fp = "camera.png"  # 随便给一个filename 反正之后用不到
        rects = face_detector(img_ori, 1)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        for i in range(len(rects)):
            if i != 0: break  # only process the first detected face here
            rect = rects[i]
            # whether to use dlib landmarks for cropping; if not, compute the ROI box from the face bbox alone
            # - use landmark for cropping
            pts = face_regressor(img_ori, rect).parts()
            pts = np.array([[pt.x, pt.y] for pt in pts]).T
            roi_box = parse_roi_box_from_landmark(pts)
            img = crop_img(img_ori, roi_box)
            # forward: one step
            img = cv2.resize(img,
                             dsize=(STD_SIZE, STD_SIZE),
                             interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if use_gpu:
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(
                    np.float32)
            # 68 pts
            pts68 = predict_68pts(param, roi_box)
            # two-step for more accurate bbox to crop face
            if use_two_step_bbox_init:
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if use_gpu:
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(
                        np.float32)
                pts68 = predict_68pts(param, roi_box)
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)
            # dense face 3d vertices
            vertices = predict_dense(param, roi_box)
            vertices_lst.append(vertices)
            if is_dump_to_ply:
                dump_to_ply(
                    vertices, tri,
                    '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if is_dump_vertex:
                dump_vertex(
                    vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''),
                                                 ind))
            if is_dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if is_dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if is_dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''),
                                                 ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''),
                                                   ind)
                paf_feature = gen_img_paf(img_crop=img,
                                          param=param,
                                          kernel_size=paf_size)
                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if is_dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1
        if is_dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if is_dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri-1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst,
                                           tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if is_dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst,
                                 tri - 1)  # cython version
            cv2.imwrite(
                wfp,
                pncc_feature[:, :, ::-1])  # reverse channels: cv2.imwrite expects BGR
            print('Dump to {}'.format(wfp))
        if is_dump_res:
            draw_landmarks(img_ori,
                           pts_res,
                           wfp=img_fp.replace(suffix, '_3DDFA.jpg'),
                           show_flg=show_landmarks_fig)

        # the next line replaces the original image with a blank white canvas
        img_ori = np.ones_like(img_ori) * 255

        img_pose = plot_pose_box(img_ori, Ps, pts_res)
        draw_landmarks_opencv(img_pose, pts_res)
        cv2.imshow("pose", img_pose)
        # img_pncc = cpncc(img_ori, vertices_lst, tri - 1)
        # depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)
        # cv2.imwrite("temp.png", img_pose)
        # cv2.imshow("pncc",img_pncc)
        # cv2.imshow("depths",depths_img)

    cap = cv2.VideoCapture(0)
    while cv2.waitKey(1) != 27:  # cv2.waitKey returns -1 when no key is pressed; exit on Esc
        ret, frame = cap.read()
        if not ret:
            break
        process_one_frame(frame)
    cap.release()
    cv2.destroyAllWindows()
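The comments in these examples note that the 62-dimensional network output packs 12 pose values, 40 shape coefficients and 10 expression coefficients. A minimal helper sketch (not part of the original code, and assuming the three blocks are stored in that order) for splitting a predicted param vector accordingly:

import numpy as np

def split_62_param(param):
    # assumed layout: 12 pose values, then 40 shape and 10 expression coefficients
    param = np.asarray(param, dtype=np.float32).flatten()
    assert param.size == 62
    pose_part = param[:12]
    shape_part = param[12:52]
    exp_part = param[52:]
    return pose_part, shape_part, exp_part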
Ejemplo n.º 20
0
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    # 2. parse the image list
    with open(args.img_list) as f:
        img_list = [x.strip() for x in f.readlines()]
    landmark_list = []

    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False)

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    if not os.path.exists(args.save_lmk_dir):
        os.mkdir(args.save_lmk_dir)

    for img_idx, img_fp in enumerate(tqdm(img_list)):
        img_ori = cv2.imread(os.path.join(args.img_prefix, img_fp))

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)

        # the face-alignment model expects RGB input and returns a tuple of landmarks and boxes; cv2 reads images in BGR order
        preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
        pts_2d_68 = preds[0]
        pts_2d_5 = get_5lmk_from_68lmk(pts_2d_68)
        landmark_list.append(pts_2d_5)
        roi_box = parse_roi_box_from_landmark(pts_2d_68.T)      # determine the ROI box from the 68 landmarks

        img = crop_img(img_ori, roi_box)
        # import pdb; pdb.set_trace()

        # forward: one step
        img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            if args.mode == 'gpu':
                input = input.cuda()
            param = model(input)                        # pose (R, T) plus shape and expression coefficients
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)                   # pts68 now holds the 68 3D landmark coordinates

        # two-step for more accurate bbox to crop face
        if args.bbox_init == 'two':
            roi_box = parse_roi_box_from_landmark(pts68)
            img_step2 = crop_img(img_ori, roi_box)
            img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img_step2).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

            pts68 = predict_68pts(param, roi_box)

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        # dense face 3d vertices
        vertices = predict_dense(param, roi_box)

        if args.dump_2d_img:                # 2D rendering of the face region
            wfp_2d_img = os.path.join(args.save_dir, os.path.basename(img_fp))
            colors = get_colors(img_ori, vertices)
            # aligned_param = get_aligned_param(param)
            # vertices_aligned = predict_dense(aligned_param, roi_box)
            # h, w, c = 120, 120, 3
            h, w, c = img_ori.shape
            img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
            cv2.imwrite(wfp_2d_img, img_2d[:, :, ::-1])
        if args.dump_param:
            split = img_fp.split('/')
            save_name = os.path.join(args.save_dir, '{}.txt'.format(os.path.splitext(split[-1])[0]))
            this_param = param * param_std + param_mean
            this_param = np.concatenate((this_param, roi_box))
            this_param.tofile(save_name, sep=' ')
    if args.dump_lmk:                   # save the collected landmarks to disk
        save_path = os.path.join(args.save_lmk_dir, 'realign_lmk')
        with open(save_path, 'w') as f:
            for idx, (fname, land) in enumerate(zip(img_list, landmark_list)):
                # f.write('{} {} {} {}')
                land = land.astype(int)  # np.int is deprecated/removed in recent NumPy
                land_str = ' '.join([str(x) for x in land])
                msg = f'{fname} {idx} {land_str}\n'
                f.write(msg)
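The loop above writes one line per image to the realign_lmk file in the form "filename index landmark-values". A hedged reading sketch (assuming get_5lmk_from_68lmk returns a flat sequence of coordinate values, which is what the space-joined write implies):

def read_realign_lmk(path):
    # returns {filename: (index, [coordinate values])}
    records = {}
    with open(path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            fname, idx = fields[0], int(fields[1])
            records[fname] = (idx, [float(v) for v in fields[2:]])
    return records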
Ejemplo n.º 21
0
def get_landmark_2d(root, image_path):
    # 0.read image
    img_ori = cv2.imread(os.path.join(root, image_path))
    # 1. load pre-trained model
    checkpoint_fp = 'models/MobDenseNet.pth.tar'
    arch = 'mobdensenet_v1'
    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(MobDenseNet, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)
    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()
    # 2. load dlib model for face detection and landmark used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()
    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri'] - 1
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
    imgScale = 1
    scaledImg = img_ori
    if max(img_ori.shape) > maxImgSizeForDetection:
        imgScale = maxImgSizeForDetection / float(max(img_ori.shape))
        scaledImg = cv2.resize(img_ori, (int(
            img_ori.shape[1] * imgScale), int(img_ori.shape[0] * imgScale)))
    rects = face_detector(scaledImg, 1)
    for rect in rects:
        if args.dlib_landmark:
            faceRectangle = rectangle(int(rect.left() / imgScale),
                                      int(rect.top() / imgScale),
                                      int(rect.right() / imgScale),
                                      int(rect.bottom() / imgScale))
            # - use landmark for cropping
            pts = face_regressor(img_ori, faceRectangle).parts()
            pts = np.array([[pt.x, pt.y] for pt in pts]).T
            roi_box = parse_roi_box_from_landmark(pts)
        else:
            bbox = [
                int(rect.left() / imgScale),
                int(rect.top() / imgScale),
                int(rect.right() / imgScale),
                int(rect.bottom() / imgScale)
            ]
            roi_box = parse_roi_box_from_bbox(bbox)
    img = crop_img(img_ori, roi_box)
    # forward: one step
    img = cv2.resize(img,
                     dsize=(STD_SIZE, STD_SIZE),
                     interpolation=cv2.INTER_LINEAR)
    input = transform(img).unsqueeze(0)
    with torch.no_grad():
        if args.mode == 'gpu':
            input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
    # 68 pts
    pts68 = predict_68pts(param, roi_box)
    return pts68
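A hypothetical call to get_landmark_2d (the root and filename below are placeholders; the function also relies on the module-level args and maxImgSizeForDetection used in its body):

# hypothetical usage; 'data/images' and 'face_0001.jpg' are placeholder paths
pts68 = get_landmark_2d('data/images', 'face_0001.jpg')
np.savetxt('face_0001_lmk.txt', pts68, fmt='%.3f')  # same dump format as the other examples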
Ejemplo n.º 22
0
def run(image):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    model.eval()

    # 2. load dlib model for face detection and landmark used for face cropping
    dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
    face_regressor = dlib.shape_predictor(dlib_landmark_model)
    face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    img_ori = cv2.imread(image)
    # img_ori = img_ori[:, :, [2, 1, 0]]
    rects = face_detector(img_ori, 1)

    # if len(rects) == 0:
    #     rects = dlib.rectangles()
    #     rect_fp = img_fp + '.bbox'
    #     lines = open(rect_fp).read().strip().split('\n')[1:]
    #     for l in lines:
    #         l, r, t, b = [int(_) for _ in l.split(' ')[1:]]
    #         rect = dlib.rectangle(l, r, t, b)
    #         rects.append(rect)

    pts_res = []
    Ps = []  # Camera matrix collection
    poses = []  # pose collection, [todo: validate it]
    vertices_lst = []  # store multiple face vertices
    ind = 0
    suffix = get_suffix(image)
    for rect in rects:
        # whether to use the dlib landmarks to crop the image; if not, only the face bbox is used to compute the ROI box for cropping
        # - use landmark for cropping
        pts = face_regressor(img_ori, rect).parts()
        pts = np.array([[pt.x, pt.y] for pt in pts]).T
        roi_box = parse_roi_box_from_landmark(pts)

        img = crop_img(img_ori, roi_box)

        # forward: one step
        img = cv2.resize(img,
                         dsize=(STD_SIZE, STD_SIZE),
                         interpolation=cv2.INTER_LINEAR)
        input = transform(img).unsqueeze(0)
        with torch.no_grad():
            param = model(input)
            param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

        # 68 pts
        pts68 = predict_68pts(param, roi_box)

        # two-step for more accurate bbox to crop face

        pts_res.append(pts68)
        P, pose = parse_pose(param)
        Ps.append(P)
        poses.append(pose)

        vertices = predict_dense(param, roi_box)
        vertices_lst.append(vertices)

        # dense face 3d vertices
        wfp = './output/obj_{}.obj'.format(image.split('/')[-1])
        colors = get_colors(img_ori, vertices)
        write_obj_with_colors(wfp, vertices, tri, colors)
        print('Dump obj with sampled texture to {}'.format(wfp))
        ind += 1
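run writes its .obj output into ./output without creating that directory first, so it must exist. A hypothetical invocation (the sample path is a placeholder):

import os

os.makedirs('./output', exist_ok=True)  # run() writes into ./output but does not create it
run('samples/test_face.jpg')            # dumps ./output/obj_test_face.jpg.obj with sampled texture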
Ejemplo n.º 23
0
def getPoses(img_ori):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    cudnn.benchmark = True
    model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    alignment_model = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device='cuda')

    # face alignment model use RGB as input, result is a tuple with landmarks and boxes
    preds = alignment_model.get_landmarks(img_ori[:, :, ::-1])
    pts_2d_68 = preds[0]
    roi_box = parse_roi_box_from_landmark(pts_2d_68.T)

    img = crop_img(img_ori, roi_box)
    # import pdb; pdb.set_trace()

    # forward: one step
    img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

    # 68 pts
    pts68 = predict_68pts(param, roi_box)

    roi_box = parse_roi_box_from_landmark(pts68)
    img_step2 = crop_img(img_ori, roi_box)
    img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
    input = transform(img_step2).unsqueeze(0)
    with torch.no_grad():
        input = input.cuda()
        param = model(input)
        param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

    P, pose = parse_pose(param)        
    # dense face 3d vertices
    vertices = predict_dense(param, roi_box)
    colors = get_colors(img_ori, vertices)
    # aligned_param = get_aligned_param(param)
    # vertices_aligned = predict_dense(aligned_param, roi_box)
    # h, w, c = 120, 120, 3
    h, w, c = img_ori.shape
    img_2d = crender_colors(vertices.T, (tri - 1).T, colors[:, ::-1], h, w)
    img_2d = img_2d[:, :, ::-1]

    return img_2d, pose
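A hypothetical usage sketch for getPoses (the image path is a placeholder; a CUDA device and the model files referenced above are assumed to be available):

img = cv2.imread('samples/test_face.jpg')   # placeholder path
rendered, pose = getPoses(img)
print('estimated pose (yaw/pitch/roll, to verify):', pose)
# the rendering appears to be returned in BGR order; cast to uint8 before writing
# in case crender_colors returns a float image
cv2.imwrite('test_face_render.jpg', np.clip(rendered, 0, 255).astype(np.uint8))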
Ejemplo n.º 24
0
def main(args):
    # 1. load pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(
        checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(
        num_classes=62)  # 62 = 12(pose) + 40(shape) +10(expression)

    model_dict = model.state_dict()
    # because the model is trained by multiple gpus, prefix module should be removed
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose(
        [ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)

    # 2. parse images list and landmark
    lmk_file = args.lmk_file
    ts = time.time()
    rank_land, rank_img_list, start, end = parse_quality_list_part(
        lmk_file, args.world_size, args.rank, args.resume_idx)
    print('parse land file in {:.3f} seconds'.format(time.time() - ts))

    # for batch processing
    print('World size {}, rank {}, start from {}, end with {}'.format(
        args.world_size, args.rank, start, end))
    dataset = McDataset(rank_img_list,
                        rank_land,
                        transform=transform,
                        std_size=STD_SIZE)
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=2,
                            pin_memory=True)

    for img_idx, (inputs, ori_imgs, img_fps,
                  roi_boxes) in enumerate(tqdm(dataloader)):

        # forward: one step
        with torch.no_grad():
            if args.mode == 'gpu':
                inputs = inputs.cuda()
            params = model(inputs)
            params = params.cpu().numpy()

        roi_boxes = roi_boxes.numpy()
        outputs_roi_boxes = roi_boxes
        if args.bbox_init == 'two':
            step_two_ori_imgs = []
            step_two_roi_boxes = []
            ori_imgs = ori_imgs.numpy()
            for ii in range(params.shape[0]):
                # 68 pts
                pts68 = predict_68pts(params[ii], roi_boxes[ii])

                # two-step for more accurate bbox to crop face
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(ori_imgs[ii], roi_box)
                img_step2 = cv2.resize(img_step2,
                                       dsize=(STD_SIZE, STD_SIZE),
                                       interpolation=cv2.INTER_LINEAR)
                # input = transform(img_step2).unsqueeze(0)
                step_two_ori_imgs.append(transform(img_step2))
                step_two_roi_boxes.append(roi_box)
            with torch.no_grad():
                step_two_ori_imgs = torch.stack(step_two_ori_imgs, dim=0)
                inputs = step_two_ori_imgs
                if args.mode == 'gpu':
                    inputs = inputs.cuda()
                params = model(inputs)
                params = params.cpu().numpy()
            outputs_roi_boxes = step_two_roi_boxes

        # dump results
        if args.dump_param:
            for img_fp, param, roi_box in zip(img_fps, params,
                                              outputs_roi_boxes):
                split = img_fp.split('/')
                save_name = os.path.join(
                    args.save_dir,
                    '{}.txt'.format(os.path.splitext(split[-1])[0]))
                this_param = param * param_std + param_mean
                this_param = np.concatenate((this_param, roi_box))
                this_param.tofile(save_name, sep=' ')
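Each file dumped above holds the de-normalized 62-dim parameter vector followed by the roi_box values, written as space-separated text by tofile(sep=' '). A minimal sketch for reading one of these files back:

import numpy as np

def load_dumped_param(txt_path):
    # the first 62 values are the 3DMM parameters; the remainder is the roi box
    values = np.fromfile(txt_path, dtype=np.float32, sep=' ')
    return values[:62], values[62:]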