import numpy as np
import torch

# GaussianBlur_CUDA comes from the DGPT project (see the import in Example #4 below)
from DGPT.Utils.CUDAFuncs.GaussianBlur import GaussianBlur_CUDA


def preprocess(tensor):
    g = np.random.randint(0, 7)    # blur strength in [0, 6]; 0 skips the degrade blur
    n = np.random.sample() * 0.01  # small random constant offset in [0, 0.01)
    tensor = tensor + n
    if g > 0:
        degrade_blur = GaussianBlur_CUDA(g / 18)  # sigma in [1/18, 1/3]
        tensor = degrade_blur(tensor)

    # fixed pre-blur with sigma = 1
    pre_blur = GaussianBlur_CUDA(1)
    tensor = pre_blur(tensor)

    # with probability 0.5, convolve with a random motion-blur kernel
    # (rand_motion_blur_weight is a project helper assumed to return a 4D conv weight)
    if np.random.sample() < 0.5:
        ww = rand_motion_blur_weight(3, 5, 360).cuda()
        # padding of kernel_size // 2 preserves the spatial size for odd kernels
        tensor = torch.nn.functional.conv2d(tensor, ww, None, 1, ww.shape[2] // 2)

    return tensor
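
A minimal usage sketch (assumptions: a 4D float tensor already on the GPU, and rand_motion_blur_weight available from the surrounding project; the input shape here is hypothetical):

x = torch.rand(1, 3, 128, 128, device='cuda')  # dummy batch
y = preprocess(x)
print(y.shape)  # spatial size should be preserved by the padded convolution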
Example #2
def main(args):
    # 1. load the pre-trained model
    checkpoint_fp = 'models/phase1_wpdc_vdc.pth.tar'
    arch = 'mobilenet_1'

    checkpoint = torch.load(checkpoint_fp, map_location=lambda storage, loc: storage)['state_dict']
    model = getattr(mobilenet_v1, arch)(num_classes=62)  # 62 = 12 (pose) + 40 (shape) + 10 (expression)

    model_dict = model.state_dict()
    # the model was trained on multiple GPUs, so the 'module.' prefix must be stripped
    for k in checkpoint.keys():
        model_dict[k.replace('module.', '')] = checkpoint[k]
    model.load_state_dict(model_dict)
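    # an equivalent one-liner (a sketch; assumes every checkpoint key carries the 'module.' prefix):
    # model.load_state_dict({k.replace('module.', ''): v for k, v in checkpoint.items()})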
    if args.mode == 'gpu':
        cudnn.benchmark = True
        model = model.cuda()
    model.eval()

    # 2. load the dlib models: a face detector and the landmark predictor used for face cropping
    if args.dlib_landmark:
        dlib_landmark_model = 'models/shape_predictor_68_face_landmarks.dat'
        face_regressor = dlib.shape_predictor(dlib_landmark_model)
    if args.dlib_bbox:
        face_detector = dlib.get_frontal_face_detector()

    # 3. forward
    tri = sio.loadmat('visualize/tri.mat')['tri']
    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])

    filenames = [join(args.files, x) for x in listdir(args.files) if is_image_file(x)]
    filenames.sort()

    result_fn = join(args.files, 'results')
    if not os.path.exists(result_fn):
        os.mkdir(result_fn)

    blur = GaussianBlur_CUDA(sigma=35)
    rgb2y = RGB2Y('cuda')

    for img_fp in filenames:
        print("process ", img_fp)
        img_ori = cv2.imread(img_fp)
        img_data = torch.Tensor(cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)).permute(2, 0, 1) / 255.0
        # print(img_data.shape)

        Y = blur(rgb2y.do(img_data.to('cuda').unsqueeze(0)))

        if args.dlib_bbox:
            rects = face_detector(img_ori, 1)
        else:
            rects = []

        if len(rects) == 0:
            # fall back to a precomputed bbox file (first line is a header;
            # each remaining line holds a label followed by four coordinates)
            rects = dlib.rectangles()
            rect_fp = img_fp + '.bbox'
            with open(rect_fp) as f:
                lines = f.read().strip().split('\n')[1:]
            for line in lines:
                # note: dlib.rectangle expects (left, top, right, bottom)
                l, r, t, b = [int(_) for _ in line.split(' ')[1:]]
                rect = dlib.rectangle(l, r, t, b)
                rects.append(rect)

        pts_res = []
        Ps = []  # Camera matrix collection
        poses = []  # pose collection, [todo: validate it]
        vertices_lst = []  # store multiple face vertices
        ind = 0
        suffix = get_suffix(img_fp)
        for rect in rects:
            # use dlib landmarks to compute the crop ROI; otherwise fall back to the detected face bbox
            if args.dlib_landmark:
                # - use landmark for cropping
                pts = face_regressor(img_ori, rect).parts()
                pts = np.array([[pt.x, pt.y] for pt in pts]).T
                roi_box = parse_roi_box_from_landmark(pts)
            else:
                # - use detected face bbox
                bbox = [rect.left(), rect.top(), rect.right(), rect.bottom()]
                roi_box = parse_roi_box_from_bbox(bbox)

            img = crop_img(img_ori, roi_box)

            # forward: one step
            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
            input = transform(img).unsqueeze(0)
            with torch.no_grad():
                if args.mode == 'gpu':
                    input = input.cuda()
                param = model(input)
                param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

            # 68 pts
            pts68 = predict_68pts(param, roi_box)

            # two-step for more accurate bbox to crop face
            if args.bbox_init == 'two':
                roi_box = parse_roi_box_from_landmark(pts68)
                img_step2 = crop_img(img_ori, roi_box)
                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                input = transform(img_step2).unsqueeze(0)
                with torch.no_grad():
                    if args.mode == 'gpu':
                        input = input.cuda()
                    param = model(input)
                    param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

                pts68 = predict_68pts(param, roi_box)

            # print(pts68, len(pts68))
            pts_res.append(pts68)
            P, pose = parse_pose(param)
            Ps.append(P)
            poses.append(pose)

            # dense face 3d vertices
            if args.dump_ply or args.dump_vertex or args.dump_depth or args.dump_pncc or args.dump_obj or args.dump_res:
                vertices = predict_dense(param, roi_box)
                # print('vertex size is = ', len(vertices), vertices.shape, "param len ", len(param))
                vertices_lst.append(vertices)
                # print(param)
            if args.dump_ply:
                dump_to_ply(vertices, tri, '{}_{}.ply'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_vertex:
                dump_vertex(vertices, '{}_{}.mat'.format(img_fp.replace(suffix, ''), ind))
            if args.dump_pts:
                wfp = '{}_{}.txt'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, pts68, fmt='%.3f')
                print('Save 68 3d landmarks to {}'.format(wfp))
            if args.dump_roi_box:
                wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                np.savetxt(wfp, roi_box, fmt='%.3f')
                print('Save roi box to {}'.format(wfp))
            if args.dump_paf:
                wfp_paf = '{}_{}_paf.jpg'.format(img_fp.replace(suffix, ''), ind)
                wfp_crop = '{}_{}_crop.jpg'.format(img_fp.replace(suffix, ''), ind)
                paf_feature = gen_img_paf(img_crop=img, param=param, kernel_size=args.paf_size)

                cv2.imwrite(wfp_paf, paf_feature)
                cv2.imwrite(wfp_crop, img)
                print('Dump to {} and {}'.format(wfp_crop, wfp_paf))
            if args.dump_obj:
                wfp = '{}_{}.obj'.format(img_fp.replace(suffix, ''), ind)
                colors = get_colors(img_ori, vertices)
                write_obj_with_colors(wfp, vertices, tri, colors)
                print('Dump obj with sampled texture to {}'.format(wfp))
            ind += 1

        if args.dump_pose:
            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
            img_pose = plot_pose_box(img_ori, Ps, pts_res)
            wfp = img_fp.replace(suffix, '_pose.jpg')
            cv2.imwrite(wfp, img_pose)
            print('Dump to {}'.format(wfp))
        if args.dump_depth:
            wfp = img_fp.replace(suffix, '_depth.png')
            # depths_img = get_depths_image(img_ori, vertices_lst, tri-1)  # python version
            depths_img = cget_depths_image(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, depths_img)
            print('Dump to {}'.format(wfp))
        if args.dump_pncc:
            wfp = img_fp.replace(suffix, '_pncc.png')
            pncc_feature = cpncc(img_ori, vertices_lst, tri - 1)  # cython version
            cv2.imwrite(wfp, pncc_feature[:, :, ::-1])  # cv2.imwrite will swap RGB -> BGR
            print('Dump to {}'.format(wfp))
        if args.dump_res:
            # print(pts_res, len(pts_res), len(pts_res[0][0]))
            # draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg, color='yellow')

            # print("dump res")
            h, w = img_ori.shape[:2]
            blank = np.zeros((h, w, 3))
            result = plot_kpt(blank, pts_res)
            # print(result.shape)
            bn = basename(img_fp)
            ofn = join(result_fn, bn).replace(suffix, '_landmark.jpg')
            cv2.imwrite(ofn, result)

            # tri2 = tri / 29
            # print(tri2, tri)
            result = plot_mesh_simple(blank, vertices_lst)

            ofn = join(result_fn, bn).replace(suffix, '_mesh.jpg')
            cv2.imwrite(ofn, result)

            result = plot_light(blank, vertices_lst, Y)

            ofn = join(result_fn, bn).replace(suffix, '_light.jpg')
            cv2.imwrite(ofn, result)
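
For reference, a hedged sketch of the argparse setup this main() expects; the flag names come from the attributes read above, while the defaults are assumptions:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--files', default='samples')
parser.add_argument('--mode', default='gpu', choices=['gpu', 'cpu'])
parser.add_argument('--bbox_init', default='two', choices=['one', 'two'])
parser.add_argument('--paf_size', default=3, type=int)
parser.add_argument('--dlib_landmark', action='store_true')
parser.add_argument('--dlib_bbox', action='store_true')
for flag in ('dump_ply', 'dump_vertex', 'dump_pts', 'dump_roi_box', 'dump_paf',
             'dump_obj', 'dump_pose', 'dump_depth', 'dump_pncc', 'dump_res'):
    parser.add_argument('--' + flag, action='store_true')

if __name__ == '__main__':
    main(parser.parse_args())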
                # print("input size", ow, oh, sz)

                pad = []
                pad.append((sz - ow) // 2)
                pad.append(sz - ow - pad[0])
                pad.append((sz - oh) // 2)
                pad.append(sz - oh - pad[2])

                t = torch.nn.functional.pad(t, pad, mode='constant', value=0.5)

                t = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(t)

                t = t.cuda().unsqueeze(0)
                t = torch.nn.functional.interpolate(t, (256, 256), mode='bilinear', align_corners=False)

                blur = GaussianBlur_CUDA(0.5)
                # t = blur(t)

                t1 = t.clone()
                t2 = t.clone()

                outputs = net(t, t1, t2)

                heatmaps = outputs[-2][:, :17]  # .squeeze(0)
                paf_maps = outputs[-1]  # .squeeze(0)

                # heatmap = heatmaps.sum(1, keepdim=True)
                # heatmap = heatmap[:, :, :255].sum(2)
                heatmap = heatmaps.squeeze(0).reshape(17, 1, heatmaps.shape[2], heatmaps.shape[3])
                viz.draw_images(heatmap, "output_heatmap")
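
A call sketch for the wrapper above (all names here are hypothetical: net is a loaded three-input pose network, viz exposes draw_images, and T must be torchvision.transforms):

import torchvision.transforms as T

img = torch.rand(3, 240, 320)  # a (C, oh, ow) tensor in [0, 1]
pad_normalize_and_infer(img, ow=320, oh=240, sz=320, net=net, viz=viz)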
Example #4
    # opt.face_cutter = FaceCutter(512,
    #                              cuda=True,
    #                              blur_threshold=opt.config.dface.blur_threshold,
    #                              method=opt.config.dface.method,
    #                              mtcnn_path=opt.config.dface.mtcnn_path,
    #                              small_face_size=opt.config.dface.small_face_size,
    #                              tiny_face_size=opt.config.dface.tiny_face_size,
    #                              mtcnn_thresholds=opt.config.dface.mtcnn_thresholds,
    #                              mtcnn_nms_thresholds=opt.config.dface.mtcnn_nms_thresholds
    #                              )

    from DGPT.Utils.CUDAFuncs.GaussianBlur import GaussianBlur_CUDA

    # opt.face_initial_blur = GaussianBlur_CUDA(opt.face_net.init_sigma)
    print("base sigma=", opt.config.face_net.base_sigma)
    opt.face_base_blur = GaussianBlur_CUDA(opt.config.face_net.base_sigma)
    opt.face_valid_blur = GaussianBlur_CUDA(opt.config.face_net.valid_sigma)

    opt.bg_valid_blur = GaussianBlur_CUDA(opt.config.bg_net.valid_sigma)
    opt.bg_base_blur = GaussianBlur_CUDA(opt.config.bg_net.base_sigma)

    # from quicklab.utils import VggUnNormalizer
    # opt.vgg_unnorm = VggUnNormalizer().to('cuda')

    from DGPT.Utils.Preprocess import RGB2YUV, YUV2RGB
    opt.rgb2yuv = RGB2YUV('cuda')
    opt.yuv2rgb = YUV2RGB('cuda')

    # prepare a constant 512x512 face mask with value 0.1
    face_mask = torch.ones(1, 1, 512, 512).to('cuda')
    face_mask *= 0.1
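
A minimal round-trip sketch, assuming RGB2YUV and YUV2RGB expose the same .do() method that RGB2Y uses in Example #2 (an assumption, not confirmed by this listing):

x = torch.rand(1, 3, 512, 512, device='cuda')  # dummy RGB batch
yuv = opt.rgb2yuv.do(x)            # RGB -> YUV (assumed .do() API, mirroring RGB2Y above)
rgb = opt.yuv2rgb.do(yuv)          # back to RGB
blurred = opt.face_base_blur(rgb)  # GaussianBlur_CUDA instances are callable, as in Example #1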