    def process(self, image_list):
        """
        Args:
            image_list: list of tuples, where first element is an image_name,
                second element is a dictionary with the hand bounding_boxes 
                as well as other information
        """
        hand_mesh_list = []
        print('Extracting hand pose and hand mesh...')
        for image_name, hand_info in tqdm(image_list):
            save_name = os.path.join(
                self.cache_loc,
                '#'.join(image_name.split('/')[-3:])[:-self.extension_length] + '.pkl')
            
            # reuse the cached result unless overwriting is requested
            if os.path.exists(save_name) and not self.overwrite:
                with open(save_name, 'rb') as f:
                    hand_mesh_list.append(pickle.load(f))
            else:
                hand_mesh = {}
                # crop the image for each detected hand
                # and pass the crop through the model
                for which_hand in ['left', 'right']:
                    if which_hand in hand_info:
                        image_raw = cv2.imread(image_name)
                        # cropping the hand
                        crop = image_raw[int(hand_info[which_hand]['top_y']):int(hand_info[which_hand]['bottom_y'])+1, 
                                    int(hand_info[which_hand]['left_x']):int(hand_info[which_hand]['right_x'])+1, :]
                        frame = preprocess_frame(crop)
                        img = Image.fromarray(frame.copy())
                        hand_crop = cv2.resize(np.array(img), (256, 256))
                        
                        # right-hand crops are mirrored so both hands
                        # reach the network with the same handedness
                        if which_hand == 'left':
                            hand_image = prepare_input(hand_crop, flip_left_right=False)
                        elif which_hand == 'right':
                            hand_image = prepare_input(hand_crop, flip_left_right=True)

                        output = self.forward_pass_3d(self.model, hand_image)
                        # drop the batch dimension: one hand per forward pass
                        verts = output['verts'].cpu().detach().numpy()[0]
                        joints = output['joints'].cpu().detach().numpy()[0]
                        hand_mesh[which_hand] = {'verts': verts, 'joints': joints}
                
                hand_mesh_list.append(hand_mesh)
                # save into cache
                with open(save_name, 'wb') as f:
                    pickle.dump(hand_mesh, f)
        return hand_mesh_list                
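# A minimal usage sketch for process() above. `HandMeshExtractor` is a
# hypothetical wrapper name (an assumption): the method only relies on
# self.cache_loc, self.overwrite, self.extension_length, self.model and
# self.forward_pass_3d.
image_list = [
    ('data/seq01/frames/000001.png',           # image path
     {'right': {'top_y': 40, 'bottom_y': 180,  # hand box in pixel coords
                'left_x': 220, 'right_x': 360}}),
]
# extractor = HandMeshExtractor(cache_loc='cache', overwrite=False,
#                               extension_length=4)  # 4 == len('.png')
# hand_meshes = extractor.process(image_list)
# hand_meshes[0]['right']['verts'] is typically a (778, 3) MANO vertex array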
    model.eval()  # inference mode: disable dropout / batch-norm updates

    print("Input image is processed flipped and unflipped "
          "(as left and right hand), both outputs are displayed")

    # load faces of hand
    with open("misc/mano/MANO_RIGHT.pkl", "rb") as p_f:
        mano_right_data = pickle.load(p_f, encoding="latin1")
        faces = mano_right_data["f"]
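    # `displaymano.add_mesh` used below is a repo helper; this stand-in is
    # a minimal sketch (an assumption, not the repo's implementation) that
    # renders a vertex/face mesh with matplotlib's Poly3DCollection.
    from mpl_toolkits.mplot3d.art3d import Poly3DCollection

    def add_mesh_sketch(ax, verts, faces, flip_x=False):
        if flip_x:
            verts = verts.copy()
            verts[:, 0] = -verts[:, 0]  # mirror the mesh back along x
        poly = Poly3DCollection(verts[faces], alpha=0.3)
        poly.set_edgecolor((0.3, 0.3, 0.3))
        ax.add_collection3d(poly)
        ax.auto_scale_xyz(verts[:, 0], verts[:, 1], verts[:, 2])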

    fig = plt.figure(figsize=(4, 4))
    frame = cv2.imread(args.image_path)
    frame = preprocess_frame(frame)
    input_image = prepare_input(frame)
    cv2.imshow("input", frame)
    img = Image.fromarray(frame.copy())
    hand_crop = cv2.resize(np.array(img), (256, 256))

    noflip_hand_image = prepare_input(hand_crop, flip_left_right=False)
    flip_hand_image = prepare_input(hand_crop, flip_left_right=True)
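    # Rough sketch of what a prepare_input-style helper might do (an
    # assumption; the repo's actual preprocessing may differ): optional
    # horizontal mirror, BGR->RGB, scale to [-1, 1], reshape to 1xCxHxW.
    def prepare_input_sketch(hand_crop, flip_left_right=False):
        import torch
        if flip_left_right:
            hand_crop = cv2.flip(hand_crop, 1)  # horizontal mirror
        rgb = cv2.cvtColor(hand_crop, cv2.COLOR_BGR2RGB).astype(np.float32)
        rgb = rgb / 255.0 * 2.0 - 1.0
        return torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0)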
    noflip_output = forward_pass_3d(model, noflip_hand_image)
    flip_output = forward_pass_3d(model, flip_hand_image)
    flip_verts = flip_output["verts"].cpu().detach().numpy()[0]
    noflip_verts = noflip_output["verts"].cpu().detach().numpy()[0]
    ax = fig.add_subplot(1, 2, 1, projection="3d")
    ax.title.set_text("unflipped input")
    displaymano.add_mesh(ax, flip_verts, faces, flip_x=True)
    if "objpoints3d" in flip_output:
        objverts = flip_output["objpoints3d"].cpu().detach().numpy()[0]
    # Register attention-map hooks on the hand encoder
    # (and on the atlas encoder when the model has one)
    attention_hand = AttentionHook(model.module.base_net)
    if hasattr(model.module, "atlas_base_net"):
        attention_atlas = AttentionHook(model.module.atlas_base_net)
        has_atlas_encoder = True
    else:
        has_atlas_encoder = False
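    # `AttentionHook` is a repo demo utility; a minimal sketch of the idea
    # (an assumption, not the actual class) registers a forward hook on the
    # encoder, keeps the latest feature map, and blends its mean activation
    # over the frame as a heatmap.
    class AttentionHookSketch:
        def __init__(self, module):
            self.features = None
            module.register_forward_hook(self._hook)

        def _hook(self, module, inputs, output):
            self.features = output.detach()

        def blend_map(self, frame, alpha=0.5):
            amap = self.features[0].mean(0).cpu().numpy()  # (H, W) mean map
            amap = (amap - amap.min()) / (amap.max() - amap.min() + 1e-6)
            amap = cv2.resize(amap, (frame.shape[1], frame.shape[0]))
            heat = cv2.applyColorMap((amap * 255).astype(np.uint8),
                                     cv2.COLORMAP_JET)
            return cv2.addWeighted(frame, 1 - alpha, heat, alpha, 0)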

    fig = plt.figure(figsize=(4, 4))
    while True:
        fig.clf()
        ret, frame = cap.read()
        if not ret:
            raise RuntimeError("OpenCV could not read frame from capture")
        frame = preprocess_frame(frame)
        input_image = prepare_input(frame)
        blend_img_hand = attention_hand.blend_map(frame)
        if has_atlas_encoder:
            blend_img_atlas = attention_atlas.blend_map(frame)
            cv2.imshow("attention atlas", blend_img_atlas)
        img = Image.fromarray(frame.copy())
        hand_crop = cv2.resize(np.array(img), (256, 256))
        hand_image = prepare_input(
            hand_crop, flip_left_right=args.flip_left_right
        )
        output = forward_pass_3d(model, hand_image, hand_side=args.hand_side)

        if "joints2d" in output:
            joints2d = output["joints2d"]
            frame = visualize_joints_2d_cv2(
                frame, joints2d.cpu().detach().numpy()[0]