Example 1
def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the bounding box.
    If there are bounding box annotations, use them to crop the image.
    If no bounding box is specified but openpose detections are available, use them to get the bounding box.
    """
    normalize_img = Normalize(mean=cfg.IMG_NORM_MEAN, std=cfg.IMG_NORM_STD)
    img = cv2.imread(img_file)[:, :, ::-1].copy()  # PyTorch does not support negative strides at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        if bbox_file is not None:
            center, scale = bbox_from_pkl(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)

    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img
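For context, a minimal usage sketch of the function above (it applies to the later process_image variants as well). It assumes the snippet's dependencies are importable (cv2, numpy, torch, torchvision.transforms.Normalize, plus the repository's own crop and bbox_from_pkl helpers) and that a SPIN/HMR-style network named model is already loaded; the image path and model are placeholders, not taken from the original code.

import torch

# Preprocess a single frame: norm_img is the normalized (1, 3, 224, 224) network input,
# img is the unnormalized (3, 224, 224) crop kept for visualization.
img, norm_img = process_image('examples/frame_000.png', bbox_file=None, openpose_file=None)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
with torch.no_grad():
    pred = model(norm_img.to(device))  # 'model' is a hypothetical, already-loaded network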
Example 2
def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the bounding box.
    If there are bounding box annotations, use them to crop the image.
    If no bounding box is specified but openpose detections are available, use them to get the bounding box.
    """
    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN,
                              std=constants.IMG_NORM_STD)
    img = cv2.imread(img_file)[:, :, ::-1].copy()  # PyTorch does not support negative strides at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        if bbox_file is not None:
            center, scale = bbox_from_json(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img
Example 3
 def rgb_processing(self, rgb_img, center, scale, rot, flip, pn, is_train):
     rgb_img = crop(rgb_img.copy(), center, scale, [constants.IMG_RES, constants.IMG_RES], rot=rot)
     if is_train:
         if flip:
             rgb_img = flip_img(rgb_img)
     rgb_img = np.transpose(rgb_img.astype('float32'),(2,0,1))/255.0
     return rgb_img
Example 4
 def rgb_processing(self, rgb_img, center, scale):
     """Process rgb image and do augmentation."""
     rgb_img = crop(rgb_img,
                    center,
                    scale, [constants.IMG_RES, constants.IMG_RES],
                    rot=0)
     # (3,224,224),float,[0,1]
     rgb_img = np.transpose(rgb_img.astype('float32'), (2, 0, 1)) / 255.0
     return rgb_img
Example 5
 def rgb_processing(self, rgb_img, center, scale, rot, flip, pn):
     """Process rgb image and do augmentation."""
     # crop and rotate the image
     if self.use_augmentation_rot:
         rgb_img = crop(rgb_img, center, scale, 
                       [self.options.img_res, self.options.img_res], rot=rot)
     else:
         rgb_img = crop(rgb_img, center, scale, 
                       [self.options.img_res, self.options.img_res], rot=0)
     # flip the image 
     if flip:
         rgb_img = flip_img(rgb_img)
     # in the rgb image we add pixel noise in a channel-wise manner
     if self.use_augmentation_rgb:
         rgb_img[:,:,0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:,:,0]*pn[0]))
         rgb_img[:,:,1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:,:,1]*pn[1]))
         rgb_img[:,:,2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:,:,2]*pn[2]))
         
     # (3,224,224),float,[0,1]
     rgb_img = np.transpose(rgb_img.astype('float32'),(2,0,1))/255.0
     return rgb_img
Example 6
 def rgb_processing(self, rgb_img, center, scale, rot, flip, pn):
     """Process rgb image and do augmentation."""
     rgb_img = crop(rgb_img, center, scale, 
                   [constants.IMG_RES, constants.IMG_RES], rot=rot)
     # flip the image 
     if flip:
         rgb_img = flip_img(rgb_img)
     # in the rgb image we add pixel noise in a channel-wise manner
     rgb_img[:,:,0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:,:,0]*pn[0]))
     rgb_img[:,:,1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:,:,1]*pn[1]))
     rgb_img[:,:,2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:,:,2]*pn[2]))
     # (3,224,224),float,[0,1]
     rgb_img = np.transpose(rgb_img.astype('float32'),(2,0,1))/255.0
     return rgb_img
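The rot, flip and pn arguments used by these rgb_processing variants are normally sampled per example during training. Below is a minimal sketch of such a sampler, assuming the noise_factor/rot_factor convention common in HMR/SPIN-style data loaders; the default values are illustrative and not taken from the snippets above.

import numpy as np

def sample_augm_params(noise_factor=0.4, rot_factor=30, is_train=True):
    """Sample a flip flag, a rotation angle in degrees and per-channel pixel noise factors."""
    flip, rot, pn = 0, 0.0, np.ones(3)
    if is_train:
        flip = int(np.random.rand() < 0.5)                     # horizontal flip with probability 0.5
        pn = np.random.uniform(1 - noise_factor, 1 + noise_factor, 3)  # channel-wise noise factors
        rot = float(np.clip(np.random.randn() * rot_factor,
                            -2 * rot_factor, 2 * rot_factor))  # clipped Gaussian rotation
        if np.random.rand() < 0.6:                             # leave most samples unrotated
            rot = 0.0
    return flip, rot, pn

# flip, rot, pn = sample_augm_params()  # then pass them to rgb_processing(img, center, scale, rot, flip, pn)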
Example 7
def process_image(img_file, bbox_file, input_res=224):
    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN,
                              std=constants.IMG_NORM_STD)
    img = cv2.imread(img_file)[:, :, ::-1].copy()
    if bbox_file is None:
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        center, scale = bbox_from_pkl(bbox_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img
Example 8
def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the bounding box.
    If there are bounding box annotations, use them to crop the image.
    If no bounding box is specified but openpose detections are available, use them to get the bounding box.
    """
    img = cv2.imread(img_file)[:, :, ::-1].copy()  # PyTorch does not support negative strides at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        if bbox_file is not None:
            center, scale = bbox_from_json(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    return img
Example 9
def inference_structure(pathCkp: str,
                        pathImg: str = None,
                        pathBgImg: str = None):

    print('If trained locally and renamed the workspace, do not forget to '
          'change the "checkpoint_dir" in config.json.')

    # Load configuration
    with open(pjn(pathCkp, 'config.json'), 'r') as f:
        options = json.load(f)
        options = namedtuple('options', options.keys())(**options)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    mesh = Mesh(options, options.num_downsampling)

    # read the SMPL .obj file to get uv coordinates
    _, smpl_tri_ind, uv_coord, tri_uv_ind = read_Obj(options.smpl_objfile_path)
    uv_coord[:, 1] = 1 - uv_coord[:, 1]
    expUV = uv_coord[tri_uv_ind.flatten()]
    unique, index = np.unique(smpl_tri_ind.flatten(), return_index=True)
    smpl_verts_uvs = torch.as_tensor(expUV[index, :]).float().to(device)
    smpl_tri_ind = torch.as_tensor(smpl_tri_ind).to(device)

    # load average pose and shape and convert to camera coordinates;
    # avg pose is decided by the image id we use for training (0-11)
    avgPose_objCoord = np.load(options.MGN_avgPose_path)
    avgPose_objCoord[:3] = rotationMatrix_to_axisAngle(  # for 0,6, front only
        torch.tensor([[[1, 0, 0], [0, -1, 0], [0, 0, -1]]]))
    avgPose = \
        axisAngle_to_Rot6d(
            torch.Tensor(avgPose_objCoord[None]).reshape(-1, 3)
            ).reshape(1, -1).to(device)
    avgBeta = \
        torch.Tensor(
            np.load(options.MGN_avgBeta_path)[None]).to(device)
    avgCam = torch.Tensor([1.2755, 0,
                           0])[None].to(device)  # 1.2755 is for our settings

    # Create model
    model = frameVIBE(options.smpl_model_path, mesh, avgPose, avgBeta, avgCam,
                      options.num_channels, options.num_layers, smpl_verts_uvs,
                      smpl_tri_ind).to(device)

    optimizer = torch.optim.Adam(params=list(model.parameters()))
    models_dict = {options.model: model}
    optimizers_dict = {'optimizer': optimizer}

    # Load pretrained model
    saver = CheckpointSaver(save_dir=options.checkpoint_dir)
    saver.load_checkpoint(models_dict,
                          optimizers_dict,
                          checkpoint_file=options.checkpoint)

    # Prepare and preprocess input image
    IMG_NORM_MEAN = [0.485, 0.456, 0.406]
    IMG_NORM_STD = [0.229, 0.224, 0.225]
    normalize_img = Normalize(mean=IMG_NORM_MEAN, std=IMG_NORM_STD)

    path_to_rendering = '/'.join(pathImg.split('/')[:-1])
    cameraPath, lightPath = pathImg.split('/')[-1].split('_')[:2]
    cameraIdx, _ = int(cameraPath[6:]), int(lightPath[5:])
    with open(pjn(path_to_rendering,
                  'camera%d_boundingbox.txt' % (cameraIdx))) as f:
        boundbox = literal_eval(f.readline())
    img = cv2.imread(pathImg)[:, :, ::-1].astype(np.float32)

    # prepare background
    if options.replace_background:
        if pathBgImg is None:
            bgimages = []
            for subfolder in sorted(
                    glob(pjn(options.bgimg_dir, 'images/validation/*'))):
                for subsubfolder in sorted(glob(pjn(subfolder, '*'))):
                    if 'room' in subsubfolder:
                        bgimages += sorted(glob(pjn(subsubfolder, '*.jpg')))
            bgimg = cv2.imread(bgimages[np.random.randint(
                0, len(bgimages))])[:, :, ::-1].astype(np.float32)
        else:
            bgimg = cv2.imread(pathBgImg)[:, :, ::-1].astype(np.float32)
        img = background_replacing(img, bgimg)

    # augment image
    center = [(boundbox[0] + boundbox[2]) / 2, (boundbox[1] + boundbox[3]) / 2]
    scale = max((boundbox[2] - boundbox[0]) / 200,
                (boundbox[3] - boundbox[1]) / 200)
    img = torch.Tensor(crop(img, center, scale, [224, 224], rot=0)).permute(
        2, 0, 1) / 255
    img_in = normalize_img(img)

    # Inference
    with torch.no_grad():  # disable grad
        model.eval()
        prediction = model(
            img_in[None].repeat_interleave(options.batch_size,
                                           dim=0).to(device),
            img[None].repeat_interleave(options.batch_size, dim=0).to(device))

    return prediction, img_in, options
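A hypothetical invocation of inference_structure, for illustration only. The checkpoint directory and the MGN-style rendering path below are placeholders; the structural requirement implied by the path parsing above is that the image sits in a rendering folder next to the matching camera<K>_boundingbox.txt file and that its filename starts with camera<K>_light<L>_.

# Both paths are placeholders: pathCkp must contain config.json and the checkpoint
# referenced by options.checkpoint; pathImg must follow the layout parsed above.
prediction, img_in, options = inference_structure(
    pathCkp='checkpoints/run_01',
    pathImg='datasets/MGN/object_0001/rendering/camera0_light0_rendered.png')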