def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the
    bounding box. If there are bounding box annotations, use them to crop
    the image. If no bounding box is specified but openpose detections are
    available, use them to get the bounding box.
    """
    normalize_img = Normalize(mean=cfg.IMG_NORM_MEAN, std=cfg.IMG_NORM_STD)
    img = cv2.imread(img_file)[:, :, ::-1].copy()  # PyTorch does not support negative strides at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200  # scale relative to the 200px reference box used by crop()
    else:
        if bbox_file is not None:
            center, scale = bbox_from_pkl(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img
def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the
    bounding box. If there are bounding box annotations, use them to crop
    the image. If no bounding box is specified but openpose detections are
    available, use them to get the bounding box.
    """
    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD)
    img = cv2.imread(img_file)[:, :, ::-1].copy()  # PyTorch does not support negative strides at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        if bbox_file is not None:
            center, scale = bbox_from_json(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img
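# A minimal usage sketch for the process_image variants above, assuming a
# pretrained SPIN-style `model` and a placeholder image path (neither appears
# in the original snippets). `norm_img` feeds the network; `img` is kept for
# visualization.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
img, norm_img = process_image('examples/person.jpg', bbox_file=None,
                              openpose_file=None, input_res=224)
with torch.no_grad():
    prediction = model(norm_img.to(device))  # norm_img: (1, 3, 224, 224)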
def rgb_processing(self, rgb_img, center, scale, rot, flip, pn, is_train):
    """Crop/rotate the rgb image and flip at training time (pn is accepted but unused in this variant)."""
    rgb_img = crop(rgb_img.copy(), center, scale,
                   [constants.IMG_RES, constants.IMG_RES], rot=rot)
    if is_train and flip:
        rgb_img = flip_img(rgb_img)
    # (3, IMG_RES, IMG_RES), float, [0, 1]
    rgb_img = np.transpose(rgb_img.astype('float32'), (2, 0, 1)) / 255.0
    return rgb_img
def rgb_processing(self, rgb_img, center, scale):
    """Process the rgb image; this variant applies no augmentation."""
    rgb_img = crop(rgb_img, center, scale,
                   [constants.IMG_RES, constants.IMG_RES], rot=0)
    # (3, 224, 224), float, [0, 1]
    rgb_img = np.transpose(rgb_img.astype('float32'), (2, 0, 1)) / 255.0
    return rgb_img
def rgb_processing(self, rgb_img, center, scale, rot, flip, pn):
    """Process rgb image and do augmentation."""
    # crop and (optionally) rotate the image
    rot = rot if self.use_augmentation_rot else 0
    rgb_img = crop(rgb_img, center, scale,
                   [self.options.img_res, self.options.img_res], rot=rot)
    # flip the image
    if flip:
        rgb_img = flip_img(rgb_img)
    # in the rgb image we add pixel noise in a channel-wise manner
    if self.use_augmentation_rgb:
        rgb_img[:, :, 0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 0] * pn[0]))
        rgb_img[:, :, 1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 1] * pn[1]))
        rgb_img[:, :, 2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 2] * pn[2]))
    # (3, img_res, img_res), float, [0, 1]
    rgb_img = np.transpose(rgb_img.astype('float32'), (2, 0, 1)) / 255.0
    return rgb_img
def rgb_processing(self, rgb_img, center, scale, rot, flip, pn):
    """Process rgb image and do augmentation."""
    rgb_img = crop(rgb_img, center, scale,
                   [constants.IMG_RES, constants.IMG_RES], rot=rot)
    # flip the image
    if flip:
        rgb_img = flip_img(rgb_img)
    # in the rgb image we add pixel noise in a channel-wise manner
    rgb_img[:, :, 0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 0] * pn[0]))
    rgb_img[:, :, 1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 1] * pn[1]))
    rgb_img[:, :, 2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 2] * pn[2]))
    # (3, 224, 224), float, [0, 1]
    rgb_img = np.transpose(rgb_img.astype('float32'), (2, 0, 1)) / 255.0
    return rgb_img
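# Hedged sketch of how the (rot, flip, pn) arguments consumed by the
# rgb_processing variants above might be sampled, following the common
# SPIN-style convention. The factor values and probabilities are assumptions,
# not taken from the original snippets.
def sample_augm_params(noise_factor=0.4, rot_factor=30, is_train=True):
    flip = False     # horizontal flip
    pn = np.ones(3)  # per-channel pixel noise multipliers
    rot = 0          # in-plane rotation in degrees
    if is_train:
        if np.random.uniform() <= 0.5:
            flip = True
        pn = np.random.uniform(1 - noise_factor, 1 + noise_factor, 3)
        # clamp a gaussian sample, and keep rot = 0 most of the time
        rot = min(2 * rot_factor,
                  max(-2 * rot_factor, np.random.randn() * rot_factor))
        if np.random.uniform() <= 0.6:
            rot = 0
    return rot, flip, pn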
def process_image(img_file, bbox_file, input_res=224):
    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD)
    img = cv2.imread(img_file)[:, :, ::-1].copy()
    if bbox_file is None:
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        center, scale = bbox_from_pkl(bbox_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img
def process_image(img_file, bbox_file, openpose_file, input_res=224):
    """Read image, do preprocessing and possibly crop it according to the
    bounding box. If there are bounding box annotations, use them to crop
    the image. If no bounding box is specified but openpose detections are
    available, use them to get the bounding box.
    """
    img = cv2.imread(img_file)[:, :, ::-1].copy()  # PyTorch does not support negative strides at the moment
    if bbox_file is None and openpose_file is None:
        # Assume that the person is centered in the image
        height = img.shape[0]
        width = img.shape[1]
        center = np.array([width // 2, height // 2])
        scale = max(height, width) / 200
    else:
        if bbox_file is not None:
            center, scale = bbox_from_json(bbox_file)
        elif openpose_file is not None:
            center, scale = bbox_from_openpose(openpose_file)
    img = crop(img, center, scale, (input_res, input_res))
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    return img
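# This last variant returns the cropped tensor without normalization, so a
# caller would normalize it before the forward pass. A minimal sketch using
# the same ImageNet statistics as the variants above (the image path is a
# placeholder):
img = process_image('examples/person.jpg', bbox_file=None, openpose_file=None)
normalize_img = Normalize(mean=constants.IMG_NORM_MEAN,
                          std=constants.IMG_NORM_STD)
norm_img = normalize_img(img.clone())[None]  # (1, 3, 224, 224)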
def inference_structure(pathCkp: str, pathImg: str = None, pathBgImg: str = None):
    print('If trained locally and renamed the workspace, do not forget to '
          'change the "checkpoint_dir" in config.json.')

    # Load configuration
    with open(pjn(pathCkp, 'config.json'), 'r') as f:
        options = json.load(f)
        options = namedtuple('options', options.keys())(**options)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    mesh = Mesh(options, options.num_downsampling)

    # read the SMPL .obj file to get uv coordinates
    _, smpl_tri_ind, uv_coord, tri_uv_ind = read_Obj(options.smpl_objfile_path)
    uv_coord[:, 1] = 1 - uv_coord[:, 1]
    expUV = uv_coord[tri_uv_ind.flatten()]
    unique, index = np.unique(smpl_tri_ind.flatten(), return_index=True)
    smpl_verts_uvs = torch.as_tensor(expUV[index, :]).float().to(device)
    smpl_tri_ind = torch.as_tensor(smpl_tri_ind).to(device)

    # load average pose and shape and convert to camera coordinates;
    # the avg pose is decided by the image id we use for training (0-11)
    avgPose_objCoord = np.load(options.MGN_avgPose_path)
    avgPose_objCoord[:3] = rotationMatrix_to_axisAngle(  # for 0, 6, front only
        torch.tensor([[[1, 0, 0],
                       [0, -1, 0],
                       [0, 0, -1]]]))
    avgPose = axisAngle_to_Rot6d(
        torch.Tensor(avgPose_objCoord[None]).reshape(-1, 3)
    ).reshape(1, -1).to(device)
    avgBeta = torch.Tensor(np.load(options.MGN_avgBeta_path)[None]).to(device)
    avgCam = torch.Tensor([1.2755, 0, 0])[None].to(device)  # 1.2755 is for our settings

    # Create model
    model = frameVIBE(options.smpl_model_path, mesh, avgPose, avgBeta, avgCam,
                      options.num_channels, options.num_layers, smpl_verts_uvs,
                      smpl_tri_ind).to(device)
    optimizer = torch.optim.Adam(params=list(model.parameters()))
    models_dict = {options.model: model}
    optimizers_dict = {'optimizer': optimizer}

    # Load pretrained model
    saver = CheckpointSaver(save_dir=options.checkpoint_dir)
    saver.load_checkpoint(models_dict, optimizers_dict,
                          checkpoint_file=options.checkpoint)

    # Prepare and preprocess the input image
    IMG_NORM_MEAN = [0.485, 0.456, 0.406]
    IMG_NORM_STD = [0.229, 0.224, 0.225]
    normalize_img = Normalize(mean=IMG_NORM_MEAN, std=IMG_NORM_STD)

    path_to_rendering = '/'.join(pathImg.split('/')[:-1])
    cameraPath, lightPath = pathImg.split('/')[-1].split('_')[:2]
    cameraIdx, _ = int(cameraPath[6:]), int(lightPath[5:])
    with open(pjn(path_to_rendering,
                  'camera%d_boundingbox.txt' % (cameraIdx))) as f:
        boundbox = literal_eval(f.readline())
    img = cv2.imread(pathImg)[:, :, ::-1].astype(np.float32)

    # prepare background
    if options.replace_background:
        if pathBgImg is None:
            bgimages = []
            for subfolder in sorted(
                    glob(pjn(options.bgimg_dir, 'images/validation/*'))):
                for subsubfolder in sorted(glob(pjn(subfolder, '*'))):
                    if 'room' in subsubfolder:
                        bgimages += sorted(glob(pjn(subsubfolder, '*.jpg')))
            bgimg = cv2.imread(bgimages[np.random.randint(
                0, len(bgimages))])[:, :, ::-1].astype(np.float32)
        else:
            bgimg = cv2.imread(pathBgImg)[:, :, ::-1].astype(np.float32)
        img = background_replacing(img, bgimg)

    # crop the image to the bounding box
    center = [(boundbox[0] + boundbox[2]) / 2, (boundbox[1] + boundbox[3]) / 2]
    scale = max((boundbox[2] - boundbox[0]) / 200,
                (boundbox[3] - boundbox[1]) / 200)
    img = torch.Tensor(crop(img, center, scale, [224, 224],
                            rot=0)).permute(2, 0, 1) / 255
    img_in = normalize_img(img)

    # Inference
    with torch.no_grad():  # disable grad
        model.eval()
        prediction = model(
            img_in[None].repeat_interleave(options.batch_size, dim=0).to(device),
            img[None].repeat_interleave(options.batch_size, dim=0).to(device))

    return prediction, img_in, options
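# Hedged usage sketch for inference_structure; both paths below are
# placeholders, not from the original code. The image filename must follow the
# 'camera<idx>_light<idx>_*' pattern the parsing above expects, and must sit
# next to its camera<idx>_boundingbox.txt file.
prediction, img_in, options = inference_structure(
    pathCkp='logs/structure_experiment',
    pathImg='data/subject/rendering/camera0_light0_rendered.png')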