def demo():
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net).cuda()
    net = DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, "cat_demo")
    load_model(net.module.net, optimizer, model_dir, args.load_epoch)
    data, points_3d, bb8_3d = read_data()
    image, mask, vertex, vertex_weights, pose, corner_target = [
        d.unsqueeze(0).cuda() for d in data
    ]
    seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
        image, mask, vertex, vertex_weights)

    eval_net = DataParallel(EvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    camera_matrix = np.array([[572.4114, 0., 325.2611],
                              [0., 573.57043, 242.04899],
                              [0., 0., 1.]])
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)

    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'linemod')
    bb8_2d_gt = projector.project(bb8_3d, pose[0].detach().cpu().numpy(),
                                  'linemod')
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                           bb8_2d_gt[None, None, ...])
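# The `pnp` helper called above is not defined in this file. Below is a minimal
# sketch of what such a helper could look like, assuming it wraps OpenCV's
# cv2.solvePnP with the EPnP solver, taking Nx3 object points, Nx2 image points
# and a 3x3 intrinsic matrix. The function name and the zero-distortion
# assumption are mine, not confirmed by this code.
import cv2
import numpy as np


def pnp_sketch(points_3d, points_2d, camera_matrix):
    # solvePnP expects contiguous float arrays; assume no lens distortion
    object_pts = np.ascontiguousarray(points_3d, dtype=np.float64).reshape(-1, 1, 3)
    image_pts = np.ascontiguousarray(points_2d, dtype=np.float64).reshape(-1, 1, 2)
    dist_coeffs = np.zeros((4, 1))
    _, rvec, tvec = cv2.solvePnP(object_pts, image_pts, camera_matrix,
                                 dist_coeffs, flags=cv2.SOLVEPNP_EPNP)
    rot_mat, _ = cv2.Rodrigues(rvec)  # 3x3 rotation from the Rodrigues vector
    return np.concatenate([rot_mat, tvec], axis=1)  # 3x4 [R|t] pose matrix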
def collect_ms_info(self):
    database = []
    projector = Projector()
    model_db = LineModModelDB()
    for k in range(self.ms_num):
        data = dict()
        data['rgb_pth'] = os.path.join(self.ms_dir, '{}.jpg'.format(k))
        data['dpt_pth'] = os.path.join(self.ms_dir, '{}_{}_mask.png'.format(k, self.cls_name))

        # if too few foreground pts then continue
        mask = imread(os.path.join(self.linemod_dir, data['dpt_pth']))
        if np.sum(mask) < 5:
            continue

        data['RT'] = read_pickle(os.path.join(self.linemod_dir, self.ms_dir, '{}_{}_RT.pkl'.format(self.cls_name, k)))['RT']
        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'render_multi'
        data['corners'] = projector.project(model_db.get_corners_3d(self.cls_name), data['RT'], 'blender')
        data['farthest'] = projector.project(model_db.get_farthest_3d(self.cls_name), data['RT'], 'blender')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(model_db.get_farthest_3d(self.cls_name, num), data['RT'], 'blender')
        data['center'] = projector.project(model_db.get_centers_3d(self.cls_name)[None, :], data['RT'], 'blender')
        data['small_bbox'] = projector.project(model_db.get_small_bbox(self.cls_name), data['RT'], 'blender')
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(axis_direct, data['RT'], 'blender')
        database.append(data)

    save_pickle(database, self.ms_pkl)
    return database
def collect_real_set_info(self):
    database = []
    projector = Projector()
    modeldb = LineModModelDB()
    transformer = PoseTransformer(class_type=self.cls_name)
    img_num = len(os.listdir(os.path.join(self.linemod_dir, self.rgb_dir)))
    print(img_num)
    for k in range(img_num):
        data = {}
        data['rgb_pth'] = os.path.join(self.rgb_dir, 'color_{:05}.png'.format(k))
        data['dpt_pth'] = os.path.join(self.mask_dir, '{}.png'.format(k))
        pose = self.read_pose(os.path.join(self.rt_dir, 'info_{:05}.txt'.format(k)))
        if len(pose) == 0:
            # os.system('cp {} ./{:05}.png'.format(os.path.join(cfg.OCCLUSION_LINEMOD, data['rgb_pth']), k))
            continue
        data['RT'] = transformer.occlusion_pose_to_blender_pose(pose)
        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'real'
        data['corners'] = projector.project(modeldb.get_corners_3d(self.cls_name), data['RT'], 'linemod')
        data['farthest'] = projector.project(modeldb.get_farthest_3d(self.cls_name), data['RT'], 'linemod')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(modeldb.get_farthest_3d(self.cls_name, num), data['RT'], 'linemod')
        data['center'] = projector.project(modeldb.get_centers_3d(self.cls_name)[None, :], data['RT'], 'linemod')
        data['small_bbox'] = projector.project(modeldb.get_small_bbox(self.cls_name), data['RT'], 'linemod')
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(axis_direct, data['RT'], 'blender')
        database.append(data)

    save_pickle(database, self.real_pkl)
    return database
def collect_real_set_info(self):
    # linemod standard
    database = []
    projector = Projector()
    modeldb = LineModModelDB()
    img_num = len(os.listdir(os.path.join(self.linemod_dir, self.rgb_dir)))
    for k in range(img_num):
        data = {}
        data['rgb_pth'] = os.path.join(self.rgb_dir, '{:06}.jpg'.format(k))
        data['dpt_pth'] = os.path.join(self.mask_dir, '{:04}.png'.format(k))
        pose = read_pose(os.path.join(self.rt_dir, 'rot{}.rot'.format(k)),
                         os.path.join(self.rt_dir, 'tra{}.tra'.format(k)))
        pose_transformer = PoseTransformer(class_type=self.cls_name)
        data['RT'] = pose_transformer.orig_pose_to_blender_pose(pose).astype(np.float32)
        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'real'
        data['corners'] = projector.project(modeldb.get_corners_3d(self.cls_name), data['RT'], 'linemod')
        data['farthest'] = projector.project(modeldb.get_farthest_3d(self.cls_name), data['RT'], 'linemod')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(modeldb.get_farthest_3d(self.cls_name, num), data['RT'], 'linemod')
        data['center'] = projector.project(modeldb.get_centers_3d(self.cls_name)[None, :], data['RT'], 'linemod')
        data['small_bbox'] = projector.project(modeldb.get_small_bbox(self.cls_name), data['RT'], 'linemod')
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(axis_direct, data['RT'], 'linemod')
        database.append(data)

    save_pickle(database, self.set_pkl)
    return database
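# Projector.project is used throughout with a dataset tag ('linemod', 'blender',
# 'logitech') that presumably selects per-dataset camera intrinsics. Below is a
# minimal numpy sketch of a pinhole projection of Nx3 model points through a
# 3x4 [R|t] pose; the helper name and the intrinsics-lookup detail are
# assumptions, not taken from this code.
import numpy as np


def project_points_sketch(points_3d, rt, k):
    # transform model points into the camera frame: X_cam = R @ X + t
    pts_cam = points_3d @ rt[:, :3].T + rt[:, 3]
    # apply the intrinsic matrix K and perform the perspective divide
    pts_img = pts_cam @ k.T
    return pts_img[:, :2] / pts_img[:, 2:]


# example with the LineMOD camera matrix quoted in the demo functions
K_linemod = np.array([[572.4114, 0., 325.2611],
                      [0., 573.57043, 242.04899],
                      [0., 0., 1.]])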
def demo():
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net).cuda()
    net = DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, "cat_linemod_train")  # cat_demo
    load_model(net.module.net, optimizer, model_dir, args.load_epoch)
    data, points_3d, bb8_3d = read_data()
    image, mask, vertex, vertex_weights, pose, corner_target = [
        d.unsqueeze(0).cuda() for d in data
    ]
    seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
        image, mask, vertex, vertex_weights)

    # vector (direction) field + segmentation map, then RANSAC voting computes the
    # keypoints; once the vector field is accurate, the keypoints are accurate too
    eval_net = DataParallel(EvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    camera_matrix = np.array([[572.4114, 0., 325.2611],
                              [0., 573.57043, 242.04899],
                              [0., 0., 1.]])
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)

    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'linemod')
    bb8_2d_gt = projector.project(bb8_3d, pose[0].detach().cpu().numpy(),
                                  'linemod')
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]

    print("loss_seg:{} , loss_vertex:{} , precision:{},recall:{}, ".format(
        loss_seg, loss_vertex, precision, recall))
    # 399.pth
    # loss_seg:tensor([0.0015], device='cuda:0', grad_fn=<MeanBackward0>) , loss_vertex:tensor([0.0016], device='cuda:0', grad_fn=<DivBackward1>) ,
    # precision:tensor([0.9434], device='cuda:0'),recall:tensor([0.9677], device='cuda:0'),
    # 199.pth
    # loss_seg:tensor([0.0015], device='cuda:0', grad_fn=<MeanBackward0>) , loss_vertex:tensor([0.0016], device='cuda:0', grad_fn=<DivBackward1>) ,
    # precision:tensor([0.9583], device='cuda:0'),recall:tensor([0.9524], device='cuda:0'),

    error = np.abs(bb8_2d_pred - bb8_2d_gt)
    err = np.reshape(error, (error.size, ))
    print("reproject sum_error:{} ".format(np.sum(err)))
    # 199: reproject sum_error:13.385891544820552
    # 399: reproject sum_error:12.718721049803733
    # so there is an improvement; by the precision/recall definitions above,
    # precision drops while recall rises

    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                           bb8_2d_gt[None, None, ...])
def inference(input_image, count=0):
    c_timer = time.time()
    rgb = input_image
    if args.input != 'image':
        color = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
        rgb = color
    pre_start = time.time()
    print(pre_start - c_timer, "s BGR2RGB")
    # rgb = Image.open(input_image)
    # print(rgb.shape)

    start = time.time()
    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    rgb = transformer(rgb)
    rgb = rgb.unsqueeze(0).cuda()
    seg_pred, vertex_pred = net(rgb)
    eval_net = DataParallel(EvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    end = time.time()
    print(end - start, "s - to go from image to corner prediction")

    image = imagenet_to_uint8(rgb.detach().cpu().numpy())[0]
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)
    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'logitech')
    end_ = time.time()
    print(end_ - end, "s - to project the corners and show the result")

    seg_mask = torch.argmax(seg_pred, 1)
    if args.debug:
        visualize_mask(seg_mask, count)

    pose_test = np.array([[1, 0, 0, 0], [0, 1, 0, 0.3], [0, 0, 1, 1.2]])
    print(pose_pred)
    # print(pose_test)
    bb8_2d_gt = projector.project(bb8_3d, pose_test, 'logitech')

    if pose_pred[2][3] < 0.4:
        if pose_pred[2][3] > -0.4:
            # cull: the predicted depth is implausibly close to the camera
            if isinstance(rgb, torch.Tensor):
                rgb = rgb.permute(0, 2, 3, 1).detach().cpu().numpy()
                rgb = rgb.astype(np.uint8)
            _, ax = plt.subplots(1)
            ax.imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
            # plt.show()
            plt.savefig('temp{}.png'.format(count))
            plt.close()
            print("image was culled due to pose being unreasonable")
    else:
        visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                               save=True, count=count)  # , bb8_2d_gt[None, None, ...])
def collect_real_set_info(self):
    '''
    PVNet's LineMOD data has been modified: its cat.ply differs from the original
    LineMOD cat.ply by a model offset and rotation, so the ground-truth poses from
    the original dataset need a simple transformation to become the poses used
    here. One might ask: if the images themselves are unchanged, why does RT
    change? Because the 3D features here are taken from the new model, so when
    computing RT, the RT has to change accordingly. Results go into pose_real.pkl.
    '''
    database = []
    projector = Projector()
    modeldb = LineModModelDB()
    img_num = len(os.listdir(os.path.join(self.linemod_dir, self.rgb_dir)))
    for k in range(img_num):
        data = {}
        data['rgb_pth'] = os.path.join(self.rgb_dir, '{:06}.jpg'.format(k))
        data['dpt_pth'] = os.path.join(self.mask_dir, '{:04}.png'.format(k))
        pose = read_pose(os.path.join(self.rt_dir, 'rot{}.rot'.format(k)),
                         os.path.join(self.rt_dir, 'tra{}.tra'.format(k)))
        pose_transformer = PoseTransformer(class_type=self.cls_name)
        data['RT'] = pose_transformer.orig_pose_to_blender_pose(pose).astype(np.float32)
        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'real'
        data['corners'] = projector.project(
            modeldb.get_corners_3d(self.cls_name), data['RT'], 'linemod')
        data['farthest'] = projector.project(
            modeldb.get_farthest_3d(self.cls_name), data['RT'], 'linemod')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(
                modeldb.get_farthest_3d(self.cls_name, num), data['RT'], 'linemod')
        data['center'] = projector.project(
            modeldb.get_centers_3d(self.cls_name)[None, :], data['RT'], 'linemod')
        data['small_bbox'] = projector.project(
            modeldb.get_small_bbox(self.cls_name), data['RT'], 'linemod')
        axis_direct = np.concatenate(
            [np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(axis_direct, data['RT'], 'linemod')
        database.append(data)

    save_pickle(database, self.real_pkl)
    return database
def demo():
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net).cuda()
    net = DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, "switch_linemod_train")
    load_model(net.module.net, optimizer, model_dir, -1)
    image, points_3d, bb8_3d = read_data()
    image = image[None, ...]
    seg_pred, vertex_pred = net(image)

    # visualize_mask(mask)
    # visualize_vertex(vertex, vertex_weights)
    # visualize_hypothesis(image, seg_pred, vertex_pred, corner_target)
    # visualize_voting_ellipse(image, seg_pred, vertex_pred, corner_target)

    eval_net = DataParallel(EvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    camera_matrix = np.array([[572.4114, 0., 325.2611],
                              [0., 573.57043, 242.04899],
                              [0., 0., 1.]])
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)

    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'linemod')
    print(bb8_2d_pred)
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...])
def read_data(idx):
    import torchvision.transforms as transforms
    demo_dir = os.path.join(cfg.HOMEMADE, 'pipe2')
    # source_dir = '/home/volvomlp2/python-envs/pvnet/data/HOMEMADE/renders/intake/validation'
    source_dir = os.path.join(demo_dir, '..', 'renders', 'pipe2')

    rgb = Image.open(os.path.join(source_dir, str(idx) + '.jpg'))
    mask = np.array(cv2.imread(os.path.join(source_dir, str(idx) + '_depth.png'))).astype(np.int32)[..., 0]
    # mask[mask != 0] = 1
    points_3d = np.loadtxt(os.path.join(demo_dir, 'pipe2_points_3d.txt'))
    bb8_3d = np.loadtxt(os.path.join(demo_dir, 'corners.txt'))
    pose = pickle.load(open(os.path.join(source_dir, str(idx) + '_RT.pkl'), 'rb'))['RT']
    print("RT", pose)

    projector = Projector()
    points_2d = projector.project(points_3d, pose, 'blender')
    print("pts-2d", points_2d)
    vertex = compute_vertex(mask, points_2d)

    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    rgb = transformer(rgb)
    # vertex = torch.tensor(vertex, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    # vertex_weight = mask.unsqueeze(0).float()
    pose = torch.tensor(pose.astype(np.float32))
    # points_2d = torch.tensor(points_2d.astype(np.float32))
    data = (rgb, mask, pose)
    return data, bb8_3d
def read_data():
    import torchvision.transforms as transforms
    demo_dir_path = os.path.join(cfg.DATA_DIR, 'demo')

    rgb = Image.open(os.path.join(demo_dir_path, 'cat.jpg'))
    mask = np.array(Image.open(os.path.join(
        demo_dir_path, 'cat_mask.png'))).astype(np.int32)[..., 0]
    mask[mask != 0] = 1
    points_3d = np.loadtxt(os.path.join(demo_dir_path, 'cat_points_3d.txt'))
    bb8_3d = np.loadtxt(os.path.join(demo_dir_path, 'cat_bb8_3d.txt'))
    pose = np.load(os.path.join(demo_dir_path, 'cat_pose.npy'))

    projector = Projector()
    points_2d = projector.project(points_3d, pose, 'linemod')
    vertex = compute_vertex(mask, points_2d)

    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    rgb = transformer(rgb)
    vertex = torch.tensor(vertex, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    vertex_weight = mask.unsqueeze(0).float()
    pose = torch.tensor(pose.astype(np.float32))
    points_2d = torch.tensor(points_2d.astype(np.float32))
    data = (rgb, mask, vertex, vertex_weight, pose, points_2d)
    return data, points_3d, bb8_3d
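# compute_vertex builds the training target for the vertex branch. Below is a
# minimal sketch of the usual construction, assuming it stores, for every
# foreground pixel, the unit direction vector towards each 2D keypoint. The
# H x W x 2K layout and the helper name are assumptions; the real implementation
# may differ.
import numpy as np


def compute_vertex_sketch(mask, points_2d):
    h, w = mask.shape
    num_kp = points_2d.shape[0]
    # per-pixel (x, y) coordinates, shape h x w x 2
    xy = np.stack(np.meshgrid(np.arange(w), np.arange(h)), axis=-1).astype(np.float32)
    vertex = np.zeros((h, w, num_kp, 2), dtype=np.float32)
    fg = mask != 0
    for k, kp in enumerate(points_2d):
        direction = kp[None, None, :] - xy                  # vector from pixel to keypoint
        norm = np.linalg.norm(direction, axis=-1, keepdims=True)
        norm[norm < 1e-3] = 1e-3                            # avoid division by zero
        vertex[..., k, :] = direction / norm                # unit direction per pixel
    vertex[~fg] = 0                                         # background carries no target
    return vertex.reshape(h, w, num_kp * 2)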
def demo():
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net).cuda()
    net = DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, "cat_demo")
    load_model(net.module.net, optimizer, model_dir, -1)
    data, points_3d, bb8_3d = read_data()
    # print("BB8_3D: ", bb8_3d)
    image, mask, vertex, vertex_weights, pose, corner_target = [
        d.unsqueeze(0).cuda() for d in data
    ]
    seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
        image, mask, vertex, vertex_weights)

    seg_mask = torch.argmax(seg_pred, 1)
    print("seg_mask", seg_mask, type(seg_mask), seg_mask.shape, seg_mask[0])
    visualize_mask(seg_mask)
    visualize_mask(mask)
    # visualize_vertex(vertex, vertex_weights)
    # visualize_hypothesis(image, seg_pred, vertex_pred, corner_target)
    visualize_voting_ellipse(image, seg_pred, vertex_pred, corner_target)

    eval_net = DataParallel(EvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    print("Corner Predictions: ", corner_pred)
    camera_matrix = np.array([[572.4114, 0., 325.2611],
                              [0., 573.57043, 242.04899],
                              [0., 0., 1.]])
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)

    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'linemod')
    print("Pose prediction :\n", pose_pred)
    print("GT pose: \n", pose[0].detach().cpu().numpy())
    bb8_2d_gt = projector.project(bb8_3d, pose[0].detach().cpu().numpy(),
                                  'linemod')
    print(bb8_2d_gt)
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                           bb8_2d_gt[None, None, ...])
def demo(idx):
    data, bb8_3d = read_data(idx)
    print("BB8_3D: ", bb8_3d)
    image, mask, pose = [d.unsqueeze(0).cuda() for d in data]

    projector = Projector()
    bb8_2d_gt = projector.project(bb8_3d, pose[0].detach().cpu().numpy(),
                                  'blender')
    print(bb8_2d_gt)
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_bounding_box(image[None, ...], bb8_2d_gt[None, None, ...])
def get_dataset(num=10):
    dataset = []
    projector = Projector()
    modeldb = LineModModelDB()
    for k in range(num):
        data = {}
        data['rgb_pth'] = 'special/duck/{}.jpg'.format(k)
        data['dpt_pth'] = 'special/duck/{}_depth.png'.format(k)
        data['RT'] = read_pickle(os.path.join(cfg.LINEMOD, 'special/duck/{}_RT.pkl'.format(k)))['RT']
        data['center'] = projector.project(modeldb.get_centers_3d('duck'), data['RT'], 'blender')
        data['rnd_typ'] = 'render'
        dataset.append(data)
    return dataset
def collect_render_set_info(self, pkl_file, render_dir, format='jpg'):
    # blender standard
    database = []
    projector = Projector()
    modeldb = LineModModelDB()
    for k in range(self.render_num):
        data = {}
        data['rgb_pth'] = os.path.join(render_dir, '{}.{}'.format(k, format))
        data['RT'] = read_pickle(os.path.join(self.linemod_dir, render_dir, '{}_RT.pkl'.format(k)))['RT']
        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'render'
        data['corners'] = projector.project(modeldb.get_corners_3d(self.cls_name), data['RT'], 'blender')
        data['farthest'] = projector.project(modeldb.get_farthest_3d(self.cls_name), data['RT'], 'blender')
        data['center'] = projector.project(modeldb.get_centers_3d(self.cls_name)[None, :], data['RT'], 'blender')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(modeldb.get_farthest_3d(self.cls_name, num), data['RT'], 'blender')
        data['small_bbox'] = projector.project(modeldb.get_small_bbox(self.cls_name), data['RT'], 'blender')
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(axis_direct, data['RT'], 'blender')
        database.append(data)

    save_pickle(database, pkl_file)
    return database
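# The collect_*_info methods all serialize a list of per-image dicts via
# save_pickle. A small self-contained sketch of how such a database file could
# be inspected afterwards, assuming save_pickle is a thin wrapper around
# pickle.dump; the function name, file path and field choices are illustrative
# only.
import pickle


def inspect_database_sketch(pkl_path):
    with open(pkl_path, 'rb') as f:
        database = pickle.load(f)
    print('entries:', len(database))
    entry = database[0]
    # fields written by the collectors above: image path, render type, 3x4 pose,
    # projected keypoints, object center, etc.
    print(entry['rgb_pth'], entry['rnd_typ'])
    print('RT shape:', entry['RT'].shape, 'center:', entry['center'])
    return database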
class YCBDB(object):
    def __init__(self, class_type):
        self.class_type = class_type
        self.data_dir_path = os.path.join(cfg.YCB, 'data')
        self.rgb_pattern = os.path.join(self.data_dir_path, '{:04}/{:06}-color.png')
        self.projector = Projector()

    def validate_pose(self):
        rgb_path = '/home/pengsida/Datasets/YCB/renders/{}/0.jpg'.format(self.class_type)
        pose_path = '/home/pengsida/Datasets/YCB/renders/{}/0_RT.pkl'.format(self.class_type)
        model_path = '/home/pengsida/Datasets/YCB/models/{}/points.xyz'.format(self.class_type)

        img = np.array(Image.open(rgb_path))
        pose = read_pickle(pose_path)['RT']
        model_3d = np.loadtxt(model_path)
        model_2d = self.projector.project(model_3d, pose, 'blender')

        import matplotlib.pyplot as plt
        plt.imshow(img)
        plt.plot(model_2d[:, 0], model_2d[:, 1], 'r.')
        plt.show()

    @staticmethod
    def get_proper_crop_size():
        mask_paths = glob.glob('/home/pengsida/Datasets/YCB/renders/003_cracker_box/*_depth.png')
        widths = []
        heights = []

        for mask_path in mask_paths:
            mask = np.array(Image.open(mask_path))
            row_col = np.argwhere(mask == 1)
            min_row, max_row = np.min(row_col[:, 0]), np.max(row_col[:, 0])
            min_col, max_col = np.min(row_col[:, 1]), np.max(row_col[:, 1])
            width = max_col - min_col
            height = max_row - min_row
            widths.append(width)
            heights.append(height)

        widths = np.array(widths)
        heights = np.array(heights)
        inds = np.lexsort([heights, widths])
        print('min width: {}, max width: {}'.format(np.min(widths), np.max(widths)))
        print('min height: {}, max height: {}'.format(np.min(heights), np.max(heights)))
        print('min size: {}, {}'.format(heights[inds[0]], widths[inds[0]]))
        print('max size: {}, {}'.format(heights[inds[-1]], widths[inds[-1]]))
def collect_val_render(self, pkl_file, render_dir, format='jpg'):
    database = []
    projector = Projector()
    modeldb = HomemadeModelDB()
    for k in range(3482, 3499):
        data = {}
        print(os.path.join(self.render_val_dir, '{}.{}'.format(k, format)))
        data['rgb_pth'] = os.path.join(self.render_val_dir, '{}.{}'.format(k, format))
        data['dpt_pth'] = os.path.join(self.render_val_dir, '{}_depth.png'.format(k))
        data['RT'] = read_pickle(os.path.join(self.homemade_dir, self.render_val_dir, '{}_RT.pkl'.format(k)))['RT']
        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'render'
        data['corners'] = projector.project(modeldb.get_corners_3d(self.cls_name), data['RT'], 'blender')
        data['farthest'] = projector.project(modeldb.get_farthest_3d(self.cls_name), data['RT'], 'blender')
        data['center'] = projector.project(modeldb.get_centers_3d(self.cls_name)[None, :], data['RT'], 'blender')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(modeldb.get_farthest_3d(self.cls_name, num), data['RT'], 'blender')
        data['small_bbox'] = projector.project(modeldb.get_small_bbox(self.cls_name), data['RT'], 'blender')
        axis_direct = np.concatenate([np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(axis_direct, data['RT'], 'blender')
        database.append(data)

    print("collect_val successful?: length = ", len(database))
    save_pickle(database, pkl_file)
    return database
def read_data():
    import torchvision.transforms as transforms
    demo_dir_path = os.path.join(cfg.DATA_DIR, 'demo_driller_real')

    rgb = Image.open(os.path.join(demo_dir_path, 'driller2.jpg'))
    # the mask is ground truth (driller_mask)
    mask = np.array(Image.open(os.path.join(demo_dir_path, '1168.png'))).astype(np.int32)[..., 0]
    # binarize to all ones; no probability weighting
    mask[mask != 0] = 1
    # nine 3D keypoints extracted with farthest point sampling (driller_points_3d.txt)
    points_3d = np.loadtxt(os.path.join(demo_dir_path, 'farthest9.txt'))
    # 3D bounding box corners
    bb8_3d = np.loadtxt(os.path.join(demo_dir_path, 'driller_bb8_3d.txt'))
    # ground-truth pose
    pose = np.load(os.path.join(demo_dir_path, 'driller_pose.npy'))

    projector = Projector()
    # project with the intrinsics and RT
    points_2d = projector.project(points_3d, pose, 'linemod')
    # ground-truth vector (direction) field: combine the 2D keypoints with the mask
    # to recover a per-pixel vertex vector map that serves as the reference
    vertex = compute_vertex(mask, points_2d)

    # standard ImageNet normalization statistics
    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    rgb = transformer(rgb)
    vertex = torch.tensor(vertex, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    # note: equations 2-4 from the paper are not implemented here
    vertex_weight = mask.unsqueeze(0).float()
    pose = torch.tensor(pose.astype(np.float32))
    points_2d = torch.tensor(points_2d.astype(np.float32))
    data = (rgb, mask, vertex, vertex_weight, pose, points_2d)
    return data, points_3d, bb8_3d
def getval_dataset(num=15):
    dataset = []
    projector = Projector()
    modeldb = HomemadeModelDB()
    source_dir = '/media/volvomlp2/03C796544677EF72/BBB/HOMEMADE/ladderframe/validation/'
    for k in range(3482, 3482 + num):
        print(k)
        data = {}
        data['rgb_pth'] = os.path.join(source_dir, '{}.jpg'.format(k))
        data['dpt_pth'] = os.path.join(source_dir, '{}_depth.png'.format(k))
        data['RT'] = read_pickle(os.path.join(source_dir, '{}_RT.pkl'.format(k)))['RT']
        data['center'] = projector.project(modeldb.get_centers_3d('intake'), data['RT'], 'blender')
        data['rnd_typ'] = 'render'
        dataset.append(data)
    return dataset
def read_data(idx):
    import torchvision.transforms as transforms
    demo_dir = os.path.join(cfg.DATA_DIR, 'demo', 'pipe2')
    source_dir = '/media/volvomlp2/03C796544677EF72/BBB/HOMEMADE/renders/pipe2/'
    # source_dir = os.path.join(demo_dir, 'source')

    rgb = Image.open(os.path.join(source_dir, str(idx) + '.jpg'))
    mask = np.array(Image.open(os.path.join(source_dir, str(idx) + '_depth.png'))).astype(np.int32)
    # mask = np.array(cv2.imread(os.path.join(source_dir, str(idx) + '_depth.png'))).astype(np.int32)[..., 0]
    mask[mask != 0] = 1
    # print(mask, "-", type(mask), "-", mask.shape)
    points_3d = np.loadtxt(os.path.join(cfg.HOMEMADE, 'pipe2', 'pipe2_points_3d.txt'))
    bb8_3d = np.loadtxt(os.path.join(cfg.HOMEMADE, 'pipe2', 'corners.txt'))
    pose = pickle.load(open(os.path.join(source_dir, str(idx) + '_RT.pkl'), 'rb'))['RT']
    # print("RT", pose)

    projector = Projector()
    points_2d = projector.project(points_3d, pose, 'blender')
    # print("pts-2d", points_2d)
    vertex = compute_vertex(mask, points_2d)

    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    rgb = transformer(rgb)
    vertex = torch.tensor(vertex, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    vertex_weight = mask.unsqueeze(0).float()
    pose = torch.tensor(pose.astype(np.float32))
    points_2d = torch.tensor(points_2d.astype(np.float32))
    data = (rgb, mask, vertex, vertex_weight, pose, points_2d)
    return data, points_3d, bb8_3d
def read_data():
    import torchvision.transforms as transforms
    demo_dir_path = os.path.join(cfg.DATA_DIR, 'demo', 'cat')

    rgb = Image.open(os.path.join(demo_dir_path, '3.jpg'))
    mask = np.array(Image.open(os.path.join(demo_dir_path, 'new.png'))).astype(np.int32)
    mask[mask != 0] = 1
    points_3d = np.loadtxt(os.path.join(demo_dir_path, 'cat_points_3d.txt'))
    bb8_3d = np.loadtxt(os.path.join(demo_dir_path, 'cat_bb8_3d.txt'))
    # pose = np.load('/home/volvomlp2/python-envs/pvnet/data/deme/intake_pose.npy')
    # print("cat", pose)
    pose = pickle.load(open(os.path.join(demo_dir_path, '3_RT.pkl'), 'rb'))['RT']
    # pose = np.loadtxt(os.path.join(demo_dir_path, 'pose.txt'))
    # print(pose)
    # print(os.getcwd())
    # np.save('intake_poseTHIS.npy', pose)

    projector = Projector()
    points_2d = projector.project(points_3d, pose, 'linemod')
    # print("pts-2d", points_2d)
    # print("mask", mask, type(mask), mask.shape, mask[0])
    vertex = compute_vertex(mask, points_2d)

    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    rgb = transformer(rgb)
    vertex = torch.tensor(vertex, dtype=torch.float32).permute(2, 0, 1)
    mask = torch.tensor(np.ascontiguousarray(mask), dtype=torch.int64)
    vertex_weight = mask.unsqueeze(0).float()
    pose = torch.tensor(pose.astype(np.float32))
    points_2d = torch.tensor(points_2d.astype(np.float32))
    data = (rgb, mask, vertex, vertex_weight, pose, points_2d)
    return data, points_3d, bb8_3d
    seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
        image, mask, vertex, vertex_weights)
    eval_net = DataParallel(EvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    camera_matrix = np.array([[572.4114, 0., 325.2611],
                              [0., 573.57043, 242.04899],
                              [0., 0., 1.]])
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)

    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'linemod')
    bb8_2d_gt = projector.project(bb8_3d, pose[0].detach().cpu().numpy(),
                                  'linemod')
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                           bb8_2d_gt[None, None, ...])


if __name__ == "__main__":
    demo()
def demo():
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net).cuda()
    net = DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, 'cat_demo')
    load_model(net.module.net, optimizer, model_dir, args.load_epoch)
    data, points_3d, bb8_3d = read_data()
    image, mask, vertex, vertex_weights, pose, corner_target = [
        d.unsqueeze(0).cuda() for d in data
    ]

    # Run the net
    seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
        image, mask, vertex, vertex_weights)
    print('vertex_pred.shape')
    print(vertex_pred.shape)
    print(' ')
    print('vertex_pred[0]')
    print(vertex_pred)
    print(' ')

    # Various visualizations
    # visualize_vertex_field(vertex_pred, vertex_weights, keypointIdx=3)
    print(seg_pred.shape, mask.shape)
    visualize_mask(np.squeeze(seg_pred.cpu().detach().numpy()),
                   mask.cpu().detach().numpy())
    rgb = Image.open('data/demo/cat.jpg')
    img = np.array(rgb)
    # visualize_overlap_mask(img, np.squeeze(seg_pred.cpu().detach().numpy()), None)

    # Run the ransac voting
    eval_net = DataParallel(EvalWrapper2().cuda())
    # corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    corner_pred, covar = [
        x.cpu().detach().numpy()[0] for x in eval_net(seg_pred, vertex_pred)
    ]
    print('Keypoint predictions:')
    print(corner_pred)
    print(' ')
    print('covar: ', covar)
    print(' ')

    camera_matrix = np.array([[572.4114, 0., 325.2611],
                              [0., 573.57043, 242.04899],
                              [0., 0., 1.]])

    # Fit pose to points
    # pose_pred = pnp(points_3d, corner_pred, camera_matrix)
    # evaluator = Evaluator()
    # pose_pred = evaluator.evaluate_uncertainty(corner_pred, covar, pose, 'cat', intri_matrix=camera_matrix)

    def getWeights(covar):
        # turn per-keypoint 2x2 covariances into weights for uncertainty PnP
        cov_invs = []
        for vi in range(covar.shape[0]):  # for every keypoint
            if covar[vi, 0, 0] < 1e-6 or np.sum(np.isnan(covar)[vi]) > 0:
                cov_invs.append(np.zeros([2, 2]).astype(np.float32))
                continue
            cov_inv = np.linalg.inv(scipy.linalg.sqrtm(covar[vi]))
            cov_invs.append(cov_inv)
        cov_invs = np.asarray(cov_invs)  # pn,2,2
        weights = cov_invs.reshape([-1, 4])
        weights = weights[:, (0, 1, 3)]
        return weights

    weights = getWeights(covar)
    pose_pred = uncertainty_pnp(corner_pred, weights, points_3d, camera_matrix)
    print('Predicted pose: \n', pose_pred)
    print('Ground truth pose: \n', pose[0].detach().cpu().numpy())
    print(' ')

    projector = Projector()
    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'linemod')
    bb8_2d_gt = projector.project(bb8_3d, pose[0].detach().cpu().numpy(),
                                  'linemod')
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_points(image[None, ...], corner_target.detach().cpu().numpy(),
                     pts_pred=corner_pred[None, :, :])
    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                           bb8_2d_gt[None, None, ...])
def demo(idx):
    net = Resnet18_8s(ver_dim=vote_num * 2, seg_dim=2)
    net = NetWrapper(net).cuda()
    net = DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=train_cfg['lr'])
    model_dir = os.path.join(cfg.MODEL_DIR, "intake_demo")
    load_model(net.module.net, optimizer, model_dir, -1)
    data, points_3d, bb8_3d = read_data(idx)
    # print("BB8_3D: ", bb8_3d)
    image, mask, vertex, vertex_weights, pose, corner_target = [
        d.unsqueeze(0).cuda() for d in data
    ]
    seg_pred, vertex_pred, loss_seg, loss_vertex, precision, recall = net(
        image, mask, vertex, vertex_weights)

    seg_mask = torch.argmax(seg_pred, 1)
    visualize_mask(seg_mask)
    visualize_mask(mask)
    # visualize_vertex(vertex, vertex_weights)
    # visualize_hypothesis(image, seg_pred, vertex_pred, corner_target)
    # visualize_voting_ellipse(image, seg_pred, vertex_pred, corner_target)

    eval_net = DataParallel(EvalWrapper().cuda())
    uncertain_eval_net = DataParallel(UncertaintyEvalWrapper().cuda())
    corner_pred = eval_net(seg_pred, vertex_pred).cpu().detach().numpy()[0]
    net.eval()

    loss_seg, loss_vertex, precision, recall = [
        torch.mean(val) for val in (loss_seg, loss_vertex, precision, recall)
    ]
    print("LOSS SEG :", loss_seg, "\nLOSS VERTEX : ", loss_vertex,
          "\nPRECISION :", precision, '\nRECALL :', recall)

    # print("Corner Predictions: ", corner_pred)
    camera_matrix = np.array([[700, 0., 320.],
                              [0., 700, 240.],
                              [0., 0., 1.]])
    pose_pred = pnp(points_3d, corner_pred, camera_matrix)

    projector = Projector()
    print("Pose prediction :\n", pose_pred)
    pose_gt = pose[0].detach().cpu().numpy()
    print("GT Pose :\n", pose_gt)

    # translation error between predicted and ground-truth pose
    import math as m
    s = 0
    for i in range(3):
        if pose_pred[2][3] < 0:
            print('NB!')
        s += (pose_pred[i][3] - pose_gt[i][3])**2
    s = m.sqrt(s)
    print("--->", loss_seg.detach().cpu().numpy(),
          loss_vertex.detach().cpu().numpy(),
          precision.detach().cpu().numpy(),
          recall.detach().cpu().numpy(), s)

    bb8_2d_pred = projector.project(bb8_3d, pose_pred, 'blender')
    bb8_2d_gt = projector.project(bb8_3d, pose_gt, 'blender')
    # print(bb8_2d_gt)
    image = imagenet_to_uint8(image.detach().cpu().numpy())[0]
    visualize_bounding_box(image[None, ...], bb8_2d_pred[None, None, ...],
                           bb8_2d_gt[None, None, ...])