def get_world_to_view_transform(self, **kwargs) -> Transform3d:
    """
    Return the world-to-view transform.

    Args:
        **kwargs: parameters for the camera extrinsics can be passed in
            as keyword arguments to override the default values set in
            __init__. Setting R and T here will update the values set in
            init as these values may be needed later on in the rendering
            pipeline e.g. for lighting calculations.

    Returns:
        A Transform3d object which represents a batch of transforms
        of shape (N, 3, 3)

    Raises:
        ValueError: if R and T have mismatched batch dimensions or the
            wrong per-element shapes.
    """
    # Overrides are persisted on self because later pipeline stages
    # (e.g. lighting) read the extrinsics back from the camera object.
    R = self.R = kwargs.get("R", self.R)  # pyre-ignore[16]
    T = self.T = kwargs.get("T", self.T)  # pyre-ignore[16]
    if T.shape[0] != R.shape[0]:
        msg = "Expected R, T to have the same batch dimension; got %r, %r"
        raise ValueError(msg % (R.shape[0], T.shape[0]))
    if T.dim() != 2 or T.shape[1:] != (3,):
        msg = "Expected T to have shape (N, 3); got %r"
        raise ValueError(msg % repr(T.shape))
    if R.dim() != 3 or R.shape[1:] != (3, 3):
        msg = "Expected R to have shape (N, 3, 3); got %r"
        # repr() is required here: torch.Size is a tuple subclass, so
        # `msg % R.shape` would try to unpack its three elements into the
        # single %r placeholder and raise "not all arguments converted"
        # instead of the intended error message.
        raise ValueError(msg % repr(R.shape))
    # Create a Transform3d object: rotate first, then translate, which
    # matches the convention X_cam = X_world R + T.
    T = Translate(T, device=T.device)
    R = Rotate(R, device=R.device)
    world_to_view_transform = R.compose(T)
    return world_to_view_transform
def prepare_pose(self, p: dict) -> Transform3d:
    """Convert an EVIMO pose dict into a pytorch3d world-to-view Transform3d."""
    # Map the translation and rotation from the EVIMO coordinate
    # convention into the pytorch3d convention.
    translation = self.evimo_to_pytorch3d_xyz(p)
    rotation = self.evimo_to_pytorch3d_Rotation(p)
    # Compose rotation followed by translation, then re-wrap the raw
    # 4x4 matrix in a fresh Transform3d.
    composed = Rotate(rotation).translate(translation)
    return Transform3d(matrix=composed.get_matrix())
def __init__(self, points, normals=None, features=None,
             to_unit_sphere: bool = False,
             to_unit_box: bool = False,
             to_axis_aligned: bool = False,
             up=((0, 1, 0), ),
             front=((0, 0, 1), )):
    """
    Build a point cloud in object coordinates, optionally axis-aligning
    and normalizing it in place.

    Args:
        points, normals: points in world coordinates (unnormalized and
            unaligned) Pointclouds in pytorch3d
        features: can be a dict {name: value} where value can be any
            acceptable form as the pytorch3d.Pointclouds
        to_unit_sphere (bool): scale so the farthest point lies on the
            unit sphere
        to_unit_box (bool): transform to unit box (sidelength = 1)
        to_axis_aligned (bool): rotate the object using the up and front
            vectors
        up: the up direction in world coordinate (will be justified to
            object)
        front: front direction in the world coordinate (will be
            justified to z-axis)
    """
    super().__init__(points, normals=normals, features=features)
    # Identity by default; updated below if the cloud is re-oriented
    # or normalized.
    self.obj2world_trans = Transform3d()

    # rotate object to have up direction (0, 1, 0)
    # and front direction (0, 0, -1)
    # (B,3,3) rotation to transform to axis-aligned point clouds
    if to_axis_aligned:
        # look_at_rotation gives object->world; invert it to bring the
        # stored points into the axis-aligned object frame.
        self.obj2world_trans = Rotate(look_at_rotation(((0, 0, 0), ),
                                                       at=front, up=up),
                                      device=self.device)
        world_to_obj_rotate_trans = self.obj2world_trans.inverse()
        # update points, normals (in-place on the packed tensors)
        self.update_points_(
            world_to_obj_rotate_trans.transform_points(
                self.points_packed()))
        normals_packed = self.normals_packed()
        if normals_packed is not None:
            self.update_normals_(
                world_to_obj_rotate_trans.transform_normals(
                    normals_packed))

    # normalize to unit box and update obj2world_trans
    # NOTE(review): the returned normalizing_trans is unused here —
    # presumably the normalize_* methods also fold it into
    # self.obj2world_trans; confirm against their implementations.
    if to_unit_box:
        normalizing_trans = self.normalize_to_box_()
    elif to_unit_sphere:
        normalizing_trans = self.normalize_to_sphere_()
def test(model, loader, num_class=40):
    """Evaluate ``model`` on ``loader``; return (instance_acc, class_acc).

    Applies an optional test-time rotation controlled by the module-level
    ``args.rot`` ('z' or 'so3'). ``class_acc`` is the mean of per-class
    accuracies; ``instance_acc`` is the mean per-batch accuracy.
    """
    per_batch_acc = []
    # Columns: [summed per-class accuracy, batch count, mean accuracy].
    class_stats = np.zeros((num_class, 3))
    for batch_idx, batch in tqdm(enumerate(loader), total=len(loader)):
        points, target = batch
        rotation = None
        if args.rot == 'z':
            rotation = RotateAxisAngle(angle=torch.rand(points.shape[0]) * 360,
                                       axis="Z", degrees=True)
        elif args.rot == 'so3':
            rotation = Rotate(R=random_rotations(points.shape[0]))
        if rotation is not None:
            points = rotation.transform_points(points)
        target = target[:, 0]
        points = points.transpose(2, 1)
        points, target = points.cuda(), target.cuda()
        net = model.eval()
        pred, _ = net(points)
        pred_choice = pred.data.max(1)[1]
        # Accumulate accuracy per class present in this batch.
        for cat in np.unique(target.cpu()):
            mask = target == cat
            hits = pred_choice[mask].eq(target[mask].long().data).cpu().sum()
            class_stats[cat, 0] += hits.item() / float(points[mask].size()[0])
            class_stats[cat, 1] += 1
        batch_hits = pred_choice.eq(target.long().data).cpu().sum()
        per_batch_acc.append(batch_hits.item() / float(points.size()[0]))
    class_stats[:, 2] = class_stats[:, 0] / class_stats[:, 1]
    class_acc = np.mean(class_stats[:, 2])
    instance_acc = np.mean(per_batch_acc)
    return instance_acc, class_acc
def get_tri_color_lights_for_view(cams, has_specular=False, point_lights=True):
    """
    Create RGB lights direction in the half dome
    The direction is given in the same coordinates as the pointcloud

    Args:
        cams: camera object providing ``device`` and ``get_camera_center()``
        has_specular: if True, move 15% of the diffuse intensity into the
            specular component
        point_lights: if True return PointLights (positions at 5x the
            direction vectors), otherwise DirectionalLights
    Returns:
        Lights with three RGB light sources (B: right, G: left, R: bottom)
    """
    import math
    from DSS.core.lighting import (DirectionalLights, PointLights)
    from pytorch3d.renderer.cameras import look_at_rotation
    from pytorch3d.transforms import Rotate

    # Three lights at 30 degrees elevation, azimuths -60/60/180 degrees.
    elev = torch.tensor(((30, 30, 30), ), device=cams.device)
    azim = torch.tensor(((-60, 60, 180), ), device=cams.device)
    elev = math.pi / 180.0 * elev  # degrees -> radians
    azim = math.pi / 180.0 * azim
    # Spherical -> Cartesian unit directions, shape (1, 3, 3).
    x = torch.cos(elev) * torch.sin(azim)
    y = torch.sin(elev)
    z = torch.cos(elev) * torch.cos(azim)
    light_directions = torch.stack([x, y, z], dim=-1)
    cam_pos = cams.get_camera_center()
    # Rotate the canonical directions so the dome faces the camera.
    # NOTE(review): the cross with a random vector picks an arbitrary
    # direction perpendicular to cam_pos as the look-at target, so the
    # light placement is nondeterministic between calls — confirm this is
    # intended. `F` is presumably torch.nn.functional imported at module
    # level (not visible here).
    R = look_at_rotation(torch.zeros_like(cam_pos),
                         at=F.normalize(torch.cross(cam_pos, torch.rand_like(cam_pos)), dim=-1),
                         up=cam_pos)
    light_directions = Rotate(R=R.transpose(
        1, 2), device=cams.device).transform_points(light_directions)
    # trimesh.Trimesh(vertices=torch.cat([cam_pos, light_directions[0]], dim=0).cpu().numpy(), process=False).export('tests/outputs/light_dir.ply')

    ambient_color = torch.FloatTensor((((0.2, 0.2, 0.2), ), ))
    # One diffuse color per light: blue, green, red.
    diffuse_color = torch.FloatTensor(((
        (0.0, 0.0, 0.8),
        (0.0, 0.8, 0.0),
        (0.8, 0.0, 0.0),
    ), ))
    if has_specular:
        # Split total intensity 85% diffuse / 15% specular.
        specular_color = 0.15 * diffuse_color
        diffuse_color *= 0.85
    else:
        specular_color = ((
            (0, 0, 0),
            (0, 0, 0),
            (0, 0, 0),
        ), )
    if not point_lights:
        lights = DirectionalLights(ambient_color=ambient_color,
                                   diffuse_color=diffuse_color,
                                   specular_color=specular_color,
                                   direction=light_directions)
    else:
        # Place point lights along the computed directions, 5 units out.
        location = light_directions * 5
        lights = PointLights(ambient_color=ambient_color,
                             diffuse_color=diffuse_color,
                             specular_color=specular_color,
                             location=location)
    return lights
def get_world_to_view_transform(R=r, T=t) -> Transform3d:
    """
    Build the world-to-view Transform3d from a rotation and a translation.

    Pytorch3d uses the same convention as Hartley & Zisserman: for camera
    extrinsic parameters R (rotation) and T (translation), a 3D point
    `X_world` in world coordinates maps to camera coordinates as
    `X_cam = X_world R + T`.

    Args:
        R: (N, 3, 3) matrix representing the rotation.
        T: (N, 3) matrix representing the translation.

    Returns:
        a Transform3d object which represents the composed RT
        transformation.

    Raises:
        ValueError: on mismatched batch sizes or wrong element shapes.
    """
    # TODO: also support the case where RT is specified as one matrix
    # of shape (N, 4, 4).
    # Validate inputs before composing anything.
    if T.shape[0] != R.shape[0]:
        raise ValueError(
            "Expected R, T to have the same batch dimension; got %r, %r"
            % (R.shape[0], T.shape[0])
        )
    if T.dim() != 2 or T.shape[1:] != (3,):
        raise ValueError("Expected T to have shape (N, 3); got %r" % repr(T.shape))
    if R.dim() != 3 or R.shape[1:] != (3, 3):
        raise ValueError("Expected R to have shape (N, 3, 3); got %r" % repr(R.shape))
    # Rotation applied first, then translation: X_cam = X_world R + T.
    rotation = Rotate(R, device=R.device)
    translation = Translate(T, device=T.device)
    return rotation.compose(translation)
def __getitem__(self, index):
    """Render one randomly-transformed mesh; return (pixels, [transform])."""
    # Wrap the index so any integer maps onto a valid mesh.
    index %= len(self.meshes)
    scale, rot, trans = self.get_random_transform()
    # Ground-truth 4x4 matrix: scale, then rotate, then translate.
    gt_matrix = (
        Transform3d()
        .scale(scale)
        .compose(Rotate(rot))
        .translate(*trans)
        .get_matrix()
        .squeeze()
    )
    scaled_mesh = self.meshes[index].scale_verts(scale)
    rendered = self.renderer(scaled_mesh,
                             R=rot.unsqueeze(0).to(self.device),
                             T=trans.unsqueeze(0).to(self.device))
    # Drop alpha and move channels first.
    pixels = rendered[0, ..., :3].transpose(0, -1)
    return (pixels, [gt_matrix.to(self.device)])
def main(args):
    """Train a ShapeNet part-segmentation model and evaluate every epoch.

    Creates the experiment/checkpoint/log directories, builds the train and
    test dataloaders, restores the best checkpoint if one exists, then runs
    the train/eval loop, saving the best model by instance-average IoU.

    Args:
        args: parsed command-line namespace (gpu, log_dir, model, npoint,
            normal, batch_size, rot, optimizer hyper-parameters, ...).
    """

    def log_string(msg):
        # Log to file and echo to stdout.
        logger.info(msg)
        print(msg)

    '''HYPER PARAMETER'''
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    '''CREATE DIR'''
    timestr = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
    experiment_dir = Path('./log/')
    experiment_dir.mkdir(parents=True, exist_ok=True)
    experiment_dir = experiment_dir.joinpath('partseg')
    experiment_dir.mkdir(parents=True, exist_ok=True)
    if args.log_dir is None:
        experiment_dir = experiment_dir.joinpath(timestr)
    else:
        experiment_dir = experiment_dir.joinpath(args.log_dir)
    experiment_dir.mkdir(parents=True, exist_ok=True)
    checkpoints_dir = experiment_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(parents=True, exist_ok=True)
    log_dir = experiment_dir.joinpath('logs/')
    log_dir.mkdir(parents=True, exist_ok=True)

    '''LOG'''
    # NOTE(review): re-parses and shadows the `args` parameter — kept for
    # backward compatibility; confirm this is intended.
    args = parse_args()
    logger = logging.getLogger("Model")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/%s.txt' % (log_dir, args.model))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(args)

    root = 'data/shapenetcore_partanno_segmentation_benchmark_v0_normal/'

    TRAIN_DATASET = PartNormalDataset(root=root, npoints=args.npoint, split='trainval', normal_channel=args.normal)
    trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET, batch_size=args.batch_size, shuffle=True, num_workers=4)
    TEST_DATASET = PartNormalDataset(root=root, npoints=args.npoint, split='test', normal_channel=args.normal)
    testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=args.batch_size, shuffle=False, num_workers=4)
    log_string("The number of training data is: %d" % len(TRAIN_DATASET))
    log_string("The number of test data is: %d" % len(TEST_DATASET))
    num_classes = 16  # object categories
    num_part = 50     # part labels across all categories

    '''MODEL LOADING'''
    MODEL = importlib.import_module(args.model)
    classifier = MODEL.get_model(args, num_part, normal_channel=args.normal).cuda()
    criterion = MODEL.get_loss().cuda()

    def weights_init(m):
        # Xavier init for conv/linear layers (only used if the apply()
        # call below is re-enabled).
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            torch.nn.init.xavier_normal_(m.weight.data)
            torch.nn.init.constant_(m.bias.data, 0.0)
        elif classname.find('Linear') != -1:
            torch.nn.init.xavier_normal_(m.weight.data)
            torch.nn.init.constant_(m.bias.data, 0.0)

    # Best-effort resume from the saved best checkpoint; fall back to a
    # fresh start. `except Exception` (not a bare except) so Ctrl-C still
    # interrupts.
    try:
        checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
        start_epoch = checkpoint['epoch']
        classifier.load_state_dict(checkpoint['model_state_dict'])
        log_string('Use pretrain model')
    except Exception:
        log_string('No existing model, starting training from scratch...')
        start_epoch = 0
        # classifier = classifier.apply(weights_init)

    if args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(
            classifier.parameters(),
            lr=args.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=args.decay_rate
        )
    else:
        optimizer = torch.optim.SGD(classifier.parameters(), lr=args.learning_rate, momentum=0.9)

    def bn_momentum_adjust(m, momentum):
        # Decay BatchNorm momentum alongside the learning rate.
        if isinstance(m, torch.nn.BatchNorm2d) or isinstance(m, torch.nn.BatchNorm1d):
            m.momentum = momentum

    LEARNING_RATE_CLIP = 1e-5
    MOMENTUM_ORIGINAL = 0.1
    MOMENTUM_DECCAY = 0.5
    MOMENTUM_DECCAY_STEP = args.step_size

    best_acc = 0
    global_epoch = 0
    best_class_avg_iou = 0
    best_inctance_avg_iou = 0

    for epoch in range(start_epoch, args.epoch):
        log_string('Epoch %d (%d/%s):' % (global_epoch + 1, epoch + 1, args.epoch))
        '''Adjust learning rate and BN momentum'''
        lr = max(args.learning_rate * (args.lr_decay ** (epoch // args.step_size)), LEARNING_RATE_CLIP)
        log_string('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        mean_correct = []
        momentum = MOMENTUM_ORIGINAL * (MOMENTUM_DECCAY ** (epoch // MOMENTUM_DECCAY_STEP))
        if momentum < 0.01:
            momentum = 0.01
        print('BN momentum updated to: %f' % momentum)
        classifier = classifier.apply(lambda x: bn_momentum_adjust(x, momentum))

        '''learning one epoch'''
        for i, data in tqdm(enumerate(trainDataLoader), total=len(trainDataLoader), smoothing=0.9):
            points, label, target = data
            # Optional train-time rotation augmentation.
            trot = None
            if args.rot == 'z':
                trot = RotateAxisAngle(angle=torch.rand(points.shape[0]) * 360, axis="Z", degrees=True)
            elif args.rot == 'so3':
                trot = Rotate(R=random_rotations(points.shape[0]))
            if trot is not None:
                points = trot.transform_points(points)
            points = points.data.numpy()
            points[:, :, 0:3] = provider.random_scale_point_cloud(points[:, :, 0:3])
            points[:, :, 0:3] = provider.shift_point_cloud(points[:, :, 0:3])
            points = torch.Tensor(points)
            points, label, target = points.float().cuda(), label.long().cuda(), target.long().cuda()
            points = points.transpose(2, 1)
            optimizer.zero_grad()
            classifier = classifier.train()
            seg_pred, trans_feat = classifier(points, to_categorical(label, num_classes))
            seg_pred = seg_pred.contiguous().view(-1, num_part)
            target = target.view(-1, 1)[:, 0]
            pred_choice = seg_pred.data.max(1)[1]
            correct = pred_choice.eq(target.data).cpu().sum()
            mean_correct.append(correct.item() / (args.batch_size * args.npoint))
            loss = criterion(seg_pred, target, trans_feat)
            loss.backward()
            optimizer.step()
        train_instance_acc = np.mean(mean_correct)
        log_string('Train accuracy is: %.5f' % train_instance_acc)

        with torch.no_grad():
            test_metrics = {}
            total_correct = 0
            total_seen = 0
            total_seen_class = [0 for _ in range(num_part)]
            total_correct_class = [0 for _ in range(num_part)]
            shape_ious = {cat: [] for cat in seg_classes.keys()}
            seg_label_to_cat = {}  # {0:Airplane, 1:Airplane, ...49:Table}
            for cat in seg_classes.keys():
                for label in seg_classes[cat]:
                    seg_label_to_cat[label] = cat

            for batch_id, (points, label, target) in tqdm(enumerate(testDataLoader), total=len(testDataLoader), smoothing=0.9):
                cur_batch_size, NUM_POINT, _ = points.size()
                # Optional test-time rotation, mirroring the train loop.
                trot = None
                if args.rot == 'z':
                    trot = RotateAxisAngle(angle=torch.rand(points.shape[0]) * 360, axis="Z", degrees=True)
                elif args.rot == 'so3':
                    trot = Rotate(R=random_rotations(points.shape[0]))
                if trot is not None:
                    points = trot.transform_points(points)
                points, label, target = points.float().cuda(), label.long().cuda(), target.long().cuda()
                points = points.transpose(2, 1)
                classifier = classifier.eval()
                seg_pred, _ = classifier(points, to_categorical(label, num_classes))
                cur_pred_val = seg_pred.cpu().data.numpy()
                cur_pred_val_logits = cur_pred_val
                cur_pred_val = np.zeros((cur_batch_size, NUM_POINT)).astype(np.int32)
                target = target.cpu().data.numpy()
                # Restrict each prediction to the parts valid for its category.
                for i in range(cur_batch_size):
                    cat = seg_label_to_cat[target[i, 0]]
                    logits = cur_pred_val_logits[i, :, :]
                    cur_pred_val[i, :] = np.argmax(logits[:, seg_classes[cat]], 1) + seg_classes[cat][0]
                correct = np.sum(cur_pred_val == target)
                total_correct += correct
                total_seen += (cur_batch_size * NUM_POINT)

                for l in range(num_part):
                    total_seen_class[l] += np.sum(target == l)
                    total_correct_class[l] += (np.sum((cur_pred_val == l) & (target == l)))

                for i in range(cur_batch_size):
                    segp = cur_pred_val[i, :]
                    segl = target[i, :]
                    cat = seg_label_to_cat[segl[0]]
                    part_ious = [0.0 for _ in range(len(seg_classes[cat]))]
                    for l in seg_classes[cat]:
                        if (np.sum(segl == l) == 0) and (
                                np.sum(segp == l) == 0):  # part is not present, no prediction as well
                            part_ious[l - seg_classes[cat][0]] = 1.0
                        else:
                            part_ious[l - seg_classes[cat][0]] = np.sum((segl == l) & (segp == l)) / float(
                                np.sum((segl == l) | (segp == l)))
                    shape_ious[cat].append(np.mean(part_ious))

            all_shape_ious = []
            for cat in shape_ious.keys():
                for iou in shape_ious[cat]:
                    all_shape_ious.append(iou)
                shape_ious[cat] = np.mean(shape_ious[cat])
            mean_shape_ious = np.mean(list(shape_ious.values()))
            test_metrics['accuracy'] = total_correct / float(total_seen)
            # np.float was removed in NumPy 1.24; np.float64 is the
            # equivalent dtype.
            test_metrics['class_avg_accuracy'] = np.mean(
                np.array(total_correct_class) / np.array(total_seen_class, dtype=np.float64))
            for cat in sorted(shape_ious.keys()):
                log_string('eval mIoU of %s %f' % (cat + ' ' * (14 - len(cat)), shape_ious[cat]))
            test_metrics['class_avg_iou'] = mean_shape_ious
            test_metrics['inctance_avg_iou'] = np.mean(all_shape_ious)

        log_string('Epoch %d test Accuracy: %f  Class avg mIOU: %f   Inctance avg mIOU: %f' % (
            epoch + 1, test_metrics['accuracy'], test_metrics['class_avg_iou'], test_metrics['inctance_avg_iou']))
        # Save whenever instance-average IoU matches or beats the best so far.
        if (test_metrics['inctance_avg_iou'] >= best_inctance_avg_iou):
            logger.info('Save model...')
            savepath = str(checkpoints_dir) + '/best_model.pth'
            log_string('Saving at %s' % savepath)
            state = {
                'epoch': epoch,
                'train_acc': train_instance_acc,
                'test_acc': test_metrics['accuracy'],
                'class_avg_iou': test_metrics['class_avg_iou'],
                'inctance_avg_iou': test_metrics['inctance_avg_iou'],
                'model_state_dict': classifier.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            torch.save(state, savepath)
            log_string('Saving model....')

        if test_metrics['accuracy'] > best_acc:
            best_acc = test_metrics['accuracy']
        if test_metrics['class_avg_iou'] > best_class_avg_iou:
            best_class_avg_iou = test_metrics['class_avg_iou']
        if test_metrics['inctance_avg_iou'] > best_inctance_avg_iou:
            best_inctance_avg_iou = test_metrics['inctance_avg_iou']
        log_string('Best accuracy is: %.5f' % best_acc)
        log_string('Best class avg mIOU is: %.5f' % best_class_avg_iou)
        log_string('Best inctance avg mIOU is: %.5f' % best_inctance_avg_iou)
        global_epoch += 1
# NOTE(review): fragment — the enclosing function's definition and the end
# of the final renderer call are outside the visible source; code kept
# byte-identical, comments only.
srange = smax - smin
# Uniform random scale in [smin, smax).
scale = (torch.rand(1).squeeze() * srange + smin).item()
# Generate a random NDC coordinate https://pytorch3d.org/docs/cameras
x, y, d = torch.rand(3)
x = x * 2.0 - 1.0  # map [0,1) -> [-1,1) for NDC x
y = y * 2.0 - 1.0  # map [0,1) -> [-1,1) for NDC y
trans = torch.Tensor([x, y, d]).to(device)
# Unproject the NDC point to world space to get a translation inside the
# camera frustum.
trans = cameras.unproject_points(trans.unsqueeze(0),
                                 world_coordinates=False,
                                 scaled_depth_input=True)[0]
rot = random_rotations(1)[0].to(device)
# Ground-truth transform: scale, then rotate, then translate.
transform = Transform3d() \
    .scale(scale) \
    .compose(Rotate(rot)) \
    .translate(*trans)
# TODO: transform mesh
# Create a phong renderer by composing a rasterizer and a shader. The textured phong shader will
# interpolate the texture uv coordinates for each vertex, sample from a texture image and
# apply the Phong lighting model
renderer = MeshRenderer(rasterizer=MeshRasterizer(
    cameras=cameras, raster_settings=raster_settings),
    shader=SoftPhongShader(
        device=device,
        cameras=cameras,
        lights=lights,
    ))
# (call continues beyond the visible source)
images = renderer(mesh.scale_verts(scale), R=rot.unsqueeze(0),
def main(args):
    """Evaluate a trained ShapeNet part-segmentation model with vote pooling.

    Loads the best checkpoint from ``log/partseg/<log_dir>``, runs the test
    split (optionally rotation-augmented), averages ``args.num_votes``
    forward passes per batch, and logs accuracy and mIoU metrics.

    Args:
        args: parsed command-line namespace (gpu, log_dir, model, num_point,
            normal, batch_size, rot, num_votes, ...).
    """

    def log_string(msg):
        # Log to file and echo to stdout.
        logger.info(msg)
        print(msg)

    '''HYPER PARAMETER'''
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    experiment_dir = 'log/partseg/' + args.log_dir

    '''LOG'''
    # NOTE(review): re-parses and shadows the `args` parameter — kept for
    # backward compatibility; confirm this is intended.
    args = parse_args()
    logger = logging.getLogger("Model")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/eval.txt' % experiment_dir)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(args)

    root = 'data/shapenetcore_partanno_segmentation_benchmark_v0_normal/'

    TEST_DATASET = PartNormalDataset(root=root, npoints=args.num_point, split='test', normal_channel=args.normal)
    testDataLoader = torch.utils.data.DataLoader(TEST_DATASET, batch_size=args.batch_size, shuffle=False, num_workers=4)
    log_string("The number of test data is: %d" % len(TEST_DATASET))
    num_classes = 16  # object categories
    num_part = 50     # part labels across all categories

    '''MODEL LOADING'''
    MODEL = importlib.import_module(args.model)
    classifier = MODEL.get_model(args, num_part, normal_channel=args.normal).cuda()
    checkpoint = torch.load(str(experiment_dir) + '/checkpoints/best_model.pth')
    classifier.load_state_dict(checkpoint['model_state_dict'])

    with torch.no_grad():
        test_metrics = {}
        total_correct = 0
        total_seen = 0
        total_seen_class = [0 for _ in range(num_part)]
        total_correct_class = [0 for _ in range(num_part)]
        shape_ious = {cat: [] for cat in seg_classes.keys()}
        seg_label_to_cat = {}  # {0:Airplane, 1:Airplane, ...49:Table}
        for cat in seg_classes.keys():
            for label in seg_classes[cat]:
                seg_label_to_cat[label] = cat

        for batch_id, (points, label, target) in tqdm(enumerate(testDataLoader), total=len(testDataLoader), smoothing=0.9):
            cur_batch_size, NUM_POINT, _ = points.size()
            # FIX: `trot` was previously undefined when args.rot was neither
            # 'z' nor 'so3', raising NameError. Guard with None, matching
            # the train script.
            trot = None
            if args.rot == 'z':
                trot = RotateAxisAngle(angle=torch.rand(points.shape[0]) * 360, axis="Z", degrees=True)
            elif args.rot == 'so3':
                trot = Rotate(R=random_rotations(points.shape[0]))
            if trot is not None:
                points = trot.transform_points(points)
            points, label, target = points.float().cuda(), label.long().cuda(), target.long().cuda()
            points = points.transpose(2, 1)
            classifier = classifier.eval()
            # Average predictions over several forward passes ("votes").
            vote_pool = torch.zeros(target.size()[0], target.size()[1], num_part).cuda()
            for _ in range(args.num_votes):
                seg_pred, _ = classifier(points, to_categorical(label, num_classes))
                vote_pool += seg_pred
            seg_pred = vote_pool / args.num_votes
            cur_pred_val = seg_pred.cpu().data.numpy()
            cur_pred_val_logits = cur_pred_val
            cur_pred_val = np.zeros((cur_batch_size, NUM_POINT)).astype(np.int32)
            target = target.cpu().data.numpy()
            # Restrict each prediction to the parts valid for its category.
            for i in range(cur_batch_size):
                cat = seg_label_to_cat[target[i, 0]]
                logits = cur_pred_val_logits[i, :, :]
                cur_pred_val[i, :] = np.argmax(logits[:, seg_classes[cat]], 1) + seg_classes[cat][0]
            correct = np.sum(cur_pred_val == target)
            total_correct += correct
            total_seen += (cur_batch_size * NUM_POINT)

            for l in range(num_part):
                total_seen_class[l] += np.sum(target == l)
                total_correct_class[l] += (np.sum((cur_pred_val == l) & (target == l)))

            for i in range(cur_batch_size):
                segp = cur_pred_val[i, :]
                segl = target[i, :]
                cat = seg_label_to_cat[segl[0]]
                part_ious = [0.0 for _ in range(len(seg_classes[cat]))]
                for l in seg_classes[cat]:
                    if (np.sum(segl == l) == 0) and (
                            np.sum(segp == l) == 0):  # part is not present, no prediction as well
                        part_ious[l - seg_classes[cat][0]] = 1.0
                    else:
                        part_ious[l - seg_classes[cat][0]] = np.sum((segl == l) & (segp == l)) / float(
                            np.sum((segl == l) | (segp == l)))
                shape_ious[cat].append(np.mean(part_ious))

        all_shape_ious = []
        for cat in shape_ious.keys():
            for iou in shape_ious[cat]:
                all_shape_ious.append(iou)
            shape_ious[cat] = np.mean(shape_ious[cat])
        mean_shape_ious = np.mean(list(shape_ious.values()))
        test_metrics['accuracy'] = total_correct / float(total_seen)
        # np.float was removed in NumPy 1.24; np.float64 is the equivalent
        # dtype.
        test_metrics['class_avg_accuracy'] = np.mean(
            np.array(total_correct_class) / np.array(total_seen_class, dtype=np.float64))
        for cat in sorted(shape_ious.keys()):
            log_string('eval mIoU of %s %f' % (cat + ' ' * (14 - len(cat)), shape_ious[cat]))
        test_metrics['class_avg_iou'] = mean_shape_ious
        test_metrics['inctance_avg_iou'] = np.mean(all_shape_ious)

    log_string('Accuracy is: %.5f' % test_metrics['accuracy'])
    log_string('Class avg accuracy is: %.5f' % test_metrics['class_avg_accuracy'])
    log_string('Class avg mIOU is: %.5f' % test_metrics['class_avg_iou'])
    log_string('Inctance avg mIOU is: %.5f' % test_metrics['inctance_avg_iou'])
class PointClouds3D(PytorchPointClouds):
    """ PointClouds storing batches of point clouds in *object coordinate*.
    The point clouds are centered and isotropically resized to a unit cube,
    with up direction (0, 1, 0) and front direction (0, 0, -1)

    Overload of pytorch3d Pointclouds class

    Support named features, with a OrderedDict {name: dimensions}

    Attributes:
        normalized (bool): whether the point cloud is centered and normalized
        obj2world_mat (tensor): (B, 4, 4) object-to-world transformation for
            each (normalized and aligned) point cloud
    """

    def __init__(self, points, normals=None, features=None,
                 to_unit_sphere: bool = False,
                 to_unit_box: bool = False,
                 to_axis_aligned: bool = False,
                 up=((0, 1, 0), ),
                 front=((0, 0, 1), )):
        """
        Args:
            points, normals: points in world coordinates (unnormalized and
                unaligned) Pointclouds in pytorch3d
            features: can be a dict {name: value} where value can be any
                acceptable form as the pytorch3d.Pointclouds
            to_unit_sphere (bool): scale so the farthest point lies on the
                unit sphere
            to_unit_box (bool): transform to unit box (sidelength = 1)
            to_axis_aligned (bool): rotate the object using the up and front
                vectors
            up: the up direction in world coordinate (will be justified to
                object)
            front: front direction in the world coordinate (will be
                justified to z-axis)
        """
        super().__init__(points, normals=normals, features=features)
        # Identity by default; updated by axis-alignment / normalization.
        self.obj2world_trans = Transform3d()

        # rotate object to have up direction (0, 1, 0)
        # and front direction (0, 0, -1)
        # (B,3,3) rotation to transform to axis-aligned point clouds
        if to_axis_aligned:
            # look_at_rotation gives object->world; its inverse brings the
            # stored points into the axis-aligned object frame.
            self.obj2world_trans = Rotate(look_at_rotation(((0, 0, 0), ),
                                                           at=front, up=up),
                                          device=self.device)
            world_to_obj_rotate_trans = self.obj2world_trans.inverse()
            # update points, normals (in place on the packed tensors)
            self.update_points_(
                world_to_obj_rotate_trans.transform_points(
                    self.points_packed()))
            normals_packed = self.normals_packed()
            if normals_packed is not None:
                self.update_normals_(
                    world_to_obj_rotate_trans.transform_normals(
                        normals_packed))

        # normalize to unit box and update obj2world_trans
        # (the normalize_* methods fold their transform into
        # self.obj2world_trans; the return value is unused here)
        if to_unit_box:
            normalizing_trans = self.normalize_to_box_()
        elif to_unit_sphere:
            normalizing_trans = self.normalize_to_sphere_()

    def update_points_(self, others_packed):
        """Replace the packed points with ``others_packed`` (in place),
        implemented as an offset so pytorch3d's caches stay consistent."""
        points_packed = self.points_packed()
        if others_packed.shape != points_packed.shape:
            raise ValueError("update points must have dimension (all_p, 3).")
        self.offset_(others_packed - points_packed)

    def update_normals_(self, others_packed):
        """ Update the point clouds normals. In place operation.

        Args:
            others_packed: A Tensor of the same shape as
                self.normals_packed giving the new normals for all points.
        Returns:
            self.
        """
        if self.isempty():
            assert (others_packed.nelement(
            ) == 0), "Cannot update empty pointclouds with non-empty features"
            return self
        normals_packed = self.normals_packed()
        if normals_packed is not None:
            if others_packed.shape != normals_packed.shape:
                raise ValueError(
                    "update normals must have dimension (all_p, 3).")
        if normals_packed is None:
            self._normals_packed = others_packed
        else:
            # In-place overwrite of the existing packed tensor so any views
            # of it see the new values.
            normals_packed += (-normals_packed + others_packed)

        # Rebuild the list/padded representations from the packed tensor.
        new_normals_list = list(
            self._normals_packed.split(self.num_points_per_cloud().tolist(), 0))
        # Note that since _compute_packed() has been executed, points_list
        # cannot be None even if not provided during construction.
        self._normals_list = new_normals_list
        self._normals_padded = list_to_padded(new_normals_list)

        return self

    def update_features_(self, others_packed):
        """ Update the point clouds features. In place operation.

        Args:
            others_packed: A Tensor giving the new packed features; if its
                shape differs from the current features it replaces them
                (and updates the channel count).
        Returns:
            self.
        """
        if self.isempty():
            assert (others_packed.nelement(
            ) == 0), "Cannot update empty pointclouds with non-empty features"
            return self
        features_packed = self.features_packed()
        if features_packed is None or features_packed.shape != others_packed.shape:
            self._features_packed = others_packed
            self._C = others_packed.shape[-1]
        else:
            # In-place overwrite so existing views see the new values.
            features_packed += (-features_packed + others_packed)

        # Rebuild the list/padded representations from the packed tensor.
        new_features_list = list(
            self._features_packed.split(self.num_points_per_cloud().tolist(),
                                        0))
        # Note that since _compute_packed() has been executed, points_list
        # cannot be None even if not provided during construction.
        self._features_list = new_features_list
        self._features_padded = list_to_padded(new_features_list)
        return self

    def normalize_to_sphere_(self):
        """ Center and scale the point clouds to a unit sphere
        Returns:
            normalizing_trans (Transform3D)
        """
        # (B,3,2)
        boxMinMax = self.get_bounding_boxes()
        boxCenter = boxMinMax.sum(dim=-1) / 2
        # (B,)
        boxRange, _ = (boxMinMax[:, :, 1] - boxMinMax[:, :, 0]).max(dim=-1)
        # NOTE(review): `boxRange == 0` on a (B,) tensor only works for
        # batch size 1 (ambiguous truth value otherwise), and boxRange is
        # not used below — verify intent.
        if boxRange == 0:
            boxRange = 1
        # center and scale the point clouds, likely faster than calling obj2world_trans directly?
        pointOffsets = torch.repeat_interleave(-boxCenter,
                                               self.num_points_per_cloud(),
                                               dim=0)
        self.offset_(pointOffsets)
        # (P) per-point distance from the (new) origin
        norms = torch.norm(self.points_packed(), dim=-1)
        # List[(Pi)]
        # NOTE(review): torch.split expects an int or list of ints —
        # num_points_per_cloud() returns a tensor; other methods in this
        # class call .tolist() here. Confirm this path is exercised.
        norms = torch.split(norms, self.num_points_per_cloud())
        # (N) max radius per cloud
        scale = torch.stack([x.max() for x in norms], dim=0)
        self.scale_(1 / eps_denom(scale))
        # Fold the centering + scaling into obj2world_trans (as its inverse).
        normalizing_trans = Translate(-boxCenter).compose(
            Scale(1 / eps_denom(scale))).to(device=self.device)
        self.obj2world_trans = normalizing_trans.inverse().compose(
            self.obj2world_trans)
        return normalizing_trans

    def normalize_to_box_(self):
        """ center and scale the point clouds to a unit cube,
        Returns:
            normalizing_trans (Transform3D): Transform3D used to normalize
                the pointclouds
        """
        # (B,3,2)
        boxMinMax = self.get_bounding_boxes()
        boxCenter = boxMinMax.sum(dim=-1) / 2
        # (B,) longest side of each bounding box
        boxRange, _ = (boxMinMax[:, :, 1] - boxMinMax[:, :, 0]).max(dim=-1)
        # NOTE(review): tensor comparison — only valid for batch size 1.
        if boxRange == 0:
            boxRange = 1

        # center and scale the point clouds, likely faster than calling obj2world_trans directly?
        pointOffsets = torch.repeat_interleave(-boxCenter,
                                               self.num_points_per_cloud(),
                                               dim=0)
        self.offset_(pointOffsets)
        self.scale_(1 / boxRange)

        # update obj2world_trans with the inverse of the normalization.
        normalizing_trans = Translate(-boxCenter).compose(Scale(
            1 / boxRange)).to(device=self.device)
        self.obj2world_trans = normalizing_trans.inverse().compose(
            self.obj2world_trans)
        return normalizing_trans

    def get_object_to_world_transformation(self, **kwargs):
        """
        Returns a Transform3d object from object to world
        """
        return self.obj2world_trans

    def estimate_normals(
        self,
        neighborhood_size: int = 50,
        disambiguate_directions: bool = True,
        assign_to_self: bool = False,
    ):
        """
        Estimates the normals of each point in each cloud and assigns
        them to the internal tensors `self._normals_list` and
        `self._normals_padded`

        The function uses `ops.estimate_pointcloud_local_coord_frames`
        to estimate the normals. Please refer to this function for more
        detailed information about the implemented algorithm.

        Args:
            **neighborhood_size**: The size of the neighborhood used to
                estimate the geometry around each point.
            **disambiguate_directions**: If `True`, uses the algorithm from
                [1] to ensure sign consistency of the normals of neigboring
                points.
            **assign_to_self**: If `True`, assigns the computed normals to
                the internal buffers overwriting any previously stored
                normals.

        Returns:
            A padded tensor of estimated normals.

        References:
          [1] Tombari, Salti, Di Stefano: Unique Signatures of Histograms
          for Local Surface Description, ECCV 2010.
        """
        # estimate the normals
        normals_est = estimate_pointcloud_normals(
            self,
            neighborhood_size=neighborhood_size,
            disambiguate_directions=disambiguate_directions,
        )

        # assign to self: reset derived representations so they are
        # lazily recomputed from the new padded normals.
        if assign_to_self:
            _, self._normals_padded, _ = self._parse_auxiliary_input(
                normals_est)
            self._normals_list, self._normals_packed = None, None
            if self._points_list is not None:
                # update self._normals_list
                self.normals_list()
            if self._points_packed is not None:
                # update self._normals_packed
                self._normals_packed = torch.cat(self._normals_list, dim=0)

        return normals_est

    def subsample_randomly(self, ratio: Union[torch.Tensor, float]):
        """Return a new cloud keeping a random ``ratio`` of points per
        cloud (ratio may be per-cloud tensor or a scalar; clamped to 1)."""
        if not isinstance(ratio, torch.Tensor):
            ratio = torch.full((len(self), ), ratio, device=self.device)
        assert ratio.nelement() == len(self)
        ratio[ratio > 1.0] = 1.0
        # Nothing to drop: return an identical copy.
        if (ratio == 1.0).all():
            return self.clone()
        points_list = self.points_list()
        normals_list = self.normals_list()
        features_list = self.features_list()
        for b, pts in enumerate(points_list):
            # Same random index set for points, features, and normals so
            # they stay aligned.
            idx = torch.randperm(pts.shape[0])[:int(ratio[b] * pts.shape[0])]
            points_list[b] = pts[idx]
            if features_list is not None:
                features_list[b] = features_list[b][idx]
            if normals_list is not None:
                normals_list[b] = normals_list[b][idx]
        other = self.__class__(points=points_list,
                               normals=normals_list,
                               features=features_list)
        return other
    def _check_raysampler_ray_directions(self, cameras, raysampler, ray_bundle):
        """
        Check the rays_directions_world output of raysamplers.

        Two properties are verified: (1) normalized differences between
        consecutive points along each ray match the bundle's normalized
        direction vectors; (2) directions sampled under the true camera
        extrinsics, rotated by the camera rotation R, match directions
        sampled with identity extrinsics.
        """
        batch_size = cameras.R.shape[0]
        n_pts_per_ray = ray_bundle.lengths.shape[-1]
        # spatial_size is a torch.Size; numel() gives rays per image
        spatial_size = ray_bundle.xys.shape[1:-1]
        n_rays_per_image = spatial_size.numel()

        # obtain the ray points in world coords:
        # concatenate per-ray (x, y) coords (broadcast along the depth axis)
        # with the per-point depths, then unproject through the camera.
        rays_points_world = cameras.unproject_points(
            torch.cat(
                (
                    ray_bundle.xys.view(batch_size, n_rays_per_image, 1, 2).expand(
                        batch_size, n_rays_per_image, n_pts_per_ray, 2
                    ),
                    ray_bundle.lengths.view(
                        batch_size, n_rays_per_image, n_pts_per_ray, 1
                    ),
                ),
                dim=-1,
            ).view(batch_size, -1, 3)
        ).view(batch_size, -1, n_pts_per_ray, 3)

        # reshape to common testing size
        rays_directions_world_normed = torch.nn.functional.normalize(
            ray_bundle.directions.view(batch_size, -1, 3), dim=-1
        )

        # check that the l2-normed difference of all consecutive planes
        # of points in world coords matches ray_directions_world
        rays_directions_world_ = torch.nn.functional.normalize(
            rays_points_world[:, :, -1:] - rays_points_world[:, :, :-1], dim=-1
        )
        self.assertClose(
            rays_directions_world_normed[:, :, None].expand_as(rays_directions_world_),
            rays_directions_world_,
            atol=1e-4,
        )

        # check the ray directions rotated using camera rotation matrix
        # match the ray directions of a camera with trivial extrinsics
        cameras_trivial_extrinsic = cameras.clone()
        cameras_trivial_extrinsic.R = eyes(
            N=batch_size, dim=3, dtype=cameras.R.dtype, device=cameras.device
        )
        cameras_trivial_extrinsic.T = torch.zeros_like(cameras.T)

        # make sure we get the same random rays in case we call the
        # MonteCarloRaysampler twice below; fork_rng restores RNG state
        # afterwards so the surrounding test is unaffected
        with torch.random.fork_rng(devices=range(torch.cuda.device_count())):
            torch.random.manual_seed(42)
            ray_bundle_world_fix_seed = raysampler(cameras=cameras)
            torch.random.manual_seed(42)
            ray_bundle_camera_fix_seed = raysampler(cameras=cameras_trivial_extrinsic)

        # rotate the world-frame directions by R to compare against the
        # directions produced with identity extrinsics
        rays_directions_camera_fix_seed_ = Rotate(
            cameras.R, device=cameras.R.device
        ).transform_points(ray_bundle_world_fix_seed.directions.view(batch_size, -1, 3))

        self.assertClose(
            rays_directions_camera_fix_seed_,
            ray_bundle_camera_fix_seed.directions.view(batch_size, -1, 3),
            atol=1e-5,
        )
def main(args):
    """Train a ModelNet40 point-cloud classifier and checkpoint the best model.

    Args:
        args: parsed command-line namespace. Fields used: gpu, log_dir, model,
            num_point, normal, batch_size, optimizer, learning_rate,
            decay_rate, epoch, rot.

    Side effects:
        Creates ./log/cls/<run>/{checkpoints,logs}, writes a text log and
        saves 'best_model.pth' whenever test instance accuracy improves.
    """

    def log_string(msg):
        # Mirror every status line to both the run's log file and stdout.
        # (Renamed from `str` to avoid shadowing the builtin.)
        logger.info(msg)
        print(msg)

    '''HYPER PARAMETER'''
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    '''CREATE DIR'''
    timestr = str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M'))
    experiment_dir = Path('./log/')
    experiment_dir.mkdir(parents=True, exist_ok=True)
    experiment_dir = experiment_dir.joinpath('cls')
    experiment_dir.mkdir(parents=True, exist_ok=True)
    if args.log_dir is None:
        experiment_dir = experiment_dir.joinpath(timestr)
    else:
        experiment_dir = experiment_dir.joinpath(args.log_dir)
    experiment_dir.mkdir(parents=True, exist_ok=True)
    checkpoints_dir = experiment_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(parents=True, exist_ok=True)
    log_dir = experiment_dir.joinpath('logs/')
    log_dir.mkdir(parents=True, exist_ok=True)

    '''LOG'''
    # Fix: the original called `args = parse_args()` here, re-parsing
    # sys.argv and silently shadowing the `args` parameter. Use the
    # caller-supplied namespace throughout instead.
    logger = logging.getLogger("Model")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler('%s/%s.txt' % (log_dir, args.model))
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    log_string('PARAMETER ...')
    log_string(args)

    '''DATA LOADING'''
    log_string('Load dataset ...')
    DATA_PATH = 'data/modelnet40_normal_resampled/'
    TRAIN_DATASET = ModelNetDataLoader(root=DATA_PATH, npoint=args.num_point,
                                       split='train',
                                       normal_channel=args.normal)
    TEST_DATASET = ModelNetDataLoader(root=DATA_PATH, npoint=args.num_point,
                                      split='test',
                                      normal_channel=args.normal)
    trainDataLoader = torch.utils.data.DataLoader(TRAIN_DATASET,
                                                  batch_size=args.batch_size,
                                                  shuffle=True, num_workers=4)
    testDataLoader = torch.utils.data.DataLoader(TEST_DATASET,
                                                 batch_size=args.batch_size,
                                                 shuffle=False, num_workers=4)

    '''MODEL LOADING'''
    num_class = 40
    MODEL = importlib.import_module(args.model)
    classifier = MODEL.get_model(args, num_class,
                                 normal_channel=args.normal).cuda()
    criterion = MODEL.get_loss().cuda()

    # Resume from an existing checkpoint when one is present; a missing or
    # malformed file simply means training starts from scratch. Fix: the
    # original bare `except:` also swallowed KeyboardInterrupt/SystemExit.
    try:
        checkpoint = torch.load(str(experiment_dir) +
                                '/checkpoints/best_model.pth')
        start_epoch = checkpoint['epoch']
        classifier.load_state_dict(checkpoint['model_state_dict'])
        log_string('Use pretrain model')
    except Exception:
        log_string('No existing model, starting training from scratch...')
        start_epoch = 0

    if args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(
            classifier.parameters(),
            lr=args.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=args.decay_rate
        )
    else:
        # SGD baseline; the 100x lr multiplier mirrors the Adam default gap.
        optimizer = torch.optim.SGD(classifier.parameters(),
                                    lr=args.learning_rate * 100,
                                    momentum=0.9,
                                    weight_decay=args.decay_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20,
                                                gamma=0.7)
    global_epoch = 0
    global_step = 0
    best_instance_acc = 0.0
    best_class_acc = 0.0
    best_epoch = 0  # defensively initialized before the first save

    '''TRAINING'''
    logger.info('Start training...')
    for epoch in range(start_epoch, args.epoch):
        log_string('Epoch %d (%d/%s):' %
                   (global_epoch + 1, epoch + 1, args.epoch))

        # Fix: reset per epoch so the reported train accuracy covers only
        # this epoch (previously the list accumulated across all epochs).
        mean_correct = []
        # Re-enable train mode once per epoch (test() below switches to eval).
        classifier = classifier.train()

        for batch_id, data in tqdm(enumerate(trainDataLoader, 0),
                                   total=len(trainDataLoader), smoothing=0.9):
            points, target = data

            # Optional train-time rotation augmentation.
            trot = None
            if args.rot == 'z':
                trot = RotateAxisAngle(angle=torch.rand(points.shape[0]) * 360,
                                       axis="Z", degrees=True)
            elif args.rot == 'so3':
                trot = Rotate(R=random_rotations(points.shape[0]))
            if trot is not None:
                points = trot.transform_points(points)

            # Classic point-cloud augmentations operate on numpy arrays;
            # only the xyz channels (0:3) are scaled/shifted.
            points = points.data.numpy()
            points = provider.random_point_dropout(points)
            points[:, :, 0:3] = provider.random_scale_point_cloud(
                points[:, :, 0:3])
            points[:, :, 0:3] = provider.shift_point_cloud(points[:, :, 0:3])
            points = torch.Tensor(points)
            target = target[:, 0]

            points = points.transpose(2, 1)  # (B, N, C) -> (B, C, N)
            points, target = points.cuda(), target.cuda()
            optimizer.zero_grad()

            pred, trans_feat = classifier(points)
            loss = criterion(pred, target.long(), trans_feat)
            pred_choice = pred.data.max(1)[1]
            correct = pred_choice.eq(target.long().data).cpu().sum()
            mean_correct.append(correct.item() / float(points.size()[0]))
            loss.backward()
            optimizer.step()
            global_step += 1

        # Fix: step the LR schedule after this epoch's optimizer updates;
        # calling it before training is the deprecated pre-1.1 ordering.
        scheduler.step()

        train_instance_acc = np.mean(mean_correct)
        log_string('Train Instance Accuracy: %f' % train_instance_acc)

        with torch.no_grad():
            instance_acc, class_acc = test(classifier.eval(), testDataLoader)

            if (instance_acc >= best_instance_acc):
                best_instance_acc = instance_acc
                best_epoch = epoch + 1

            if (class_acc >= best_class_acc):
                best_class_acc = class_acc
            log_string('Test Instance Accuracy: %f, Class Accuracy: %f' %
                       (instance_acc, class_acc))
            log_string('Best Instance Accuracy: %f, Class Accuracy: %f' %
                       (best_instance_acc, best_class_acc))

            if (instance_acc >= best_instance_acc):
                logger.info('Save model...')
                savepath = str(checkpoints_dir) + '/best_model.pth'
                log_string('Saving at %s' % savepath)
                state = {
                    'epoch': best_epoch,
                    'instance_acc': instance_acc,
                    'class_acc': class_acc,
                    'model_state_dict': classifier.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }
                torch.save(state, savepath)
            global_epoch += 1

    logger.info('End of training...')