def forward(self, data_dict, args):
    """Forward pass of the network.

    Every scene in the batch is first enriched with 2D image features that
    are projected onto its points; the enriched cloud is then passed through
    the backbone, the vote generator and the final ``rfnet`` module.

    Args:
        data_dict: dict { point_clouds, lang_feat, scan_name, ... }
            point_clouds: (B, N, 3 + input_channels) tensor.
                Point cloud to run predictions on. Each point in the
                point cloud MUST be formatted as (x, y, z, features...).
            scan_name: sequence with one scene id per batch element.
        args: run configuration. This method reads ``num_points``,
            ``depth_min`` and ``depth_max`` and hands the object on to
            ``get_intrinsics`` and ``scannet_projection``.

    Returns:
        The dict returned by ``self.rfnet``. Along the way the keys
        ``new_features``, ``point_clouds`` (now enriched), ``seed_inds``,
        ``seed_xyz``, ``seed_features``, ``vote_xyz`` and ``vote_features``
        are written to ``data_dict``.
    """
    # =======================================
    # Get 3d <-> 2D Projection Mapping and 2D feature map
    # =======================================
    point_clouds = data_dict['point_clouds']
    scan_names = data_dict['scan_name']
    batch_size = len(scan_names)

    # Allocate the buffer directly on the device that holds the point clouds:
    # the two tensors are concatenated below and therefore must live on the
    # same device. (Previously: CPU allocation followed by a hard ``.cuda()``.)
    # NOTE(review): 32 is presumably the channel count produced by
    # scannet_projection -- confirm against that helper.
    new_features = torch.zeros((batch_size, args.num_points, 32),
                               device=point_clouds.device)

    for idx, scene_id in enumerate(scan_names):
        intrinsics = get_intrinsics(scene_id, args)
        projection = ProjectionHelper(intrinsics, args.depth_min,
                                      args.depth_max, proj_image_dims)
        features_2d = scannet_projection(
            point_clouds[idx].cpu().numpy(), intrinsics, projection,
            scene_id, args, None, None, self.maskrcnn_model)
        new_features[idx, :] = features_2d[:]

    data_dict['new_features'] = new_features
    # Append the projected 2D features to every point along the channel axis.
    data_dict['point_clouds'] = torch.cat((point_clouds, new_features), dim=2)

    data_dict = self.backbone_net(data_dict)

    # --------- HOUGH VOTING ---------
    xyz = data_dict["fp2_xyz"]
    features = data_dict["fp2_features"]
    data_dict["seed_inds"] = data_dict["fp2_inds"]
    data_dict["seed_xyz"] = xyz
    data_dict["seed_features"] = features

    xyz, features = self.vgen(xyz, features)
    # L2-normalise the vote features along dim 1.
    features_norm = torch.norm(features, p=2, dim=1)
    features = features.div(features_norm.unsqueeze(1))
    data_dict["vote_xyz"] = xyz
    data_dict["vote_features"] = features

    data_dict = self.rfnet(xyz, features, data_dict)
    return data_dict
num_classes = opt.num_classes model2d_fixed, model2d_trainable, model2d_classifier = create_enet_for_3d( ENET_TYPES[opt.model2d_type], opt.model2d_path, num_classes) model = BeachNet(num_classes, num_images, input_channels, intrinsic, proj_image_dims, opt.depth_min, opt.depth_max, opt.accuracy, fusion=True, fuseAtPosition=2, fuse_no_ft_pn=False, pointnet_pointnet=False) projection = ProjectionHelper(intrinsic, opt.depth_min, opt.depth_max, proj_image_dims, opt.accuracy) # create loss criterion = util.WeightedCrossEntropyLoss() criterion2d = torch.nn.CrossEntropyLoss().cuda() # move to gpu model2d_fixed = model2d_fixed.cuda() model2d_trainable = model2d_trainable.cuda() model2d_classifier = model2d_classifier.cuda() model = model.cuda() criterion = criterion.cuda() # initialize optimizer optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr_pointnet,
def findCorrespondingImages(chunksPath, posesPath, outPath, numImgs=3):
    """
    For each scene chunk, find the ``numImgs`` images with the highest number
    of chunk points inside their camera frustum and store the result in a
    .hdf5 file (datasets ``points``, ``labels`` and ``corresponding_images``;
    the latter holds scene number, scene version and the chosen image ids).

    Chunks whose output file already exists, whose pose directory is missing,
    or which have fewer than ``numImgs`` poses are skipped.

    :param chunksPath: Path to where the precomputed scene chunks are stored
    :param posesPath: Path to where the precomputed poses are stored
    :param outPath: Path to where scene chunks with corresponding images should be stored
    :param numImgs: number of images to be used (default is 3)
    """
    print ("Finding image correspondences")

    # Initialize Projection
    projection = ProjectionHelper(intrinsic, opt.depth_min, opt.depth_max,
                                  proj_image_dims, opt.voxel_size)

    # Find all scene chunks that have been precomputed
    fileList = [name[:-4] for name in os.listdir(chunksPath)
                if name.endswith(".npy")]

    count = 1
    # Iterate through all scene chunks and compute their corresponding images
    for scene in fileList:
        outFile = os.path.join(outPath, scene + ".hdf5")
        if os.path.isfile(outFile):
            print(scene + " was already processed.")
            continue

        count += 1
        if count % 50 == 0:
            print(count, "/", len(fileList))

        # The full scene id (e.g. 0000_01) starts at the first '0' of the
        # chunk name; fall back to position 0 when there is none.
        firstZero = max(scene.find("0"), 0)
        scene_nr = int(scene[firstZero:firstZero + 4])
        scene_version = int(scene[firstZero + 5:firstZero + 7])
        large_scene = scene[firstZero:firstZero + 7]

        # Load data
        data = np.load(os.path.join(chunksPath, scene + ".npy"))
        scene_points = data[:, :3]
        semantic_labels = data[:, 3]
        npoints = scene_points.shape[0]

        # Check if there are image poses for this scene (a couple of scenes
        # caused problems when extracting the poses from sensor data)
        posesPathScene = os.path.join(posesPath, "scene" + large_scene, "pose")
        if not os.path.isdir(posesPathScene):
            print("Did not find any according Image Poses")
            continue

        # Upload the chunk once instead of once per pose.
        # NOTE(review): assumes points_in_frustum does not modify its point
        # tensor in place -- confirm against ProjectionHelper.
        scene_points_gpu = torch.DoubleTensor(scene_points).cuda()

        # For each pose, count the points that lie inside the frustum
        # belonging to that camera pose.
        poseDict = {}
        for poseFile in os.listdir(posesPathScene):
            pose = load_pose(os.path.join(posesPathScene, poseFile))
            corners = projection.compute_frustum_corners(pose)[:, :3, 0]  # Corners of frustum
            normals = projection.compute_frustum_normals(corners)  # Normals of frustum
            # Checks for each point if it lies on the correct side of the
            # normals of the frustum
            poseDict[poseFile[:-4]] = projection.points_in_frustum(
                corners.double().cuda(), normals.double().cuda(),
                scene_points_gpu)

        # Nothing usable, or not enough poses to pick numImgs images from
        # (the old max()-loop raised ValueError in the latter case): skip.
        if not poseDict or len(poseDict) < numImgs:
            continue

        # Stable descending sort == repeatedly taking the first maximum.
        bestPoses = sorted(poseDict, key=poseDict.get, reverse=True)[:numImgs]
        poseList = [scene_nr, scene_version] + [int(p) for p in bestPoses]

        # Write to file; the context manager closes the handle even if one of
        # the dataset writes fails.
        with h5py.File(outFile, "w") as h5file:
            h5file.create_dataset("points", (npoints, 3), data=scene_points)
            h5file.create_dataset("labels", (npoints,), data=semantic_labels)
            h5file.create_dataset("corresponding_images", (numImgs + 2,),
                                  data=poseList)
import argparse from scipy import misc from utils import util from data.data_util import resize_crop_image from model import BeachNet from utils.projection import ProjectionHelper # initialize model and Projection Helper proj_image_dims = [41, 32] intrinsic = util.make_intrinsic( 577.870605, 577.870605, 319.5, 239.5) # affine transformation from image plane to pixel coords intrinsic = util.adjust_intrinsic(intrinsic, [640, 480], proj_image_dims) projection = ProjectionHelper(intrinsic, 0.4, 4.0, proj_image_dims, 0.05) model = BeachNet(42, 3, 128, intrinsic, proj_image_dims, 0.4, 4.0, 0.05) model = model.cuda() # get point cloud input = torch.Tensor( np.load( '/media/lorenzlamm/My Book/pointnet2/scannet/preprocessing/scannet_scenes/scene0000_00.npy' )).cuda() point_cloud = input[:, :3] num_points = point_cloud.shape[0] batch_size = 2 num_images = 3 num_points_sample = 8192 point_batch = point_cloud.new(batch_size * num_images, num_points_sample,
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    return numerator / float(denominator) if denominator != 0 else 0.0


def _mean_class_ratio(numerators, denominators):
    """Mean of the per-class ratios, skipping class 0 (the ignored label)."""
    return np.mean(
        np.array(numerators[1:]) /
        (np.array(denominators[1:], dtype=np.float64) + 1e-6))


def _epoch_from_checkpoint(path):
    """Extract the epoch number from a checkpoint file name.

    Understands the names written by ``main`` (``<model>_<EEE>.pth`` and
    ``<model>_<EEE>_<score>_best{acc,mIoU}.pth``); any other name falls back
    to the historical fixed slice ``path[-14:-11]``.
    """
    import re
    match = re.search(r'_(\d{3,})(?:_\d+\.\d+_best(?:acc|mIoU))?\.pth$',
                      os.path.basename(path))
    if match:
        return int(match.group(1))
    return int(path[-14:-11])


def _build_projection_indices(projection, points, depth, pose, num_images,
                              num_points):
    """Compute the 3D<->2D projection indices for one batch.

    ``points`` is the channels-first batch tensor built by the caller; its
    first three channels are used as coordinates. Each chunk is repeated
    ``num_images`` times so that it can be paired with each of its images.

    Returns ``(proj_ind_3d, proj_ind_2d)`` -- two lists with one stacked
    tensor per batch element -- or ``None`` if ``compute_projection``
    returned ``None`` for any image (invalid sample).
    """
    xyz = torch.repeat_interleave(
        points.transpose(2, 1)[:, :, 0:3], num_images, dim=0)
    mappings = [[
        projection.compute_projection(p, d, c, num_points)
        for p, d, c in zip(xyz[k * num_images:(k + 1) * num_images],
                           depth[k], pose[k])
    ] for k in range(points.shape[0])]

    if any(entry is None for sample in mappings for entry in sample):
        return None

    proj_ind_3d, proj_ind_2d = [], []
    for sample in mappings:
        ind_3d, ind_2d = zip(*sample)
        proj_ind_3d.append(torch.stack(ind_3d))
        proj_ind_2d.append(torch.stack(ind_2d))
    return proj_ind_3d, proj_ind_2d


def main(args):
    """Train and evaluate the multi-view PointNet++ segmentation model.

    Creates a time-stamped experiment directory with ``checkpoints/`` and
    ``logs/``, trains for ``args.epoch`` epochs, evaluates on the test split
    after every epoch (point-wise and voxel-wise accuracy / IoU), saves
    periodic and best checkpoints, and finally plots the loss/accuracy
    curves.

    Args:
        args: parsed options. Fields read here: ``gpu``, ``multi_gpu``,
            ``model_name``, ``batchsize``, ``workers``, ``pretrain``,
            ``optimizer``, ``learning_rate``, ``weight_decay``, ``epoch``.

    Raises:
        ValueError: if ``args.optimizer`` is neither ``'SGD'`` nor ``'Adam'``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = (
        args.gpu if args.multi_gpu is None else '0,1,2,3')

    # ---- CREATE DIR ----
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/%sScanNetSemSeg-' % args.model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)

    # ---- LOG ----
    # The options passed in by the caller are used throughout; they are no
    # longer re-parsed from the command line half-way through the function.
    logger = logging.getLogger(args.model_name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(
        str(log_dir) + '/train_%s_semseg.txt' % args.model_name)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '---------------------------------------------------TRANING---------------------------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)

    def report(message):
        """Write ``message`` to stdout and to the log file."""
        print(message)
        logger.info(message)

    # ---- DATA ----
    print('Load data...')
    dataset = ScannetDatasetRGBImg(root='./data', split='train',
                                   npoints=8192, num_images=3)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batchsize, collate_fn=dataset.collate_fn,
        shuffle=True, num_workers=int(args.workers))
    test_dataset = ScannetDatasetRGBImg(root='./data', split='test',
                                        npoints=8192, num_images=3)
    testdataloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.batchsize,
        collate_fn=test_dataset.collate_fn, shuffle=True,
        num_workers=int(args.workers))

    # ---- MODEL / LOSS / OPTIMIZER ----
    num_classes = 21
    model = PointNet2Multiview2(num_classes)
    # Label 0 is ignored; the per-point loss is kept unreduced so that it can
    # be weighted by the sample weights before averaging.
    loss_function = torch.nn.CrossEntropyLoss(ignore_index=0,
                                              reduction='none')

    if args.pretrain is not None:
        model.load_state_dict(torch.load(args.pretrain))
        report('load model %s' % args.pretrain)
        init_epoch = _epoch_from_checkpoint(args.pretrain)
    else:
        report('Training from scratch')
        init_epoch = 0

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999), eps=1e-08,
                                     weight_decay=args.weight_decay)
    else:
        raise ValueError('Unsupported optimizer: %s' % args.optimizer)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50,
                                                gamma=0.5)
    LEARNING_RATE_CLIP = 1e-5

    # ---- GPU selection and multi-GPU ----
    if args.multi_gpu is not None:
        device_ids = [int(x) for x in args.multi_gpu.split(',')]
        torch.backends.cudnn.benchmark = True
        model.cuda(device_ids[0])
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        model.cuda()
    # DataParallel hides the wrapped network's attributes behind ``.module``;
    # the 2D sub-networks must be looked up on the real network.
    core_model = (model.module
                  if isinstance(model, torch.nn.DataParallel) else model)

    intrinsic = [[37.01983, 0, 20, 0], [0, 38.52470, 15.5, 0], [0, 0, 1, 0],
                 [0, 0, 0, 1]]
    projection = ProjectionHelper(intrinsic, 0.1, 4.0, [41, 32], 0.05)

    history = defaultdict(list)
    best_acc = 0
    best_acc_epoch = 0
    best_mIoU = 0
    best_mIoU_epoch = 0

    for epoch in range(init_epoch, args.epoch):
        # NOTE(review): the scheduler is stepped before the first
        # optimizer.step() of the epoch; kept as-is so that the learning-rate
        # schedule is unchanged.
        scheduler.step()
        lr = max(optimizer.param_groups[0]['lr'], LEARNING_RATE_CLIP)
        print('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # ------------------------------ TRAIN ------------------------------
        train_loss_sum = 0.0
        train_acc_sum = 0.0
        invalid_count = 0
        for i, data in enumerate(dataloader):
            points, target, sample_weights, image, depth, pose = data
            num_points = points.shape[1]
            num_images = image[0].shape[0]
            points, target = points.float(), target.long()
            points = points.transpose(2, 1)
            points = points.cuda()
            target = target.cuda()
            sample_weights = sample_weights.cuda()
            depth = [d.cuda() for d in depth]
            pose = [p.cuda() for p in pose]

            # Compute projection mapping
            indices = _build_projection_indices(projection, points, depth,
                                                pose, num_images, num_points)
            if indices is None:  # invalid sample
                print('invalid sample')
                invalid_count = invalid_count + 1
                continue
            proj_ind_3d, proj_ind_2d = indices

            optimizer.zero_grad()
            model.train()
            # The 2D networks stay frozen and in eval mode during training.
            core_model.enet_fixed.eval()
            core_model.enet_trainable.eval()
            for param in core_model.enet_trainable.parameters():
                param.requires_grad = False

            pred = model(points[:, :3, :], image, proj_ind_3d, proj_ind_2d)
            pred = pred.contiguous().view(-1, num_classes)
            target = target.view(pred.size(0))
            weights = sample_weights.view(pred.size(0))
            loss = torch.mean(loss_function(pred, target) * weights)
            history['loss'].append(loss.item())
            train_loss_sum += loss.item()
            loss.backward()
            optimizer.step()

            # Train acc (labelled points with positive weight only)
            pred_val = torch.argmax(pred, 1)
            counted = (target > 0) & (weights > 0)
            correct = torch.sum(((pred_val == target) & counted).float())
            seen = torch.sum(counted.float()) + 1e-08
            train_acc = correct / seen
            train_acc_sum += train_acc.item()

            if (i + 1) % 5 == 0:
                report(
                    '[Epoch %d/%d] [Iteration %d/%d] TRAIN acc/loss: %f/%f ' %
                    (epoch + 1, args.epoch, i + 1, len(dataloader),
                     train_acc.item(), loss.item()))

        # Average over the batches that were actually trained on; guarded so
        # that an epoch consisting only of invalid batches does not crash.
        valid_batches = len(dataloader) - invalid_count
        train_loss_avg = _safe_ratio(train_loss_sum, valid_batches)
        train_acc_avg = _safe_ratio(train_acc_sum, valid_batches)
        history['train_acc'].append(train_acc_avg)
        report('[Epoch %d/%d] TRAIN acc/loss: %f/%f ' %
               (epoch + 1, args.epoch, train_acc_avg, train_loss_avg))

        # ------------------------------ TEST -------------------------------
        test_losses = []
        total_correct = 0
        total_seen = 0
        total_correct_class = [0 for _ in range(num_classes)]
        total_seen_class = [0 for _ in range(num_classes)]
        total_intersection_class = [0 for _ in range(num_classes)]
        total_union_class = [0 for _ in range(num_classes)]

        total_correct_vox = 0
        total_seen_vox = 0
        total_seen_class_vox = [0 for _ in range(num_classes)]
        total_correct_class_vox = [0 for _ in range(num_classes)]
        total_intersection_class_vox = [0 for _ in range(num_classes)]
        total_union_class_vox = [0 for _ in range(num_classes)]

        labelweights = np.zeros(num_classes)
        labelweights_vox = np.zeros(num_classes)

        for j, data in enumerate(testdataloader):
            with torch.no_grad():
                points, target, sample_weights, image, depth, pose = data
                num_points = points.shape[1]
                num_images = image[0].shape[0]
                points = points.float().transpose(2, 1).cuda()
                target = target.long().cuda()
                sample_weights = sample_weights.float().cuda()
                depth = [d.cuda() for d in depth]
                pose = [p.cuda() for p in pose]

                # Compute projection mapping
                indices = _build_projection_indices(
                    projection, points, depth, pose, num_images, num_points)
                if indices is None:  # invalid sample
                    print('invalid sample')
                    continue
                proj_ind_3d, proj_ind_2d = indices

                model.eval()
                pred = model(points[:, :3, :], image, proj_ind_3d,
                             proj_ind_2d)
                pred_2d = pred.contiguous().view(-1, num_classes)
                target_1d = target.view(pred_2d.size(0))
                weights_1d = sample_weights.view(pred_2d.size(0))
                loss = torch.mean(
                    loss_function(pred_2d, target_1d) * weights_1d)
                test_losses.append(loss.item())

                # first convert torch tensor to numpy array
                pred_np = pred.cpu().numpy()  # [B,N,C]
                target_np = target.cpu().numpy()  # [B,N]
                weights_np = sample_weights.cpu().numpy()  # [B,N]
                points_np = points.transpose(2, 1).cpu().numpy()  # [B,N,3]

                # point wise acc
                pred_val = np.argmax(pred_np, 2)  # [B,N]
                weighted = weights_np > 0
                correct = np.sum((pred_val == target_np) & (target_np > 0) &
                                 weighted)
                total_correct += correct
                total_seen += np.sum((target_np > 0) & weighted)

                tmp, _ = np.histogram(target_np, range(num_classes + 1))
                labelweights += tmp

                # point wise acc and IoU per class
                for l in range(num_classes):
                    is_pred = pred_val == l
                    is_target = target_np == l
                    hit = np.sum(is_pred & is_target & weighted)
                    total_seen_class[l] += np.sum(is_target & weighted)
                    total_correct_class[l] += hit
                    total_intersection_class[l] += hit
                    total_union_class[l] += np.sum(
                        (is_pred | is_target) & weighted)

                # voxel wise acc
                for b in range(target_np.shape[0]):
                    keep = weights_np[b, :] > 0
                    _, uvlabel, _ = point_cloud_label_to_surface_voxel_label_fast(
                        points_np[b, keep, :],
                        np.concatenate(
                            (np.expand_dims(target_np[b, keep], 1),
                             np.expand_dims(pred_val[b, keep], 1)),
                            axis=1),
                        res=0.02)
                    vox_target = uvlabel[:, 0]
                    vox_pred = uvlabel[:, 1]
                    total_correct_vox += np.sum((vox_target == vox_pred) &
                                                (vox_target > 0))
                    total_seen_vox += np.sum(vox_target > 0)
                    tmp, _ = np.histogram(vox_target, range(num_classes + 1))
                    labelweights_vox += tmp

                    # voxel wise acc and IoU per class
                    for l in range(num_classes):
                        hit = np.sum((vox_target == l) & (vox_pred == l))
                        total_seen_class_vox[l] += np.sum(vox_target == l)
                        total_correct_class_vox[l] += hit
                        total_intersection_class_vox[l] += hit
                        total_union_class_vox[l] += np.sum(
                            (vox_target == l) | (vox_pred == l))

        test_loss = np.mean(test_losses)

        test_point_acc = _safe_ratio(total_correct, total_seen)
        history['test_point_acc'].append(test_point_acc)
        test_voxel_acc = _safe_ratio(total_correct_vox, total_seen_vox)
        history['test_voxel_acc'].append(test_voxel_acc)

        # np.float64 replaces the removed ``np.float`` alias (inside the
        # helper).
        test_avg_class_point_acc = _mean_class_ratio(total_correct_class,
                                                     total_seen_class)
        history['test_avg_class_point_acc'].append(test_avg_class_point_acc)
        test_avg_class_voxel_acc = _mean_class_ratio(total_correct_class_vox,
                                                     total_seen_class_vox)
        history['test_avg_class_voxel_acc'].append(test_avg_class_voxel_acc)
        test_avg_class_point_IoU = _mean_class_ratio(total_intersection_class,
                                                     total_union_class)
        history['test_avg_class_point_IoU'].append(test_avg_class_point_IoU)
        test_avg_class_voxel_IoU = _mean_class_ratio(
            total_intersection_class_vox, total_union_class_vox)
        history['test_avg_class_voxel_IoU'].append(test_avg_class_voxel_IoU)

        labelweights = labelweights[1:].astype(np.float32) / np.sum(
            labelweights[1:].astype(np.float32))
        labelweights_vox = labelweights_vox[1:].astype(np.float32) / np.sum(
            labelweights_vox[1:].astype(np.float32))

        report('[Epoch %d/%d] TEST acc/loss: %f/%f ' %
               (epoch + 1, args.epoch, test_voxel_acc, test_loss))
        report('Whole scene point wise accuracy: %f' % (test_point_acc))
        report('Whole scene voxel wise accuracy: %f' % (test_voxel_acc))
        report('Whole scene class averaged point wise accuracy: %f' %
               (test_avg_class_point_acc))
        report('Whole scene class averaged voxel wise accuracy: %f' %
               (test_avg_class_voxel_acc))
        report('Whole scene class averaged point wise IoU: %f' %
               (test_avg_class_point_IoU))
        report('Whole scene class averaged voxel wise IoU: %f' %
               (test_avg_class_voxel_IoU))

        per_class_voxel_str = 'voxel based --------\n'
        for l in range(1, num_classes):
            per_class_voxel_str += 'class %d weight: %f, acc: %f, IoU: %f;\n' % (
                l, labelweights_vox[l - 1],
                _safe_ratio(total_correct_class_vox[l],
                            total_seen_class_vox[l]),
                total_intersection_class_vox[l] /
                (float(total_union_class_vox[l]) + 1e-6))
        logger.info(per_class_voxel_str)

        per_class_point_str = 'point based --------\n'
        for l in range(1, num_classes):
            per_class_point_str += 'class %d weight: %f, acc: %f, IoU: %f;\n' % (
                l, labelweights[l - 1],
                _safe_ratio(total_correct_class[l], total_seen_class[l]),
                total_intersection_class[l] /
                (float(total_union_class[l]) + 1e-6))
        logger.info(per_class_point_str)

        # --------------------------- CHECKPOINTS ---------------------------
        if (epoch + 1) % 5 == 0:
            torch.save(
                model.state_dict(), '%s/%s_%.3d.pth' %
                (checkpoints_dir, args.model_name, epoch + 1))
            report('Save model..')
        if test_voxel_acc > best_acc:
            best_acc = test_voxel_acc
            best_acc_epoch = epoch + 1
            torch.save(
                model.state_dict(), '%s/%s_%.3d_%.4f_bestacc.pth' %
                (checkpoints_dir, args.model_name, epoch + 1, best_acc))
            report('Save best acc model..')
        if test_avg_class_voxel_IoU > best_mIoU:
            best_mIoU = test_avg_class_voxel_IoU
            best_mIoU_epoch = epoch + 1
            torch.save(
                model.state_dict(), '%s/%s_%.3d_%.4f_bestmIoU.pth' %
                (checkpoints_dir, args.model_name, epoch + 1, best_mIoU))
            report('Save best mIoU model..')

        report('Best voxel wise accuracy is %f at epoch %d.' %
               (best_acc, best_acc_epoch))
        report('Best class averaged voxel wise IoU is %f at epoch %d.' %
               (best_mIoU, best_mIoU_epoch))

    plot_loss_curve(history['loss'], str(log_dir))
    plot_acc_curve(history['train_acc'], history['test_voxel_acc'],
                   str(log_dir))
    plot_acc_curve(history['train_acc'], history['test_avg_class_voxel_IoU'],
                   str(log_dir))
    print('FINISH.')
    logger.info('FINISH')
def evaluate(args):
    """Evaluate the model on the "val" split.

    For every batch the projected 2D features are computed per scene and
    stored under ``new_features`` before the batch is fed to the model and
    to ``get_loss``. The per-sample results are then aggregated separately
    for samples whose ``ref_multiple_mask`` is 0 ("unique"), is 1
    ("multiple"), and for all samples ("overall").

    Returns:
        (stats, scores): ``stats`` holds the sample counts per subset,
        ``scores`` holds ``ref_acc``, ``objn_prec``, ``objn_recall``,
        ``objn_f1``, ``iou_rate_0.25`` and ``iou_rate_0.5`` per subset.
    """
    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer(args)

    # dataloader
    _, dataloader = get_dataloader(args, scanrefer, scene_list, "val", DC)

    # model
    model = get_model(args)

    # config
    if args.no_nms:
        POST_DICT = None
    else:
        POST_DICT = {
            'remove_empty_box': True,
            'use_3d_nms': True,
            'nms_iou': 0.25,
            'use_old_type_nms': False,
            'cls_nms': True,
            'per_class_proposal': True,
            'conf_thresh': 0.05,
            'dataset_config': DC
        }

    # evaluate
    print("evaluating...")
    ref_acc = []
    objectness_precisions = []
    objectness_recalls = []
    objectness_f1s = []
    ious = []
    masks = []
    maskrcnn_model = resnet_fpn_backbone('resnet18', True).fpn.cuda()

    for data in tqdm(dataloader):
        # everything but the scene names goes to the GPU
        for key in data:
            if key != "scan_name":
                data[key] = data[key].cuda()

        scan_names = data['scan_name']
        new_features = torch.zeros((len(scan_names), 40000, 32)).cuda()
        print("start to project")
        for idx, scene_id in enumerate(scan_names):
            intrinsics = get_intrinsics(scene_id, args)
            projection = ProjectionHelper(intrinsics, args.depth_min,
                                          args.depth_max, proj_image_dims)
            features_2d = scannet_projection(
                data['point_clouds'][idx].cpu().numpy(), intrinsics,
                projection, scene_id, args, None, None, maskrcnn_model)
            new_features[idx, :] = features_2d[:]
        data['new_features'] = new_features
        print("finish projection")

        # feed
        data = model(data)
        _, data = get_loss(data, DC, True, True, POST_DICT)

        ref_acc += data["ref_acc"]
        objectness_precisions += data["objectness_precision"]
        objectness_recalls += data["objectness_recall"]
        objectness_f1s += data["objectness_f1"]
        ious += data["ref_iou"]
        masks += data["ref_multiple_mask"]

    # aggregate scores
    ref_acc = np.array(ref_acc)
    objectness_precisions = np.array(objectness_precisions)
    objectness_recalls = np.array(objectness_recalls)
    objectness_f1s = np.array(objectness_f1s)
    ious = np.array(ious)
    masks = np.array(masks)

    is_unique = masks == 0
    is_multiple = masks == 1

    stats = {
        "unique": np.sum(is_unique),
        "multiple": np.sum(is_multiple),
        "overall": masks.shape[0]
    }

    def subset_scores(selector):
        """Scores over the selected samples; all zeros for an empty subset."""
        if not np.sum(selector) > 0:
            return {
                "ref_acc": 0,
                "objn_prec": 0,
                "objn_recall": 0,
                "objn_f1": 0,
                "iou_rate_0.25": 0,
                "iou_rate_0.5": 0
            }
        subset_ious = ious[selector]
        subset_size = subset_ious.shape[0]
        return {
            "ref_acc": np.mean(ref_acc[selector]),
            "objn_prec": np.mean(objectness_precisions[selector]),
            "objn_recall": np.mean(objectness_recalls[selector]),
            "objn_f1": np.mean(objectness_f1s[selector]),
            "iou_rate_0.25":
            subset_ious[subset_ious >= 0.25].shape[0] / subset_size,
            "iou_rate_0.5":
            subset_ious[subset_ious >= 0.5].shape[0] / subset_size
        }

    scores = {
        "unique": subset_scores(is_unique),
        "multiple": subset_scores(is_multiple),
        # the overall scores are computed unconditionally
        "overall": {
            "ref_acc": np.mean(ref_acc),
            "objn_prec": np.mean(objectness_precisions),
            "objn_recall": np.mean(objectness_recalls),
            "objn_f1": np.mean(objectness_f1s),
            "iou_rate_0.25": ious[ious >= 0.25].shape[0] / ious.shape[0],
            "iou_rate_0.5": ious[ious >= 0.5].shape[0] / ious.shape[0]
        }
    }

    print("done!")

    return stats, scores
def _feed(self, dataloader, phase, epoch_id):
    """Run one pass over ``dataloader`` in the given phase and log the results.

    For every batch: move the tensors to the GPU, attach the projected 2D
    features as ``new_features``, run forward + loss (and the backward pass
    when ``phase == "train"``), evaluate, and append the running values to
    ``self.log[phase]``. While training, a validation pass is triggered every
    ``self.val_step`` global iterations by calling this method recursively.
    After a validation pass the mean ``iou_rate_0.5`` is compared with the
    best value so far and, if better, the best statistics are updated and the
    model weights are saved.

    Args:
        dataloader: iterable yielding batch dicts; wrapped in ``tqdm`` for
            every phase other than "train".
        phase: "train" or "val"; selects the log bucket and the behaviour.
        epoch_id: current epoch index, used for reporting and stored
            (plus one) as the best epoch.

    NOTE(review): the source of this method was collapsed onto single lines;
    the nesting below (in particular "check best" running once after the
    loop, and "eval" running outside the anomaly-detection context) is a
    reconstruction -- confirm against the repository.
    """
    # switch mode
    self._set_phase(phase)

    # change dataloader
    dataloader = dataloader if phase == "train" else tqdm(dataloader)

    for data_dict in dataloader:
        # move to cuda ('scan_name' holds the scene ids and is left as-is)
        for key in data_dict:
            if key != 'scan_name':
                data_dict[key] = data_dict[key].cuda()

        # =======================================
        # Get 3d <-> 2D Projection Mapping and 2D feature map
        # =======================================
        batch_size = len(data_dict['scan_name'])
        # One 256-channel feature vector per point, filled scene by scene.
        new_features = np.zeros((batch_size, self.args.num_points, 256))
        for idx, scene_id in enumerate(data_dict['scan_name']):
            intrinsics = get_intrinsics(scene_id, self.args)
            projection = ProjectionHelper(intrinsics, self.args.depth_min,
                                          self.args.depth_max,
                                          proj_image_dims)
            features_2d = scannet_projection(
                data_dict['point_clouds'][idx].cpu().numpy(), intrinsics,
                projection, scene_id, self.args, None, None,
                self.maskrcnn_model)
            new_features[idx, :] = features_2d[:]
        data_dict['new_features'] = torch.tensor(
            new_features, dtype=torch.float32, requires_grad=True).cuda()

        # initialize the running loss
        # (reset for every batch; presumably filled in by _compute_loss and
        # _eval -- TODO confirm)
        self._running_log = {
            # loss
            "loss": 0,
            "ref_loss": 0,
            "lang_loss": 0,
            "objectness_loss": 0,
            "vote_loss": 0,
            "box_loss": 0,
            # acc
            "lang_acc": 0,
            "ref_acc": 0,
            "obj_acc": 0,
            "pos_ratio": 0,
            "neg_ratio": 0,
            "iou_rate_0.25": 0,
            "iou_rate_0.5": 0
        }

        # load
        self.log[phase]["fetch"].append(
            data_dict["load_time"].sum().item())

        with torch.autograd.set_detect_anomaly(False):
            # forward
            start = time.time()
            data_dict = self._forward(data_dict)
            self._compute_loss(data_dict)
            self.log[phase]["forward"].append(time.time() - start)

            # backward
            if phase == "train":
                start = time.time()
                self._backward()
                self.log[phase]["backward"].append(time.time() - start)

        # eval
        start = time.time()
        self._eval(data_dict)
        self.log[phase]["eval"].append(time.time() - start)

        # record log
        # (the loss entries are converted with .item(); the accuracy and
        # ratio entries are appended unchanged)
        self.log[phase]["loss"].append(self._running_log["loss"].item())
        self.log[phase]["ref_loss"].append(
            self._running_log["ref_loss"].item())
        self.log[phase]["lang_loss"].append(
            self._running_log["lang_loss"].item())
        self.log[phase]["objectness_loss"].append(
            self._running_log["objectness_loss"].item())
        self.log[phase]["vote_loss"].append(
            self._running_log["vote_loss"].item())
        self.log[phase]["box_loss"].append(
            self._running_log["box_loss"].item())

        self.log[phase]["lang_acc"].append(self._running_log["lang_acc"])
        self.log[phase]["ref_acc"].append(self._running_log["ref_acc"])
        self.log[phase]["obj_acc"].append(self._running_log["obj_acc"])
        self.log[phase]["pos_ratio"].append(self._running_log["pos_ratio"])
        self.log[phase]["neg_ratio"].append(self._running_log["neg_ratio"])
        self.log[phase]["iou_rate_0.25"].append(
            self._running_log["iou_rate_0.25"])
        self.log[phase]["iou_rate_0.5"].append(
            self._running_log["iou_rate_0.5"])

        # report
        if phase == "train":
            # total time of this iteration = fetch + forward + backward + eval
            iter_time = self.log[phase]["fetch"][-1]
            iter_time += self.log[phase]["forward"][-1]
            iter_time += self.log[phase]["backward"][-1]
            iter_time += self.log[phase]["eval"][-1]
            self.log[phase]["iter_time"].append(iter_time)
            if (self._global_iter_id + 1) % self.verbose == 0:
                self._train_report(epoch_id)

            # evaluation
            # (every val_step global iterations, but never on iteration 0)
            if self._global_iter_id != 0 and self._global_iter_id % self.val_step == 0:
                print("evaluating...")
                # val
                self._feed(self.dataloader["val"], "val", epoch_id)
                self._dump_log("val")
                # the recursive call switched the phase; switch back
                self._set_phase("train")
                self._epoch_report(epoch_id)

            # dump log
            self._dump_log("train")
            self._global_iter_id += 1

    # check best
    if phase == "val":
        cur_criterion = "iou_rate_0.5"
        cur_best = np.mean(self.log[phase][cur_criterion])
        if cur_best > self.best[cur_criterion]:
            self._log("best {} achieved: {}".format(
                cur_criterion, cur_best))
            self._log("current train_loss: {}".format(
                np.mean(self.log["train"]["loss"])))
            self._log("current val_loss: {}".format(
                np.mean(self.log["val"]["loss"])))
            self.best["epoch"] = epoch_id + 1
            self.best["loss"] = np.mean(self.log[phase]["loss"])
            self.best["ref_loss"] = np.mean(self.log[phase]["ref_loss"])
            self.best["lang_loss"] = np.mean(self.log[phase]["lang_loss"])
            self.best["objectness_loss"] = np.mean(
                self.log[phase]["objectness_loss"])
            self.best["vote_loss"] = np.mean(self.log[phase]["vote_loss"])
            self.best["box_loss"] = np.mean(self.log[phase]["box_loss"])
            self.best["lang_acc"] = np.mean(self.log[phase]["lang_acc"])
            self.best["ref_acc"] = np.mean(self.log[phase]["ref_acc"])
            self.best["obj_acc"] = np.mean(self.log[phase]["obj_acc"])
            self.best["pos_ratio"] = np.mean(self.log[phase]["pos_ratio"])
            self.best["neg_ratio"] = np.mean(self.log[phase]["neg_ratio"])
            self.best["iou_rate_0.25"] = np.mean(
                self.log[phase]["iou_rate_0.25"])
            self.best["iou_rate_0.5"] = np.mean(
                self.log[phase]["iou_rate_0.5"])

            # save model
            self._log("saving best models...\n")
            model_root = os.path.join(CONF.PATH.OUTPUT, self.stamp)
            torch.save(self.model.state_dict(),
                       os.path.join(model_root, "model.pth"))
import torch.utils.data from torch.utils.data import Dataset import numpy as np import utils.pc_util as pc_util import utils.scene_util as scene_util import random import math from scipy import misc from PIL import Image import torchvision.transforms as transforms from utils.projection import ProjectionHelper from model.pointnet_util import pc_normalize intrinsic = [[37.01983, 0, 20, 0], [0, 38.52470, 15.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]] projection = ProjectionHelper(intrinsic, 0.1, 4.0, [41, 32], 0.05) class ScannetDatasetRGBImg(Dataset): def __init__(self, root, npoints=8192, split='train', num_images=5): self.npoints = npoints self.root = root self.split = split self.num_images = num_images data_list = os.path.join(self.root, 'scannetv2_%s.txt' % (split)) datalist = open(data_list, 'r') self.scenes = [x.strip() for x in datalist.readlines()] self.data_filename = os.path.join(self.root, 'scannetv2_%s.pickle' % (split)) with open(self.data_filename, 'rb') as fp: self.scene_points_list = pickle.load(fp, encoding='bytes')