def test_network(cfg, network, data_loader, checkpoint, result_set):
    _checkpoint = torch.load(checkpoint)
    _checkpoint = {k.replace('module.', ''): v for k, v in _checkpoint['rmnet'].items()}
    network.load_state_dict(_checkpoint)
    network.eval()

    checkpoint = os.path.basename(checkpoint)
    test_metrics = AverageMeter(Metrics.names())
    device, = list(set(p.device for p in network.parameters()))
    for idx, (video_name, n_objects, frames, masks, optical_flows) in enumerate(
            tqdm(data_loader,
                 leave=False,
                 desc='%s on GPU %d' % (checkpoint, device.index),
                 position=device.index)):
        with torch.no_grad():
            try:
                est_probs = network(frames, masks, optical_flows, n_objects,
                                    cfg.TEST.MEMORIZE_EVERY, device)
                est_probs = est_probs.permute(0, 2, 1, 3, 4)
                masks = torch.argmax(masks, dim=2)
                est_masks = torch.argmax(est_probs, dim=1)
            except Exception as ex:
                logging.warning('Error occurred during testing Checkpoint[Name=%s]: %s' %
                                (checkpoint, ex))
                continue

            metrics = Metrics.get(est_masks[0], masks[0])
            test_metrics.update(metrics, torch.max(n_objects[0]).item())

    jf_mean = test_metrics.avg(2)
    if jf_mean != 0:
        logging.info('Checkpoint[Name=%s] has been tested successfully, JF-Mean = %.4f.' %
                     (checkpoint, jf_mean))
    else:
        logging.warning('Exception occurred during testing Checkpoint[Name=%s]' % checkpoint)

    result_set['JF-Mean'] = jf_mean
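# NOTE: The functions in this file rely on an AverageMeter helper from the
# project's utils package. The class below is a minimal, illustrative sketch of
# the interface they assume (items, update(), val(), avg(), count()); the real
# implementation may differ in details.
class _AverageMeterSketch(object):
    """Tracks running (weighted) averages for a single value or a list of named values."""

    def __init__(self, items=None):
        self.items = items
        self.n_items = 1 if items is None else len(items)
        self._val = [0.0] * self.n_items
        self._sum = [0.0] * self.n_items
        self._count = [0] * self.n_items

    def update(self, values, n=1):
        # Accept a scalar for single-item meters and a list otherwise.
        if not isinstance(values, (list, tuple)):
            values = [values]
        for i, v in enumerate(values):
            self._val[i] = v
            self._sum[i] += v * n
            self._count[i] += n

    def count(self, idx=None):
        return self._count[idx] if idx is not None else self._count

    def val(self, idx=None):
        if idx is not None:
            return self._val[idx]
        return self._val[0] if self.items is None else self._val

    def avg(self, idx=None):
        avgs = [s / c if c > 0 else 0.0 for s, c in zip(self._sum, self._count)]
        if idx is not None:
            return avgs[idx]
        return avgs[0] if self.items is None else avgs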
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, model=None):
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    if test_data_loader is None:
        # Set up data loader
        dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg)
        test_data_loader = torch.utils.data.DataLoader(
            dataset=dataset_loader.get_dataset(utils.data_loaders.DatasetSubset.TEST),
            batch_size=1,
            num_workers=cfg.CONST.NUM_WORKERS,
            collate_fn=utils.data_loaders.collate_fn,
            pin_memory=True,
            shuffle=False)

    # Setup networks and initialize networks
    if model is None:
        model = Model(dataset=cfg.DATASET.TRAIN_DATASET)
        if torch.cuda.is_available():
            model = torch.nn.DataParallel(model).cuda()

        assert 'WEIGHTS' in cfg.CONST and cfg.CONST.WEIGHTS
        logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        model.load_state_dict(checkpoint['model'])

    # Switch models to evaluation mode
    model.eval()

    n_samples = len(test_data_loader)
    test_losses = AverageMeter(['cd1', 'cd2', 'cd3', 'pmd'])
    test_metrics = AverageMeter(Metrics.names())
    category_metrics = dict()

    # Testing loop
    with tqdm(test_data_loader) as t:
        for model_idx, (taxonomy_id, model_id, data) in enumerate(t):
            taxonomy_id = taxonomy_id[0] if isinstance(taxonomy_id[0], str) else taxonomy_id[0].item()
            model_id = model_id[0]

            with torch.no_grad():
                for k, v in data.items():
                    data[k] = utils.helpers.var_or_cuda(v)

                partial = data['partial_cloud']
                gt = data['gtcloud']

                partial = random_subsample(partial.repeat((1, 8, 1)).reshape(-1, 16384, 3))  # b*8, 2048, 3
                b, n, _ = partial.shape

                pcds, deltas = model(partial.contiguous())

                cd1 = chamfer_sqrt(pcds[0].reshape(-1, 16384, 3).contiguous(), gt).item() * 1e3
                cd2 = chamfer_sqrt(pcds[1].reshape(-1, 16384, 3).contiguous(), gt).item() * 1e3
                cd3 = chamfer_sqrt(pcds[2].reshape(-1, 16384, 3).contiguous(), gt).item() * 1e3

                # pmd loss
                pmd_losses = []
                for delta in deltas:
                    pmd_losses.append(torch.sum(delta**2))
                pmd = torch.sum(torch.stack(pmd_losses)) / 3
                pmd_item = pmd.item()

                _metrics = [pmd_item, cd3]
                test_losses.update([cd1, cd2, cd3, pmd_item])
                test_metrics.update(_metrics)

                if taxonomy_id not in category_metrics:
                    category_metrics[taxonomy_id] = AverageMeter(Metrics.names())
                category_metrics[taxonomy_id].update(_metrics)

                t.set_description(
                    'Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s' %
                    (model_idx + 1, n_samples, taxonomy_id, model_id,
                     ['%.4f' % l for l in test_losses.val()], ['%.4f' % m for m in _metrics]))

    # Print testing results
    print('============================ TEST RESULTS ============================')
    print('Taxonomy', end='\t')
    print('#Sample', end='\t')
    for metric in test_metrics.items:
        print(metric, end='\t')
    print()

    for taxonomy_id in category_metrics:
        print(taxonomy_id, end='\t')
        print(category_metrics[taxonomy_id].count(0), end='\t')
        for value in category_metrics[taxonomy_id].avg():
            print('%.4f' % value, end='\t')
        print()

    print('Overall', end='\t\t\t')
    for value in test_metrics.avg():
        print('%.4f' % value, end='\t')
    print('\n')

    # Add testing results to TensorBoard
    if test_writer is not None:
        test_writer.add_scalar('Loss/Epoch/cd1', test_losses.avg(0), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/cd2', test_losses.avg(1), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/cd3', test_losses.avg(2), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/delta', test_losses.avg(3), epoch_idx)
        for i, metric in enumerate(test_metrics.items):
            test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx)

    return test_losses.avg(2)
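# NOTE: random_subsample() used above comes from the project's utils; the
# helper below is only an illustrative sketch of what it is assumed to do
# (randomly keep a fixed number of points per cloud, 2048 by default).
def _random_subsample_sketch(pcd, n_points=2048):
    """pcd: (B, N, 3) tensor; returns (B, n_points, 3) with points drawn without replacement."""
    b, n, _ = pcd.shape
    idx = torch.stack([torch.randperm(n, device=pcd.device)[:n_points] for _ in range(b)])
    return torch.gather(pcd, 1, idx.unsqueeze(-1).expand(-1, -1, 3))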
def train_net(cfg):
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    # Set up data loaders (choose ShapeNet)
    train_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TRAIN_DATASET](cfg)
    test_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg)
    # get_dataset() takes the dataset subset: TRAIN = 0, TEST = 1, VAL = 2
    train_data_loader = torch.utils.data.DataLoader(
        dataset=train_dataset_loader.get_dataset(utils.data_loaders.DatasetSubset.TRAIN),
        batch_size=cfg.TRAIN.BATCH_SIZE,
        num_workers=cfg.CONST.NUM_WORKERS,
        collate_fn=utils.data_loaders.collate_fn,
        pin_memory=True,
        shuffle=True,
        drop_last=True)
    val_data_loader = torch.utils.data.DataLoader(
        dataset=test_dataset_loader.get_dataset(utils.data_loaders.DatasetSubset.VAL),
        batch_size=1,
        num_workers=cfg.CONST.NUM_WORKERS,
        collate_fn=utils.data_loaders.collate_fn,
        pin_memory=True,
        shuffle=False)

    # Set up folders for logs and checkpoints
    output_dir = os.path.join(cfg.DIR.OUT_PATH, '%s', datetime.now().isoformat())
    cfg.DIR.CHECKPOINTS = output_dir % 'checkpoints'
    cfg.DIR.LOGS = output_dir % 'logs'
    txt_dir = output_dir % 'txt'
    if not os.path.exists(txt_dir):
        os.makedirs(txt_dir)
    f_record = open(txt_dir + '/record.txt', 'w')
    if not os.path.exists(cfg.DIR.CHECKPOINTS):
        os.makedirs(cfg.DIR.CHECKPOINTS)

    # Create tensorboard writers
    train_writer = SummaryWriter(os.path.join(cfg.DIR.LOGS, 'train'))
    val_writer = SummaryWriter(os.path.join(cfg.DIR.LOGS, 'test'))

    # Create the networks
    grnet = GRNet(cfg)
    grnet.apply(utils.helpers.init_weights)
    logging.debug('Parameters in GRNet: %d.' % utils.helpers.count_parameters(grnet))

    # Move the network to GPU if possible
    if torch.cuda.is_available():
        grnet = torch.nn.DataParallel(grnet).cuda()

    # Create the optimizers
    grnet_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, grnet.parameters()),
                                       lr=cfg.TRAIN.LEARNING_RATE,
                                       weight_decay=cfg.TRAIN.WEIGHT_DECAY,
                                       betas=cfg.TRAIN.BETAS)
    grnet_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(grnet_optimizer,
                                                              milestones=cfg.TRAIN.LR_MILESTONES,
                                                              gamma=cfg.TRAIN.GAMMA)

    # Set up loss functions
    chamfer_dist = ChamferDistance()
    gridding_loss = GriddingLoss(    # lgtm [py/unused-local-variable]
        scales=cfg.NETWORK.GRIDDING_LOSS_SCALES,
        alphas=cfg.NETWORK.GRIDDING_LOSS_ALPHAS)

    # Load the pretrained model if it exists
    init_epoch = 0    # resume from a checkpoint
    best_metrics = None
    if 'WEIGHTS' in cfg.CONST:
        logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        best_metrics = Metrics(cfg.TEST.METRIC_NAME, checkpoint['best_metrics'])
        grnet.load_state_dict(checkpoint['grnet'])
        logging.info('Recover complete. Current epoch = #%d; best metrics = %s.' %
                     (init_epoch, best_metrics))

    # Training/Testing the network
    first_epoch = True
    for epoch_idx in range(init_epoch + 1, cfg.TRAIN.N_EPOCHS + 1):
        epoch_start_time = time()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter(['SparseLoss', 'DenseLoss'])
        # losses = AverageMeter(['GridLoss', 'DenseLoss'])

        grnet.train()

        batch_end_time = time()
        n_batches = len(train_data_loader)
        for batch_idx, (taxonomy_ids, model_ids, data) in enumerate(train_data_loader):
            # print('batch_size: ', data['partial_cloud'].shape)
            data_time.update(time() - batch_end_time)

            for k, v in data.items():
                data[k] = utils.helpers.var_or_cuda(v)

            sparse_ptcloud, dense_ptcloud = grnet(data)
            sparse_loss = chamfer_dist(sparse_ptcloud, data['gtcloud'])
            # grid_loss = gridding_loss(dense_ptcloud, data['gtcloud'])
            dense_loss = chamfer_dist(dense_ptcloud, data['gtcloud'])
            _loss = sparse_loss + dense_loss
            losses.update([sparse_loss.item() * 1000, dense_loss.item() * 1000])
            # _loss = grid_loss + dense_loss
            # losses.update([grid_loss.item() * 1000, dense_loss.item() * 1000])

            grnet.zero_grad()
            _loss.backward()
            grnet_optimizer.step()

            n_itr = (epoch_idx - 1) * n_batches + batch_idx
            train_writer.add_scalar('Loss/Batch/Sparse', sparse_loss.item() * 1000, n_itr)
            # train_writer.add_scalar('Loss/Batch/Grid', grid_loss.item() * 1000, n_itr)
            train_writer.add_scalar('Loss/Batch/Dense', dense_loss.item() * 1000, n_itr)

            batch_time.update(time() - batch_end_time)
            batch_end_time = time()
            f_record.write(
                '\n[Epoch %d/%d][Batch %d/%d] BatchTime = %.3f (s) DataTime = %.3f (s) Losses = %s' %
                (epoch_idx, cfg.TRAIN.N_EPOCHS, batch_idx + 1, n_batches, batch_time.val(),
                 data_time.val(), ['%.4f' % l for l in losses.val()]))
            logging.info(
                '[Epoch %d/%d][Batch %d/%d] BatchTime = %.3f (s) DataTime = %.3f (s) Losses = %s' %
                (epoch_idx, cfg.TRAIN.N_EPOCHS, batch_idx + 1, n_batches, batch_time.val(),
                 data_time.val(), ['%.4f' % l for l in losses.val()]))

        grnet_lr_scheduler.step()

        epoch_end_time = time()
        train_writer.add_scalar('Loss/Epoch/Sparse', losses.avg(0), epoch_idx)
        # train_writer.add_scalar('Loss/Epoch/Grid', losses.avg(0), epoch_idx)
        train_writer.add_scalar('Loss/Epoch/Dense', losses.avg(1), epoch_idx)
        f_record.write('\n[Epoch %d/%d] EpochTime = %.3f (s) Losses = %s' %
                       (epoch_idx, cfg.TRAIN.N_EPOCHS, epoch_end_time - epoch_start_time,
                        ['%.4f' % l for l in losses.avg()]))
        logging.info('[Epoch %d/%d] EpochTime = %.3f (s) Losses = %s' %
                     (epoch_idx, cfg.TRAIN.N_EPOCHS, epoch_end_time - epoch_start_time,
                      ['%.4f' % l for l in losses.avg()]))

        # Validate the current model
        # if epoch_idx % cfg.TRAIN.SAVE_FREQ == 0:
        #     metrics = test_net(cfg, epoch_idx, val_data_loader, val_writer, grnet)

        # Save checkpoints
        # if epoch_idx % cfg.TRAIN.SAVE_FREQ == 0 or metrics.better_than(best_metrics):
        if first_epoch:
            metrics = test_net(cfg, epoch_idx, val_data_loader, val_writer, grnet)
            best_metrics = metrics
            first_epoch = False
        if epoch_idx % cfg.TRAIN.SAVE_FREQ == 0:
            metrics = test_net(cfg, epoch_idx, val_data_loader, val_writer, grnet)
            file_name = 'best-ckpt.pth' if metrics.better_than(best_metrics) else 'epoch-%03d.pth' % (epoch_idx + 1)
            output_path = os.path.join(cfg.DIR.CHECKPOINTS, file_name)
            torch.save({
                'epoch_index': epoch_idx,
                'best_metrics': metrics.state_dict(),
                'grnet': grnet.state_dict()
            }, output_path)  # yapf: disable
            logging.info('Saved checkpoint to %s ...' % output_path)
            if metrics.better_than(best_metrics):
                best_metrics = metrics

    train_writer.close()
    val_writer.close()
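# NOTE: train_net() above never restores the epoch counter when resuming
# (init_epoch stays 0 even after loading 'WEIGHTS'). A hedged sketch of a
# fuller resume, using only the keys that train_net() itself writes
# ('epoch_index', 'best_metrics', 'grnet'):
def _resume_from_checkpoint_sketch(cfg, grnet):
    checkpoint = torch.load(cfg.CONST.WEIGHTS)
    grnet.load_state_dict(checkpoint['grnet'])
    init_epoch = checkpoint.get('epoch_index', 0)
    best_metrics = Metrics(cfg.TEST.METRIC_NAME, checkpoint['best_metrics'])
    return init_epoch, best_metrics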
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, grnet=None): # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use torch.backends.cudnn.benchmark = True if test_data_loader is None: # Set up data loader dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[ cfg.DATASET.TEST_DATASET](cfg) # 在data_loader.py中修改这里的dataset值 test_data_loader = torch.utils.data.DataLoader( dataset=dataset_loader.get_dataset( utils.data_loaders.DatasetSubset.TEST), batch_size=1, num_workers=cfg.CONST.NUM_WORKERS, collate_fn=utils.data_loaders.collate_fn, pin_memory=True, shuffle=False) # Setup networks and initialize networks if grnet is None: grnet = GRNet(cfg) if torch.cuda.is_available(): grnet = torch.nn.DataParallel(grnet).cuda() logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS)) checkpoint = torch.load(cfg.CONST.WEIGHTS) grnet.load_state_dict(checkpoint['grnet']) # Switch models to evaluation mode grnet.eval() # Set up loss functions chamfer_dist = ChamferDistance() gridding_loss = GriddingLoss( scales=cfg.NETWORK.GRIDDING_LOSS_SCALES, alphas=cfg.NETWORK.GRIDDING_LOSS_ALPHAS) # lgtm [py/unused-import] # Testing loop n_samples = len(test_data_loader) test_losses = AverageMeter(['SparseLoss', 'DenseLoss']) # test_losses = AverageMeter(['GridLoss', 'DenseLoss']) test_metrics = AverageMeter(Metrics.names()) # 'F-score, CD category_metrics = dict() # Testing loop # 通过data得到sparse_pucloud, data from test_data_loader tot_recall, tot_precision, tot_emd = 0.0, 0.0, 0.0 tot_shapes = 0 score_dict = {} for model_idx, (taxonomy_id, model_id, data) in enumerate(test_data_loader): taxonomy_id = taxonomy_id[0] if isinstance( taxonomy_id[0], str) else taxonomy_id[0].item() model_id = model_id[0] with torch.no_grad(): for k, v in data.items(): data[k] = utils.helpers.var_or_cuda(v) sparse_ptcloud, dense_ptcloud = grnet(data) # print('--------dense: ', type(dense_ptcloud), dense_ptcloud.shape) # print('--------gt: ', type(data['gtcloud']), data['gtcloud'.shape]) sparse_loss = chamfer_dist(sparse_ptcloud, data['gtcloud']) # grid_loss = gridding_loss(dense_ptcloud, data['gtcloud']) dense_loss = chamfer_dist(dense_ptcloud, data['gtcloud']) # Fsore fscore_pred = o3d.geometry.PointCloud() # print(type(dense_ptcloud)) # print(dense_ptcloud.shape) # print(data['gtcloud'].shape) # print(type(data['gtcloud'])) fscore_pred.points = o3d.utility.Vector3dVector( np.array(dense_ptcloud.squeeze().cpu().detach().numpy())) fscore_gt = o3d.geometry.PointCloud() fscore_gt.points = o3d.utility.Vector3dVector( data['gtcloud'].squeeze().cpu().detach().numpy()) dist1 = fscore_pred.compute_point_cloud_distance(fscore_gt) dist2 = fscore_gt.compute_point_cloud_distance(fscore_pred) th = 0.01 recall = float(sum(d < th for d in dist2)) / float(len(dist2)) precision = float(sum(d < th for d in dist1)) / float(len(dist1)) tot_recall += recall tot_precision += precision # 计算EMD # dense_pts = np.array(dense_ptcloud.cpu()) # num_points = dense_pts.shape[1] # EMD_loss = earth_mover_distance(dense_ptcloud, data['gtcloud'], transpose=False) / num_points # EMD_loss = EMD_loss.mean().item() # tot_emd += EMD_loss tot_shapes += 1 # print('dense_pc: ', dense_ptcloud.shape, type(dense_ptcloud)) test_losses.update( [sparse_loss.item() * 1000, dense_loss.item() * 1000]) # test_losses.update([grid_loss.item() * 1000, dense_loss.item() * 1000]) _metrics = Metrics.get(dense_ptcloud, data['gtcloud']) # return: values test_metrics.update(_metrics) if taxonomy_id not in category_metrics: category_metrics[taxonomy_id] = 
AverageMeter(Metrics.names()) category_metrics[taxonomy_id].update(_metrics) # train时不用存数据 # 存 npz ''' save_path = '/home2/wuruihai/GRNet_FILES/Results/Completion3D_grnet_chair_ep300_npz_16384d/' save_path2 = '/home2/wuruihai/GRNet_FILES/Results/Completion3D_grent_chair_ep300_npz_2048d/' part_name = 'part_7' # 只存了 final results (dense_ptcloud) save_npz_path = save_path + part_name + '/' save_npz_path2 = save_path2 + part_name + '/' if not os.path.exists(save_npz_path): os.makedirs(save_npz_path) if not os.path.exists(save_npz_path2): os.makedirs(save_npz_path2) dense_pts = np.array(dense_ptcloud.cpu()) dense_pts2 = rescale_pc_parts(dense_pts, 2048) # rescale dense_pts /= 0.45 # 放大回我们的大小 dense_pts2 /= 0.45 np.savez(save_npz_path + '%s.npz' % model_id, pts = dense_pts) np.savez(save_npz_path2 + '%s.npz' % model_id, pts = dense_pts2) ''' # 存npz (GRNet's data), Completion3D, 没有part # save_path = '/home2/wuruihai/GRNet_FILES/Results/ShapeNet_grnet_pretrained_model_VAL_npz/' # if not os.path.exists(save_path): # os.makedirs(save_path) # dense_pts = np.array(dense_ptcloud.cpu()) # np.savez(save_path + '%s.npz' % model_id, pts=dense_pts) # 存scores为txt dense_pts = np.array(dense_ptcloud.cpu()) CD_loss = dense_loss.item() num_points = dense_pts.shape[1] EMD_loss = earth_mover_distance( dense_ptcloud, data['gtcloud'], transpose=False) / num_points EMD_loss = EMD_loss.mean().item() fscore = 2 * recall * precision / ( recall + precision) if recall + precision else 0 score_dict[model_id] = (CD_loss, EMD_loss, precision, recall, fscore) # print(score_dict) # 存 png ''' save_path = '/home2/wuruihai/GRNet_FILES/Results/Completion3D_GRNet_1003/' if not os.path.exists(save_path): os.makedirs(save_path) plt.figure() pc_ptcloud = data['partial_cloud'].squeeze().cpu().numpy() pc_ptcloud_img = utils.helpers.get_ptcloud_img(pc_ptcloud) matplotlib.image.imsave(save_path + '%s_1_pc.png' % model_id, pc_ptcloud_img) # sparse_ptcloud = sparse_ptcloud.squeeze().cpu().numpy() # sparse_ptcloud_img = utils.helpers.get_ptcloud_img(sparse_ptcloud) # matplotlib.image.imsave(save_path+'%s_sps.png' % model_id, # sparse_ptcloud_img) dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy() dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud) matplotlib.image.imsave(save_path + '%s_2_dns.png' % model_id, dense_ptcloud_img) gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy() gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud) matplotlib.image.imsave(save_path+'%s_3_gt.png' % model_id, gt_ptcloud_img) ''' ''' if model_idx in range(510, 600): now_num=model_idx-499 # if test_writer is not None and model_idx < 3: # sparse_ptcloud = sparse_ptcloud.squeeze().cpu().numpy() sparse_ptcloud = sparse_ptcloud.squeeze().numpy() sparse_ptcloud_img = utils.helpers.get_ptcloud_img(sparse_ptcloud) matplotlib.image.imsave('/home2/wuruihai/GRNet_FILES/results2/%s_%s_sps.png'%(model_idx,model_id), sparse_ptcloud_img) # dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy() dense_ptcloud = dense_ptcloud.squeeze().numpy() dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud) matplotlib.image.imsave('/home2/wuruihai/GRNet_FILES/results2/%s_%s_dns.png' % (model_idx, model_id), dense_ptcloud_img) # gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy() gt_ptcloud = data['gtcloud'].squeeze().numpy() gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud) matplotlib.image.imsave('/home2/wuruihai/GRNet_FILES/results2/%s_%s_gt.png'%(model_idx,model_id), gt_ptcloud_img) cv.imwrite("/home2/wuruihai/GRNet_FILES/out3.png", 
sparse_ptcloud_img) im = Image.fromarray(sparse_ptcloud_img).convert('RGB') im.save("/home2/wuruihai/GRNet_FILES/out.jpeg") test_writer.add_image('Model%02d/SparseReconstruction' % model_idx, sparse_ptcloud_img, epoch_idx) dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy() dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud) test_writer.add_image('Model%02d/DenseReconstruction' % model_idx, dense_ptcloud_img, epoch_idx) gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy() gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud) test_writer.add_image('Model%02d/GroundTruth' % model_idx, gt_ptcloud_img, epoch_idx) ''' logging.info( 'Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s' % (model_idx + 1, n_samples, taxonomy_id, model_id, ['%.4f' % l for l in test_losses.val()], ['%.4f' % m for m in _metrics])) plt.show() plt.savefig('/raid/wuruihai/GRNet_FILES/results.png') # Print testing results print( '============================ TEST RESULTS ============================' ) print('Taxonomy', end='\t') print('#Sample', end='\t') for metric in test_metrics.items: print(metric, end='\t') print() # 将CD, EMD存到txt中 # print(score_dict) # fname = '/home2/wuruihai/GRNet_FILES/Results/ShapeNet_grnet_pretrained_model_VAL_scores.txt' # fw = open(fname, 'w') # # print(score_dict) # for idx in score_dict.keys(): # fw.write('%s\t%s\t%s\t%s\t%s\t%s\n' % (idx, score_dict[idx][0], score_dict[idx][1], score_dict[idx][2], score_dict[idx][3], score_dict[idx][4])) # model_id \t CD \t EMD for taxonomy_id in category_metrics: print(taxonomy_id, end='\t') print(category_metrics[taxonomy_id].count(0), end='\t') for value in category_metrics[taxonomy_id].avg(): print('%.4f' % value, end='\t') print() print('Overall', end='\t\t\t') for value in test_metrics.avg(): print('%.4f' % value, end='\t') print('\n') print('recall: ', tot_recall / tot_shapes) print('precision: ', tot_precision / tot_shapes) # print('EMD: ', tot_emd / tot_shapes) # Add testing results to TensorBoard if test_writer is not None: # test_writer.add_scalar('Loss/Epoch/Sparse', test_losses.avg(0), epoch_idx) test_writer.add_scalar('Loss/Epoch/Grid', test_losses.avg(0), epoch_idx) test_writer.add_scalar('Loss/Epoch/Dense', test_losses.avg(1), epoch_idx) for i, metric in enumerate(test_metrics.items): test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx) return Metrics(cfg.TEST.METRIC_NAME, test_metrics.avg())
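# NOTE: the F-score in the loop above is computed through Open3D point clouds.
# The function below is an equivalent pure-PyTorch restatement (same threshold
# th = 0.01), included as an illustrative sketch rather than a drop-in helper.
def _fscore_torch_sketch(pred, gt, th=0.01):
    """pred: (N, 3), gt: (M, 3) tensors on the same device."""
    d = torch.cdist(pred.unsqueeze(0), gt.unsqueeze(0)).squeeze(0)    # (N, M) pairwise distances
    precision = (d.min(dim=1).values < th).float().mean().item()      # pred -> gt
    recall = (d.min(dim=0).values < th).float().mean().item()         # gt -> pred
    fscore = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, fscore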
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, grnet=None):
    # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use
    torch.backends.cudnn.benchmark = True

    if test_data_loader is None:
        # Set up data loader
        dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg)
        test_data_loader = torch.utils.data.DataLoader(
            dataset=dataset_loader.get_dataset(utils.data_loaders.DatasetSubset.TEST),
            batch_size=1,
            num_workers=cfg.CONST.NUM_WORKERS,
            collate_fn=utils.data_loaders.collate_fn,
            pin_memory=True,
            shuffle=False)

    # Setup networks and initialize networks
    if grnet is None:
        grnet = GRNet(cfg)
        if torch.cuda.is_available():
            grnet = torch.nn.DataParallel(grnet).cuda()

        logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS))
        checkpoint = torch.load(cfg.CONST.WEIGHTS)
        grnet.load_state_dict(checkpoint['grnet'])

    # Switch models to evaluation mode
    grnet.eval()

    # Set up loss functions
    chamfer_dist = ChamferDistance()
    gridding_loss = GriddingLoss(scales=cfg.NETWORK.GRIDDING_LOSS_SCALES,
                                 alphas=cfg.NETWORK.GRIDDING_LOSS_ALPHAS)  # lgtm [py/unused-import]

    # Testing loop
    n_samples = len(test_data_loader)
    test_losses = AverageMeter(['SparseLoss', 'DenseLoss'])
    test_metrics = AverageMeter(Metrics.names())
    category_metrics = dict()

    for model_idx, (taxonomy_id, model_id, data) in enumerate(test_data_loader):
        taxonomy_id = taxonomy_id[0] if isinstance(taxonomy_id[0], str) else taxonomy_id[0].item()
        model_id = model_id[0]

        with torch.no_grad():
            for k, v in data.items():
                data[k] = utils.helpers.var_or_cuda(v)

            sparse_ptcloud, dense_ptcloud = grnet(data)
            sparse_loss = chamfer_dist(sparse_ptcloud, data['gtcloud'])
            dense_loss = chamfer_dist(dense_ptcloud, data['gtcloud'])
            test_losses.update([sparse_loss.item() * 1000, dense_loss.item() * 1000])
            _metrics = Metrics.get(dense_ptcloud, data['gtcloud'])
            test_metrics.update(_metrics)

            # save predicted point cloud
            if cfg.TEST.SAVE_PRED:
                if cfg.DATASET.TEST_DATASET == 'FrankaScan':
                    dirname, obj_idx = model_id.split('-')
                    out_ptcloud = dense_ptcloud[0].cpu()
                    IO.put(cfg.DATASETS.FRANKASCAN.PREDICTION_PATH % (dirname, obj_idx), out_ptcloud)

            if taxonomy_id not in category_metrics:
                category_metrics[taxonomy_id] = AverageMeter(Metrics.names())
            category_metrics[taxonomy_id].update(_metrics)

            if test_writer is not None and model_idx < 3:
                sparse_ptcloud = sparse_ptcloud.squeeze().cpu().numpy()
                sparse_ptcloud_img = utils.helpers.get_ptcloud_img(sparse_ptcloud)
                test_writer.add_image('Model%02d/SparseReconstruction' % model_idx, sparse_ptcloud_img,
                                      epoch_idx)
                dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy()
                dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud)
                test_writer.add_image('Model%02d/DenseReconstruction' % model_idx, dense_ptcloud_img,
                                      epoch_idx)
                gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy()
                gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud)
                test_writer.add_image('Model%02d/GroundTruth' % model_idx, gt_ptcloud_img, epoch_idx)

            logging.info('Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s' %
                         (model_idx + 1, n_samples, taxonomy_id, model_id,
                          ['%.4f' % l for l in test_losses.val()], ['%.4f' % m for m in _metrics]))

    # Print testing results
    print('============================ TEST RESULTS ============================')
    print('Taxonomy', end='\t')
    print('#Sample', end='\t')
    for metric in test_metrics.items:
        print(metric, end='\t')
    print()

    for taxonomy_id in category_metrics:
        print(taxonomy_id, end='\t')
        print(category_metrics[taxonomy_id].count(0), end='\t')
        for value in category_metrics[taxonomy_id].avg():
            print('%.4f' % value, end='\t')
        print()

    print('Overall', end='\t\t\t')
    for value in test_metrics.avg():
        print('%.4f' % value, end='\t')
    print('\n')

    # Add testing results to TensorBoard
    if test_writer is not None:
        test_writer.add_scalar('Loss/Epoch/Sparse', test_losses.avg(0), epoch_idx)
        test_writer.add_scalar('Loss/Epoch/Dense', test_losses.avg(1), epoch_idx)
        for i, metric in enumerate(test_metrics.items):
            test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx)

    return Metrics(cfg.TEST.METRIC_NAME, test_metrics.avg())
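# NOTE: a minimal sketch of the Metrics interface that the test/train functions
# in this file rely on (names(), get(), state_dict(), better_than()); the real
# class in utils/metrics.py computes the actual metric values and knows whether
# higher or lower is better for each of them. The names listed here are
# assumptions for illustration only.
class _MetricsSketch(object):
    ITEMS = ['F-Score', 'ChamferDistance']    # assumed metric names

    @classmethod
    def names(cls):
        return cls.ITEMS

    @classmethod
    def get(cls, pred, gt):
        # The real implementation returns one value per name in ITEMS for a sample.
        raise NotImplementedError

    def __init__(self, metric_name, values):
        self.metric_name = metric_name
        # Accept either a saved state_dict or a plain list of averaged values.
        self._values = values if isinstance(values, dict) else dict(zip(self.ITEMS, values))

    def state_dict(self):
        return self._values

    def better_than(self, other):
        # Assumes higher-is-better for the main metric; the real class also
        # handles lower-is-better metrics such as Chamfer distance.
        if other is None:
            return True
        return self._values[self.metric_name] > other._values[self.metric_name]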
def train_net_new(cfg): # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use torch.backends.cudnn.benchmark = True device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Set up data loader pnum = 2048 crop_point_num = 512 workers = 1 batchSize = 16 class_name = "Pistol" train_dataset_loader = shapenet_part_loader.PartDataset( root='./dataset/shapenetcore_partanno_segmentation_benchmark_v0/', classification=False, class_choice=class_name, npoints=pnum, split='train') train_data_loader = torch.utils.data.DataLoader(train_dataset_loader, batch_size=batchSize, shuffle=True, num_workers=int(workers)) test_dataset_loader = shapenet_part_loader.PartDataset( root='./dataset/shapenetcore_partanno_segmentation_benchmark_v0/', classification=False, class_choice=class_name, npoints=pnum, split='test') val_data_loader = torch.utils.data.DataLoader(test_dataset_loader, batch_size=batchSize, shuffle=True, num_workers=int(workers)) # Set up folders for logs and checkpoints output_dir = os.path.join(cfg.DIR.OUT_PATH, '%s', datetime.now().isoformat()) cfg.DIR.CHECKPOINTS = output_dir % 'checkpoints' cfg.DIR.LOGS = output_dir % 'logs' if not os.path.exists(cfg.DIR.CHECKPOINTS): os.makedirs(cfg.DIR.CHECKPOINTS) # Create tensorboard writers train_writer = SummaryWriter(os.path.join(cfg.DIR.LOGS, 'train')) val_writer = SummaryWriter(os.path.join(cfg.DIR.LOGS, 'test')) # Create the networks grnet = GRNet(cfg, seg_class_no) grnet.apply(utils.helpers.init_weights) logging.debug('Parameters in GRNet: %d.' % utils.helpers.count_parameters(grnet)) # Move the network to GPU if possible grnet = grnet.to(device) # Create the optimizers grnet_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, grnet.parameters()), lr=cfg.TRAIN.LEARNING_RATE, weight_decay=cfg.TRAIN.WEIGHT_DECAY, betas=cfg.TRAIN.BETAS) grnet_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( grnet_optimizer, milestones=cfg.TRAIN.LR_MILESTONES, gamma=cfg.TRAIN.GAMMA) # Set up loss functions chamfer_dist = ChamferDistance() gridding_loss = GriddingLoss( # lgtm [py/unused-local-variable] scales=cfg.NETWORK.GRIDDING_LOSS_SCALES, alphas=cfg.NETWORK.GRIDDING_LOSS_ALPHAS) seg_criterion = torch.nn.CrossEntropyLoss().cuda() # Load pretrained model if exists init_epoch = 0 best_metrics = None if 'WEIGHTS' in cfg.CONST: logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS)) checkpoint = torch.load(cfg.CONST.WEIGHTS) grnet.load_state_dict(checkpoint['grnet']) logging.info( 'Recover complete. Current epoch = #%d; best metrics = %s.' 
% (init_epoch, best_metrics)) train_seg_on_sparse = False train_seg_on_dense = False miou = 0 # Training/Testing the network for epoch_idx in range(init_epoch + 1, cfg.TRAIN.N_EPOCHS + 1): epoch_start_time = time() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter(['SparseLoss', 'DenseLoss']) grnet.train() if epoch_idx == 5: train_seg_on_sparse = True if epoch_idx == 7: train_seg_on_dense = True batch_end_time = time() n_batches = len(train_data_loader) for batch_idx, ( data, seg, model_ids, ) in enumerate(train_data_loader): data_time.update(time() - batch_end_time) input_cropped1 = torch.FloatTensor(data.size()[0], pnum, 3) input_cropped1 = input_cropped1.data.copy_(data) if batch_idx == 10: pass #break data = data.to(device) seg = seg.to(device) input_cropped1 = input_cropped1.to(device) # remove points to make input incomplete choice = [ torch.Tensor([1, 0, 0]), torch.Tensor([0, 0, 1]), torch.Tensor([1, 0, 1]), torch.Tensor([-1, 0, 0]), torch.Tensor([-1, 1, 0]) ] for m in range(data.size()[0]): index = random.sample(choice, 1) p_center = index[0].to(device) distances = torch.sum((data[m] - p_center)**2, dim=1) order = torch.argsort(distances) zero_point = torch.FloatTensor([0, 0, 0]).to(device) input_cropped1.data[m, order[:crop_point_num]] = zero_point if save_crop_mode: np.save(class_name + "_orig", data[0].detach().cpu().numpy()) np.save(class_name + "_cropped", input_cropped1[0].detach().cpu().numpy()) sys.exit() sparse_ptcloud, dense_ptcloud, sparse_seg, full_seg, dense_seg = grnet( input_cropped1) data_seg = get_data_seg(data, full_seg) seg_loss = seg_criterion(torch.transpose(data_seg, 1, 2), seg) if train_seg_on_sparse and train_seg: gt_seg = get_seg_gts(seg, data, sparse_ptcloud) seg_loss += seg_criterion(torch.transpose(sparse_seg, 1, 2), gt_seg) seg_loss /= 2 if train_seg_on_dense and train_seg: gt_seg = get_seg_gts(seg, data, dense_ptcloud) dense_seg_loss = seg_criterion( torch.transpose(dense_seg, 1, 2), gt_seg) print(dense_seg_loss.item()) if draw_mode: plot_ptcloud(data[0], seg[0], "orig") plot_ptcloud(input_cropped1[0], seg[0], "cropped") plot_ptcloud(sparse_ptcloud[0], torch.argmax(sparse_seg[0], dim=1), "sparse_pred") if not train_seg_on_sparse: gt_seg = get_seg_gts(seg, data, sparse_ptcloud) #plot_ptcloud(sparse_ptcloud[0], gt_seg[0], "sparse_gt") #if not train_seg_on_dense: #gt_seg = get_seg_gts(seg, data, sparse_ptcloud) print(dense_seg.size()) plot_ptcloud(dense_ptcloud[0], torch.argmax(dense_seg[0], dim=1), "dense_pred") sys.exit() print(seg_loss.item()) lamb = 0.8 sparse_loss = chamfer_dist(sparse_ptcloud, data).to(device) dense_loss = chamfer_dist(dense_ptcloud, data).to(device) grid_loss = gridding_loss(sparse_ptcloud, data).to(device) if train_seg: _loss = lamb * (sparse_loss + dense_loss + grid_loss) + (1 - lamb) * seg_loss else: _loss = (sparse_loss + dense_loss + grid_loss) if train_seg_on_dense and train_seg: _loss += (1 - lamb) * dense_seg_loss _loss.to(device) losses.update( [sparse_loss.item() * 1000, dense_loss.item() * 1000]) grnet.zero_grad() _loss.backward() grnet_optimizer.step() n_itr = (epoch_idx - 1) * n_batches + batch_idx train_writer.add_scalar('Loss/Batch/Sparse', sparse_loss.item() * 1000, n_itr) train_writer.add_scalar('Loss/Batch/Dense', dense_loss.item() * 1000, n_itr) batch_time.update(time() - batch_end_time) batch_end_time = time() logging.info( '[Epoch %d/%d][Batch %d/%d] BatchTime = %.3f (s) DataTime = %.3f (s) Losses = %s' % (epoch_idx, cfg.TRAIN.N_EPOCHS, batch_idx + 1, n_batches, batch_time.val(), 
data_time.val(), ['%.4f' % l for l in losses.val()])) # Validate the current model if train_seg: miou_new = test_net_new(cfg, epoch_idx, val_data_loader, val_writer, grnet) else: miou_new = 0 grnet_lr_scheduler.step() epoch_end_time = time() train_writer.add_scalar('Loss/Epoch/Sparse', losses.avg(0), epoch_idx) train_writer.add_scalar('Loss/Epoch/Dense', losses.avg(1), epoch_idx) logging.info('[Epoch %d/%d] EpochTime = %.3f (s) Losses = %s' % (epoch_idx, cfg.TRAIN.N_EPOCHS, epoch_end_time - epoch_start_time, ['%.4f' % l for l in losses.avg()])) if not train_seg or miou_new > miou: file_name = class_name + 'noseg-ckpt-epoch.pth' output_path = os.path.join(cfg.DIR.CHECKPOINTS, file_name) torch.save({ 'epoch_index': epoch_idx, 'grnet': grnet.state_dict() }, output_path) # yapf: disable logging.info('Saved checkpoint to %s ...' % output_path) miou = miou_new train_writer.close() val_writer.close()
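# NOTE: the random view-point cropping done inline in train_net_new() above,
# factored into a helper purely for readability; this is an illustrative
# sketch, not part of the original training code.
def _crop_around_viewpoint_sketch(data, input_cropped, crop_point_num, device):
    """Zero out the crop_point_num points of each cloud that lie closest to a random viewpoint."""
    viewpoints = [
        torch.Tensor([1, 0, 0]), torch.Tensor([0, 0, 1]), torch.Tensor([1, 0, 1]),
        torch.Tensor([-1, 0, 0]), torch.Tensor([-1, 1, 0])
    ]
    zero_point = torch.FloatTensor([0, 0, 0]).to(device)
    for m in range(data.size(0)):
        p_center = random.choice(viewpoints).to(device)
        distances = torch.sum((data[m] - p_center) ** 2, dim=1)
        order = torch.argsort(distances)
        input_cropped.data[m, order[:crop_point_num]] = zero_point
    return input_cropped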
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, tflownet=None, rmnet=None): # Set up data loader if test_data_loader is None: # Set up data loader test_data_loader = torch.utils.data.DataLoader( dataset=utils.data_loaders.DatasetCollector.get_dataset( cfg, cfg.DATASET.TEST_DATASET, utils.data_loaders.DatasetSubset.VAL), batch_size=1, num_workers=cfg.CONST.N_WORKERS, pin_memory=True, shuffle=False) # Setup networks and initialize networks if rmnet is None: tflownet = TinyFlowNet(cfg) rmnet = RMNet(cfg) if torch.cuda.is_available(): tflownet = torch.nn.DataParallel(tflownet).cuda() rmnet = torch.nn.DataParallel(rmnet).cuda() logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS)) checkpoint = torch.load(cfg.CONST.WEIGHTS) tflownet.load_state_dict(checkpoint['tflownet']) rmnet.load_state_dict(checkpoint['rmnet']) # Switch models to evaluation mode tflownet.eval() rmnet.eval() # Set up loss functions l1_loss = torch.nn.L1Loss() nll_loss = torch.nn.NLLLoss(ignore_index=cfg.CONST.IGNORE_IDX) lovasz_loss = LovaszLoss(ignore_index=cfg.CONST.IGNORE_IDX) # The testing loop n_videos = len(test_data_loader) test_losses = AverageMeter() test_metrics = AverageMeter(Metrics.names()) for idx, (video_name, n_objects, frames, masks, optical_flows) in enumerate(test_data_loader): # Test only selected videos to accelerate the testing process if not epoch_idx == -1 and idx not in cfg.TEST.TESTING_VIDEOS_INDEXES: continue with torch.no_grad(): # Fix Assertion Error: all(map(lambda i: i.is_cuda, inputs)) if torch.cuda.device_count() > 1: frames = utils.helpers.var_or_cuda(frames) masks = utils.helpers.var_or_cuda(masks) optical_flows = utils.helpers.var_or_cuda(optical_flows) # Fix bugs: OOM error for large videos try: if epoch_idx == -1: est_flows, est_probs = utils.helpers.multi_scale_inference( cfg, tflownet, rmnet, frames, masks, n_objects) else: est_flows = tflownet(frames) est_probs = rmnet(frames, masks, est_flows, n_objects, cfg.TEST.MEMORIZE_EVERY) est_probs = est_probs.permute(0, 2, 1, 3, 4) masks = torch.argmax(masks, dim=2) est_masks = torch.argmax(est_probs, dim=1) if cfg.TRAIN.NETWORK == 'TinyFlowNet': loss = l1_loss(est_flows, optical_flows) else: # RMNet loss = lovasz_loss(est_probs, masks) + nll_loss( torch.log(est_probs), masks) except Exception as ex: logging.exception(ex) continue test_losses.update(loss.item()) metrics = Metrics.get(est_masks[0], masks[0]) test_metrics.update(metrics, torch.max(n_objects[0]).item()) video_name = video_name[0] if test_writer is not None and idx < 3 and cfg.TEST.VISUALIZE_EVERY > 0: frames = frames[0] n_frames = est_masks.size(1) for i in tqdm(range(0, n_frames, cfg.TEST.VISUALIZE_EVERY), leave=False, desc=video_name): est_segmentation = utils.helpers.get_segmentation( frames[i], est_masks[0][i], { 'mean': cfg.CONST.DATASET_MEAN, 'std': cfg.CONST.DATASET_STD, }, cfg.CONST.IGNORE_IDX) gt_segmentation = utils.helpers.get_segmentation( frames[i], masks[0][i], { 'mean': cfg.CONST.DATASET_MEAN, 'std': cfg.CONST.DATASET_STD, }, cfg.CONST.IGNORE_IDX) test_writer.add_image( '%s/Frame%03d' % (video_name, i), np.concatenate((est_segmentation, gt_segmentation), axis=0), epoch_idx) logging.info( 'Test[%d/%d] VideoName = %s Loss = %.4f Metrics = %s' % (idx + 1, n_videos, video_name, loss, ['%.4f' % m for m in metrics])) # Print testing results logging.info( '[Test Summary] Loss = %.4f Metrics = %s' % (test_losses.avg(), ['%.4f' % tm for tm in test_metrics.avg()])) # Add testing results to TensorBoard if test_writer is not None: 
test_writer.add_scalar('Loss/Epoch', test_losses.avg(), epoch_idx) for i, metric in enumerate(test_metrics.items): test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx) return Metrics(cfg.TEST.MAIN_METRIC_NAME, test_metrics.avg())
def train_net(cfg): # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use # train_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TRAIN_DATASET](cfg) # test_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg) train_dataset_loader = dataloader_jt.DATASET_LOADER_MAPPING[ cfg.DATASET.TRAIN_DATASET](cfg) test_dataset_loader = dataloader_jt.DATASET_LOADER_MAPPING[ cfg.DATASET.TEST_DATASET](cfg) train_data_loader = train_dataset_loader.get_dataset( dataloader_jt.DatasetSubset.TRAIN, batch_size=cfg.TRAIN.BATCH_SIZE, num_workers=cfg.CONST.NUM_WORKERS, shuffle=True) val_data_loader = test_dataset_loader.get_dataset( dataloader_jt.DatasetSubset.VAL, batch_size=cfg.TRAIN.BATCH_SIZE, num_workers=cfg.CONST.NUM_WORKERS, shuffle=False) # Set up folders for logs and checkpoints output_dir = os.path.join(cfg.DIR.OUT_PATH, '%s', datetime.now().isoformat()) cfg.DIR.CHECKPOINTS = output_dir % 'checkpoints' cfg.DIR.LOGS = output_dir % 'logs' if not os.path.exists(cfg.DIR.CHECKPOINTS): os.makedirs(cfg.DIR.CHECKPOINTS) # Create tensorboard writers train_writer = SummaryWriter(os.path.join(cfg.DIR.LOGS, 'train')) val_writer = SummaryWriter(os.path.join(cfg.DIR.LOGS, 'test')) model = Model(dataset=cfg.DATASET.TRAIN_DATASET) init_epoch = 0 best_metrics = float('inf') optimizer = nn.Adam(model.parameters(), lr=cfg.TRAIN.LEARNING_RATE, weight_decay=cfg.TRAIN.WEIGHT_DECAY, betas=cfg.TRAIN.BETAS) lr_scheduler = jittor.lr_scheduler.MultiStepLR( optimizer, milestones=cfg.TRAIN.LR_MILESTONES, gamma=cfg.TRAIN.GAMMA, last_epoch=init_epoch) # Training/Testing the network for epoch_idx in range(init_epoch + 1, cfg.TRAIN.N_EPOCHS + 1): epoch_start_time = time() model.train() loss_metric = AverageMeter() n_batches = len(train_data_loader) print('epoch: ', epoch_idx, 'optimizer: ', lr_scheduler.get_lr()) with tqdm(train_data_loader) as t: for batch_idx, (taxonomy_ids, model_ids, data) in enumerate(t): partial = jittor.array(data['partial_cloud']) gt = jittor.array(data['gtcloud']) pcds, deltas = model(partial) cd1 = chamfer(pcds[0], gt) cd2 = chamfer(pcds[1], gt) cd3 = chamfer(pcds[2], gt) loss_cd = cd1 + cd2 + cd3 delta_losses = [] for delta in deltas: delta_losses.append(jittor.sum(delta**2)) loss_pmd = jittor.sum(jittor.stack(delta_losses)) / 3 loss = loss_cd * cfg.TRAIN.LAMBDA_CD + loss_pmd * cfg.TRAIN.LAMBDA_PMD optimizer.step(loss) loss_item = loss.item() loss_metric.update(loss_item) jittor.sync_all() t.set_description( '[Epoch %d/%d][Batch %d/%d]' % (epoch_idx, cfg.TRAIN.N_EPOCHS, batch_idx + 1, n_batches)) t.set_postfix(loss='%s' % ['%.4f' % l for l in [loss_item]]) lr_scheduler.step() epoch_end_time = time() train_writer.add_scalar('Loss/Epoch/loss', loss_metric.avg(), epoch_idx) logging.info( '[Epoch %d/%d] EpochTime = %.3f (s) Losses = %s' % (epoch_idx, cfg.TRAIN.N_EPOCHS, epoch_end_time - epoch_start_time, ['%.4f' % l for l in [loss_metric.avg()]])) # Validate the current model cd_eval = test_net(cfg, epoch_idx, val_data_loader, val_writer, model) # Save checkpoints if epoch_idx % cfg.TRAIN.SAVE_FREQ == 0 or cd_eval < best_metrics: file_name = 'ckpt-best.pkl' if cd_eval < best_metrics else 'ckpt-epoch-%03d.pkl' % epoch_idx output_path = os.path.join(cfg.DIR.CHECKPOINTS, file_name) model.save(output_path) logging.info('Saved checkpoint to %s ...' % output_path) if cd_eval < best_metrics: best_metrics = cd_eval train_writer.close() val_writer.close()
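# NOTE: chamfer() / chamfer_dist() above are provided by the repo's custom
# (CUDA/Jittor) kernels. For reference, a naive PyTorch restatement of the
# symmetric squared-distance Chamfer loss is sketched below; the exact
# reduction and scaling of the real ops may differ.
def _chamfer_l2_sketch(p1, p2):
    """p1: (B, N, 3), p2: (B, M, 3) torch tensors."""
    d = torch.cdist(p1, p2) ** 2                       # (B, N, M) squared distances
    return d.min(dim=2).values.mean() + d.min(dim=1).values.mean()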
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, grnet=None): # Enable the inbuilt cudnn auto-tuner to find the best algorithm to use torch.backends.cudnn.benchmark = True if test_data_loader is None: # Set up data loader dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg) # 在data_loader.py中修改这里的dataset值 test_data_loader = torch.utils.data.DataLoader(dataset=dataset_loader.get_dataset( utils.data_loaders.DatasetSubset.VAL), batch_size=1, num_workers=cfg.CONST.NUM_WORKERS, collate_fn=utils.data_loaders.collate_fn, pin_memory=True, shuffle=False) # Setup networks and initialize networks if grnet is None: grnet = GRNet(cfg) if torch.cuda.is_available(): grnet = torch.nn.DataParallel(grnet).cuda() logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS)) checkpoint = torch.load(cfg.CONST.WEIGHTS) grnet.load_state_dict(checkpoint['grnet']) # Switch models to evaluation mode grnet.eval() # Set up loss functions chamfer_dist = ChamferDistance() gridding_loss = GriddingLoss(scales=cfg.NETWORK.GRIDDING_LOSS_SCALES, alphas=cfg.NETWORK.GRIDDING_LOSS_ALPHAS) # lgtm [py/unused-import] # Testing loop n_samples = len(test_data_loader) test_losses = AverageMeter(['SparseLoss', 'DenseLoss']) # test_losses = AverageMeter(['GridLoss', 'DenseLoss']) test_metrics = AverageMeter(Metrics.names()) # 'F-score, CD category_metrics = dict() # Testing loop # 通过data得到sparse_pucloud, data from test_data_loader ''' gt_path = '/raid/wuruihai/GRNet_FILES/xkh/Completion3D/val/gt/03001627/' # pred_path = '/raid/wuruihai/GRNet_FILES/Results/Completion3D_grnet_data_ep300_npz_2048d/' # 0.0033 pred_path = '/raid/wuruihai/GRNet_FILES/Results/Completion3D_grnet_alldata_ep300_npz_small_2048d/' # 0.0030 n_points = 2048 for root, dirs, files in os.walk(pred_path): len_files = len(files) pred_batch = np.zeros((1, n_points, 3)) gt_batch = np.zeros((1, n_points, 3)) idx = -1 tot = 0 for file in files: file_id = os.path.splitext(file)[0] idx += 1 pred = np.load(pred_path + file)['pts'] # pred = rescale_pc_parts(pred, num_points=n_points) # pred = pred.reshape(n_points, 3) pred_batch[0] = pred gt = h5py.File(gt_path + file_id + '.h5', 'r')['data'][:] # Completion3D gt = np.array(gt).astype(np.float32) # gt = rescale_pc_parts(gt, num_points=n_points) # gt = gt.reshape(n_points, 3) gt_batch[0] = gt with torch.no_grad(): cd = chamfer_dist(torch.tensor(pred_batch, dtype=torch.float32).cuda(), torch.tensor(gt_batch, dtype=torch.float32).cuda()) print(cd) tot += cd print('avg: ', tot/len_files) return ''' for model_idx, (taxonomy_id, model_id, data) in enumerate(test_data_loader): taxonomy_id = taxonomy_id[0] if isinstance(taxonomy_id[0], str) else taxonomy_id[0].item() model_id = model_id[0] with torch.no_grad(): for k, v in data.items(): data[k] = utils.helpers.var_or_cuda(v) sparse_ptcloud, dense_ptcloud = grnet(data) sparse_loss = chamfer_dist(sparse_ptcloud, data['gtcloud']) # grid_loss = gridding_loss(dense_ptcloud, data['gtcloud']) dense_loss = chamfer_dist(dense_ptcloud, data['gtcloud']) print(dense_ptcloud.shape, data['gtcloud'].shape) test_losses.update([sparse_loss.item() * 1000, dense_loss.item() * 1000]) # test_losses.update([grid_loss.item() * 1000, dense_loss.item() * 1000]) _metrics = Metrics.get(dense_ptcloud, data['gtcloud']) # return: values test_metrics.update(_metrics) if taxonomy_id not in category_metrics: category_metrics[taxonomy_id] = AverageMeter(Metrics.names()) category_metrics[taxonomy_id].update(_metrics) # train时不用存数据 # 存 npz ''' save_path = 
'/home2/wuruihai/GRNet_FILES/Results/Completion3D_grnet_chair_ep300_npz_16384d/' save_path2 = '/home2/wuruihai/GRNet_FILES/Results/Completion3D_grent_chair_ep300_npz_2048d/' part_name = 'part_7' # 只存了 final results (dense_ptcloud) save_npz_path = save_path + part_name + '/' save_npz_path2 = save_path2 + part_name + '/' if not os.path.exists(save_npz_path): os.makedirs(save_npz_path) if not os.path.exists(save_npz_path2): os.makedirs(save_npz_path2) dense_pts = np.array(dense_ptcloud.cpu()) dense_pts2 = rescale_pc_parts(dense_pts, 2048) # rescale dense_pts /= 0.45 # 放大回我们的大小 dense_pts2 /= 0.45 np.savez(save_npz_path + '%s.npz' % model_id, pts = dense_pts) np.savez(save_npz_path2 + '%s.npz' % model_id, pts = dense_pts2) ''' # 存npz (GRNet's data), Completion3D, 没有part # 和grnet自己的数据集比较,不需要放大(/0.45) ''' save_path = '/home2/wuruihai/GRNet_FILES/Results/Completion3D_grnet_alldata_ep300_npz_small_16384d/' save_path2 = '/home2/wuruihai/GRNet_FILES/Results/Completion3D_grnet_alldata_ep300_npz_small_2048d/' if not os.path.exists(save_path): os.makedirs(save_path) if not os.path.exists(save_path2): os.makedirs(save_path2) dense_pts = np.array(dense_ptcloud.cpu()) dense_pts2 = rescale_pc_parts(dense_pts, 2048) # rescale np.savez(save_path + '%s.npz' % model_id, pts=dense_pts) np.savez(save_path2 + '%s.npz' % model_id, pts = dense_pts2) ''' # 存 png ''' save_path = '/home2/wuruihai/GRNet_FILES/Results/ShapeNet_zy_chair_ep500_part0_16384d_png/' if not os.path.exists(save_path): os.makedirs(save_path) plt.figure() pc_ptcloud = data['partial_cloud'].squeeze().cpu().numpy() pc_ptcloud_img = utils.helpers.get_ptcloud_img(pc_ptcloud) matplotlib.image.imsave(save_path + '%s_1_pc.png' % model_id, pc_ptcloud_img) # sparse_ptcloud = sparse_ptcloud.squeeze().cpu().numpy() # sparse_ptcloud_img = utils.helpers.get_ptcloud_img(sparse_ptcloud) # matplotlib.image.imsave(save_path+'%s_sps.png' % model_id, # sparse_ptcloud_img) dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy() dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud) matplotlib.image.imsave(save_path+'%s_2_dns.png' % model_id, dense_ptcloud_img) gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy() gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud) matplotlib.image.imsave(save_path+'%s_3_gt.png' % model_id, gt_ptcloud_img) ''' ''' if model_idx in range(510, 600): now_num=model_idx-499 # if test_writer is not None and model_idx < 3: # sparse_ptcloud = sparse_ptcloud.squeeze().cpu().numpy() sparse_ptcloud = sparse_ptcloud.squeeze().numpy() sparse_ptcloud_img = utils.helpers.get_ptcloud_img(sparse_ptcloud) matplotlib.image.imsave('/home2/wuruihai/GRNet_FILES/results2/%s_%s_sps.png'%(model_idx,model_id), sparse_ptcloud_img) # dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy() dense_ptcloud = dense_ptcloud.squeeze().numpy() dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud) matplotlib.image.imsave('/home2/wuruihai/GRNet_FILES/results2/%s_%s_dns.png' % (model_idx, model_id), dense_ptcloud_img) # gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy() gt_ptcloud = data['gtcloud'].squeeze().numpy() gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud) matplotlib.image.imsave('/home2/wuruihai/GRNet_FILES/results2/%s_%s_gt.png'%(model_idx,model_id), gt_ptcloud_img) cv.imwrite("/home2/wuruihai/GRNet_FILES/out3.png", sparse_ptcloud_img) im = Image.fromarray(sparse_ptcloud_img).convert('RGB') im.save("/home2/wuruihai/GRNet_FILES/out.jpeg") test_writer.add_image('Model%02d/SparseReconstruction' % model_idx, sparse_ptcloud_img, 
epoch_idx) dense_ptcloud = dense_ptcloud.squeeze().cpu().numpy() dense_ptcloud_img = utils.helpers.get_ptcloud_img(dense_ptcloud) test_writer.add_image('Model%02d/DenseReconstruction' % model_idx, dense_ptcloud_img, epoch_idx) gt_ptcloud = data['gtcloud'].squeeze().cpu().numpy() gt_ptcloud_img = utils.helpers.get_ptcloud_img(gt_ptcloud) test_writer.add_image('Model%02d/GroundTruth' % model_idx, gt_ptcloud_img, epoch_idx) ''' logging.info('Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s' % (model_idx + 1, n_samples, taxonomy_id, model_id, ['%.4f' % l for l in test_losses.val() ], ['%.4f' % m for m in _metrics])) plt.show() plt.savefig('/raid/wuruihai/GRNet_FILES/results.png') # Print testing results print('============================ TEST RESULTS ============================') print('Taxonomy', end='\t') print('#Sample', end='\t') for metric in test_metrics.items: print(metric, end='\t') print() for taxonomy_id in category_metrics: print(taxonomy_id, end='\t') print(category_metrics[taxonomy_id].count(0), end='\t') for value in category_metrics[taxonomy_id].avg(): print('%.4f' % value, end='\t') print() print('Overall', end='\t\t\t') for value in test_metrics.avg(): print('%.4f' % value, end='\t') print('\n') # Add testing results to TensorBoard if test_writer is not None: # test_writer.add_scalar('Loss/Epoch/Sparse', test_losses.avg(0), epoch_idx) test_writer.add_scalar('Loss/Epoch/Grid', test_losses.avg(0), epoch_idx) test_writer.add_scalar('Loss/Epoch/Dense', test_losses.avg(1), epoch_idx) for i, metric in enumerate(test_metrics.items): test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx) return Metrics(cfg.TEST.METRIC_NAME, test_metrics.avg())
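# NOTE: compact version of the commented-out offline evaluation block earlier
# in the function above: compute Chamfer distance between predictions saved as
# .npz files and Completion3D ground truth stored as .h5. Directory paths are
# placeholders and the whole function is an illustrative sketch.
def _offline_chamfer_eval_sketch(pred_dir, gt_dir, chamfer_dist):
    import glob
    import h5py
    total, n = 0.0, 0
    for pred_file in glob.glob(os.path.join(pred_dir, '*.npz')):
        model_id = os.path.splitext(os.path.basename(pred_file))[0]
        pred = np.load(pred_file)['pts'][np.newaxis, ...].astype(np.float32)
        gt = np.array(h5py.File(os.path.join(gt_dir, model_id + '.h5'), 'r')['data'][:],
                      dtype=np.float32)[np.newaxis, ...]
        with torch.no_grad():
            total += chamfer_dist(torch.from_numpy(pred).cuda(),
                                  torch.from_numpy(gt).cuda()).item()
        n += 1
    return total / max(n, 1)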
def test_net(cfg, epoch_idx=-1, test_data_loader=None, test_writer=None, model=None): if test_data_loader is None: # Set up data loader dataset_loader = dataloader_jt.DATASET_LOADER_MAPPING[ cfg.DATASET.TEST_DATASET](cfg) test_data_loader = dataset_loader.get_dataset( dataloader_jt.DatasetSubset.VAL, batch_size=1, shuffle=False) # Setup networks and initialize networks if model is None: model = Model(dataset=cfg.DATASET.TEST_DATASET) assert 'WEIGHTS' in cfg.CONST and cfg.CONST.WEIGHTS print('loading: ', cfg.CONST.WEIGHTS) model.load(cfg.CONST.WEIGHTS) # Switch models to evaluation mode model.eval() n_samples = len(test_data_loader) test_losses = AverageMeter(['cd1', 'cd2', 'cd3', 'pmd']) test_metrics = AverageMeter(Metrics.names()) category_metrics = dict() # Testing loop with tqdm(test_data_loader) as t: # print('repeating') for model_idx, (taxonomy_id, model_id, data) in enumerate(t): taxonomy_id = taxonomy_id[0] if isinstance( taxonomy_id[0], str) else taxonomy_id[0].item() model_id = model_id[0] # for k, v in data.items(): # data[k] = utils.helpers.var_or_cuda(v) partial = jittor.array(data['partial_cloud']) gt = jittor.array(data['gtcloud']) b, n, _ = partial.shape pcds, deltas = model(partial) cd1 = chamfer(pcds[0], gt).item() * 1e3 cd2 = chamfer(pcds[1], gt).item() * 1e3 cd3 = chamfer(pcds[2], gt).item() * 1e3 # pmd loss pmd_losses = [] for delta in deltas: pmd_losses.append(jittor.sum(delta**2)) pmd = jittor.sum(jittor.stack(pmd_losses)) / 3 pmd_item = pmd.item() _metrics = [pmd_item, cd3] test_losses.update([cd1, cd2, cd3, pmd_item]) test_metrics.update(_metrics) if taxonomy_id not in category_metrics: category_metrics[taxonomy_id] = AverageMeter(Metrics.names()) category_metrics[taxonomy_id].update(_metrics) t.set_description( 'Test[%d/%d] Taxonomy = %s Sample = %s Losses = %s Metrics = %s' % (model_idx + 1, n_samples, taxonomy_id, model_id, ['%.4f' % l for l in test_losses.val()], ['%.4f' % m for m in _metrics])) # Print testing results print( '============================ TEST RESULTS ============================' ) print('Taxonomy', end='\t') print('#Sample', end='\t') for metric in test_metrics.items: print(metric, end='\t') print() for taxonomy_id in category_metrics: print(taxonomy_id, end='\t') print(category_metrics[taxonomy_id].count(0), end='\t') for value in category_metrics[taxonomy_id].avg(): print('%.4f' % value, end='\t') print() print('Overall', end='\t\t\t') for value in test_metrics.avg(): print('%.4f' % value, end='\t') print('\n') # Add testing results to TensorBoard if test_writer is not None: test_writer.add_scalar('Loss/Epoch/cd1', test_losses.avg(0), epoch_idx) test_writer.add_scalar('Loss/Epoch/cd2', test_losses.avg(1), epoch_idx) test_writer.add_scalar('Loss/Epoch/cd3', test_losses.avg(2), epoch_idx) test_writer.add_scalar('Loss/Epoch/delta', test_losses.avg(3), epoch_idx) for i, metric in enumerate(test_metrics.items): test_writer.add_scalar('Metric/%s' % metric, test_metrics.avg(i), epoch_idx) model.train() return test_losses.avg(2)
def train_net(cfg): # Set up data loader train_data_loader = torch.utils.data.DataLoader( dataset=utils.data_loaders.DatasetCollector.get_dataset( cfg, cfg.DATASET.TRAIN_DATASET, utils.data_loaders.DatasetSubset.TRAIN), batch_size=cfg.TRAIN.BATCH_SIZE, num_workers=cfg.CONST.N_WORKERS, pin_memory=True, shuffle=True, drop_last=True) val_data_loader = torch.utils.data.DataLoader( dataset=utils.data_loaders.DatasetCollector.get_dataset( cfg, cfg.DATASET.TEST_DATASET, utils.data_loaders.DatasetSubset.VAL), batch_size=1, num_workers=cfg.CONST.N_WORKERS, pin_memory=True, shuffle=False) # Set up networks tflownet = TinyFlowNet(cfg) rmnet = RMNet(cfg) tflownet.apply(utils.helpers.init_weights) rmnet.kv_memory.apply(utils.helpers.init_weights) rmnet.kv_query.apply(utils.helpers.init_weights) rmnet.decoder.apply(utils.helpers.init_weights) logging.info('Parameters in TinyFlowNet: %d.' % (utils.helpers.count_parameters(tflownet))) logging.info('Parameters in RMNet: %d.' % (utils.helpers.count_parameters(rmnet))) # Move the network to GPU if possible if torch.cuda.is_available(): if torch.__version__ >= '1.2.0' and cfg.TRAIN.USE_BATCH_NORM: torch.distributed.init_process_group( 'nccl', init_method='file:///tmp/rmnet-%s' % uuid.uuid4().hex, world_size=1, rank=0) tflownet = torch.nn.SyncBatchNorm.convert_sync_batchnorm(tflownet) rmnet = torch.nn.SyncBatchNorm.convert_sync_batchnorm(rmnet) tflownet = torch.nn.DataParallel(tflownet).cuda() rmnet = torch.nn.DataParallel(rmnet).cuda() # Create the optimizers network = rmnet if cfg.TRAIN.NETWORK == 'RMNet' else tflownet optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, network.parameters()), lr=cfg.TRAIN.LEARNING_RATE, weight_decay=cfg.TRAIN.WEIGHT_DECAY, betas=cfg.TRAIN.BETAS) lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, cfg.TRAIN.N_EPOCHS) # Set up loss functions l1_loss = torch.nn.L1Loss() nll_loss = torch.nn.NLLLoss(ignore_index=cfg.CONST.IGNORE_IDX) lovasz_loss = LovaszLoss(ignore_index=cfg.CONST.IGNORE_IDX) # Load the pretrained model if exists init_epoch = 0 best_metrics = None METRICS_THRESHOLD = Metrics( cfg.TEST.MAIN_METRIC_NAME, [cfg.TRAIN.CKPT_SAVE_THRESHOLD for i in range(len(Metrics.names()))]) if 'WEIGHTS' in cfg.CONST: logging.info('Recovering from %s ...' % (cfg.CONST.WEIGHTS)) checkpoint = torch.load(cfg.CONST.WEIGHTS) best_metrics = Metrics(cfg.TEST.MAIN_METRIC_NAME, checkpoint['best_metrics']) tflownet.load_state_dict(checkpoint['tflownet']) rmnet.load_state_dict(checkpoint['rmnet']) logging.info( 'Recover completed. Current epoch = #%d; best metrics = %s.' 
% (init_epoch, best_metrics)) # Set up folders for logs, snapshot and checkpoints output_dir = os.path.join(cfg.DIR.OUTPUT_DIR, '%s', cfg.CONST.EXP_NAME) cfg.DIR.CHECKPOINTS = output_dir % 'checkpoints' cfg.DIR.LOGS = output_dir % 'logs' if not os.path.exists(cfg.DIR.CHECKPOINTS): os.makedirs(cfg.DIR.CHECKPOINTS) # Create tensorboard writers train_writer = SummaryWriter(cfg, 'train') val_writer = SummaryWriter(cfg, 'test') # Backup current code snapshot cfg.DIR.SNAPSHOTS = os.path.join(cfg.DIR.OUTPUT_DIR, 'snapshots') if not os.path.exists(cfg.DIR.SNAPSHOTS): os.makedirs(cfg.DIR.SNAPSHOTS) with zipfile.ZipFile( os.path.join(cfg.DIR.SNAPSHOTS, '%s.zip' % cfg.CONST.EXP_NAME), 'w') as zf: root_dir = os.getcwd() for dirname, subdirs, files in os.walk(root_dir): if os.path.normpath(dirname).find( os.path.normpath(cfg.DIR.OUTPUT_DIR)) != -1: continue _dirname = os.path.relpath(dirname, root_dir) zf.write(_dirname) for filename in files: zf.write(os.path.join(_dirname, filename)) # Training/Testing the network n_batches = len(train_data_loader) last_epoch_idx_keep_frame_steps = -cfg.TRAIN.N_EPOCHS for epoch_idx in range(init_epoch + 1, cfg.TRAIN.N_EPOCHS + 1): epoch_start_time = time() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() if cfg.TRAIN.USE_BATCH_NORM: tflownet.train() rmnet.train() else: tflownet.eval() rmnet.eval() # Update frame step if cfg.TRAIN.USE_RANDOM_FRAME_STEPS: if epoch_idx >= cfg.TRAIN.EPOCH_INDEX_FIXING_FRAME_STEPS and \ epoch_idx <= last_epoch_idx_keep_frame_steps + cfg.TRAIN.N_EPOCHS_KEEP_FRAME_STEPS: # Keep the frame step == 1 when JF Mean exceed a threshold for several epochs max_frame_steps = 1 else: max_frame_steps = random.randint( 1, min(cfg.TRAIN.MAX_FRAME_STEPS, epoch_idx // 5 + 2)) train_data_loader.dataset.set_frame_step( random.randint(1, max_frame_steps)) logging.info('[Epoch %d/%d] Set frame step to %d' % (epoch_idx, cfg.TRAIN.N_EPOCHS, train_data_loader.dataset.frame_step)) batch_end_time = time() for batch_idx, (video_name, n_objects, frames, masks, optical_flows) in enumerate(train_data_loader): n_itr = (epoch_idx - 1) * n_batches + batch_idx data_time.update(time() - batch_end_time) try: frames = utils.helpers.var_or_cuda(frames) masks = utils.helpers.var_or_cuda(masks) optical_flows = utils.helpers.var_or_cuda(optical_flows) est_flows = tflownet(frames) est_flows = utils.helpers.var_or_cuda(est_flows) est_probs = rmnet(frames, masks, optical_flows, n_objects, cfg.TRAIN.MEMORIZE_EVERY) est_probs = utils.helpers.var_or_cuda( est_probs[:, 1:]).permute(0, 2, 1, 3, 4) masks = torch.argmax(masks[:, 1:], dim=2) if cfg.TRAIN.NETWORK == 'TinyFlowNet': loss = l1_loss(est_flows, optical_flows) else: # RMNet loss = lovasz_loss(est_probs, masks) + nll_loss( torch.log(est_probs), masks) losses.update(loss.item()) tflownet.zero_grad() rmnet.zero_grad() loss.backward() optimizer.step() except Exception as ex: logging.exception(ex) continue train_writer.add_scalar('Loss/Batch', loss.item(), n_itr) batch_time.update(time() - batch_end_time) batch_end_time = time() logging.info( '[Epoch %d/%d][Batch %d/%d] BatchTime = %.3f (s) DataTime = %.3f (s) Loss = %.4f' % (epoch_idx, cfg.TRAIN.N_EPOCHS, batch_idx + 1, n_batches, batch_time.val(), data_time.val(), losses.val())) lr_scheduler.step() epoch_end_time = time() train_writer.add_scalar('Loss/Epoch', losses.avg(), epoch_idx) logging.info('[Epoch %d/%d] EpochTime = %.3f (s) Loss = %.4f' % (epoch_idx, cfg.TRAIN.N_EPOCHS, epoch_end_time - epoch_start_time, losses.avg())) # Evaluate the current 
model metrics = test_net(cfg, epoch_idx, val_data_loader, val_writer, tflownet, rmnet) if metrics.state_dict( )[cfg.TEST.MAIN_METRIC_NAME] > cfg.TRAIN.KEEP_FRAME_STEPS_THRESHOLD: last_epoch_idx_keep_frame_steps = epoch_idx # Save ckeckpoints if epoch_idx % cfg.TRAIN.CKPT_SAVE_FREQ == 0 and metrics.better_than( METRICS_THRESHOLD): output_path = os.path.join(cfg.DIR.CHECKPOINTS, 'ckpt-epoch-%03d.pth' % epoch_idx) torch.save({ 'epoch_index': epoch_idx, 'best_metrics': metrics.state_dict(), 'tflownet': tflownet.state_dict(), 'rmnet': rmnet.state_dict() }, output_path) # yapf: disable logging.info('Saved checkpoint to %s ...' % output_path) if metrics.better_than(best_metrics): output_path = os.path.join(cfg.DIR.CHECKPOINTS, 'ckpt-best.pth') best_metrics = metrics torch.save({ 'epoch_index': epoch_idx, 'best_metrics': metrics.state_dict(), 'tflownet': tflownet.state_dict(), 'rmnet': rmnet.state_dict() }, output_path) # yapf: disable logging.info('Saved checkpoint to %s ...' % output_path) train_writer.close() val_writer.close()
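# NOTE: sketch of restoring the two-network checkpoint that train_net() above
# writes (keys 'epoch_index', 'best_metrics', 'tflownet', 'rmnet'); the helper
# itself is illustrative and not part of the original pipeline.
def _load_rmnet_checkpoint_sketch(path, tflownet, rmnet, cfg):
    checkpoint = torch.load(path)
    tflownet.load_state_dict(checkpoint['tflownet'])
    rmnet.load_state_dict(checkpoint['rmnet'])
    best_metrics = Metrics(cfg.TEST.MAIN_METRIC_NAME, checkpoint['best_metrics'])
    return checkpoint['epoch_index'], best_metrics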