def evaluate(model, loader, criterion, device): model.eval() losses = [] accuracies = [] ious = [] with torch.no_grad(): for points, labels in tqdm(loader, desc='Validation', leave=False): points = points.to(device) labels = labels.to(device) scores = model(points) loss = criterion(scores, labels) losses.append(loss.cpu().item()) accuracies.append(accuracy(scores, labels)) ious.append(intersection_over_union(scores, labels)) return np.mean(losses), np.nanmean(np.array(accuracies), axis=0), np.nanmean(np.array(ious), axis=0)
def train(args): train_path = args.dataset / args.train_dir val_path = args.dataset / args.val_dir logs_dir = args.logs_dir / args.name logs_dir.mkdir(exist_ok=True, parents=True) # determine number of classes try: with open(args.dataset / 'classes.json') as f: labels = json.load(f) num_classes = len(labels.keys()) except FileNotFoundError: num_classes = int(input("Number of distinct classes in the dataset: ")) train_loader, val_loader = data_loaders(args.dataset, args.dataset_sampling, batch_size=cfg.batch_size, num_workers=args.num_workers, pin_memory=True) d_in = next(iter(train_loader))[0].size(-1) model = RandLANet(d_in, num_classes, num_neighbors=args.neighbors, decimation=args.decimation, device=args.gpu) print('Computing weights...', end='\t') samples_per_class = np.array(cfg.class_weights) # weight = samples_per_class / float(sum(samples_per_class)) # class_weights = 1 / (weight + 0.02) # effective = 1.0 - np.power(0.99, samples_per_class) # class_weights = (1 - 0.99) / effective # class_weights = class_weights / (np.sum(class_weights) * num_classes) # class_weights = class_weights / float(sum(class_weights)) # weights = torch.tensor(class_weights).float().to(args.gpu) n_samples = torch.tensor(cfg.class_weights, dtype=torch.float, device=args.gpu) ratio_samples = n_samples / n_samples.sum() weights = 1 / (ratio_samples + 0.02) #weights = F.softmin(n_samples) # weights = (1/ratio_samples) / (1/ratio_samples).sum() print('Done.') print('Weights:', weights) criterion = nn.CrossEntropyLoss(weight=weights) optimizer = torch.optim.Adam(model.parameters(), lr=args.adam_lr) scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, args.scheduler_gamma) first_epoch = 1 if args.load: path = max(list((args.logs_dir / args.load).glob('*.pth'))) print(f'Loading {path}...') checkpoint = torch.load(path) first_epoch = checkpoint['epoch'] + 1 model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) scheduler.load_state_dict(checkpoint['scheduler_state_dict']) with SummaryWriter(logs_dir) as writer: for epoch in range(first_epoch, args.epochs + 1): print(f'=== EPOCH {epoch:d}/{args.epochs:d} ===') t0 = time.time() # Train model.train() # metrics losses = [] accuracies = [] ious = [] # iterate over dataset for points, labels in tqdm(train_loader, desc='Training', leave=False): points = points.to(args.gpu) labels = labels.to(args.gpu) optimizer.zero_grad() scores = model(points) logp = torch.distributions.utils.probs_to_logits( scores, is_binary=False) loss = criterion(logp, labels) # logpy = torch.gather(logp, 1, labels) # loss = -(logpy).mean() loss.backward() optimizer.step() losses.append(loss.cpu().item()) accuracies.append(accuracy(scores, labels)) ious.append(intersection_over_union(scores, labels)) scheduler.step() accs = np.nanmean(np.array(accuracies), axis=0) ious = np.nanmean(np.array(ious), axis=0) val_loss, val_accs, val_ious = evaluate(model, val_loader, criterion, args.gpu) loss_dict = { 'Training loss': np.mean(losses), 'Validation loss': val_loss } acc_dicts = [{ 'Training accuracy': acc, 'Validation accuracy': val_acc } for acc, val_acc in zip(accs, val_accs)] iou_dicts = [{ 'Training accuracy': iou, 'Validation accuracy': val_iou } for iou, val_iou in zip(ious, val_ious)] # acc_dicts = [ # { # f'{i:02d}_train_acc': acc, # f'{}': val_acc # } # for i, (acc, val_accs) in enumerate(zip(accs, val_accs)) # ] t1 = time.time() d = t1 - t0 # Display results for k, v in loss_dict.items(): print(f'{k}: {v:.7f}', end='\t') print() print('Accuracy ', *[f'{i:>5d}' for i in range(num_classes)], ' OA', sep=' | ') print('Training: ', *[ f'{acc:.3f}' if not np.isnan(acc) else ' nan' for acc in accs ], sep=' | ') print('Validation: ', *[ f'{acc:.3f}' if not np.isnan(acc) else ' nan' for acc in val_accs ], sep=' | ') print('IoU ', *[f'{i:>5d}' for i in range(num_classes)], ' mIoU', sep=' | ') print('Training: ', *[ f'{iou:.3f}' if not np.isnan(iou) else ' nan' for iou in ious ], sep=' | ') print('Validation: ', *[ f'{iou:.3f}' if not np.isnan(iou) else ' nan' for iou in val_ious ], sep=' | ') print( 'Time elapsed:', '{:.0f} s'.format(d) if d < 60 else '{:.0f} min {:02.0f} s'.format(*divmod(d, 60))) # send results to tensorboard writer.add_scalars('Loss', loss_dict, epoch) for i in range(num_classes): writer.add_scalars(f'Per-class accuracy/{i+1:02d}', acc_dicts[i], epoch) writer.add_scalars(f'Per-class IoU/{i+1:02d}', iou_dicts[i], epoch) writer.add_scalars('Per-class accuracy/Overall', acc_dicts[-1], epoch) writer.add_scalars('Per-class IoU/Mean IoU', iou_dicts[-1], epoch) if epoch % args.save_freq == 0: torch.save( dict(epoch=epoch, model_state_dict=model.state_dict(), optimizer_state_dict=optimizer.state_dict(), scheduler_state_dict=scheduler.state_dict()), args.logs_dir / args.name / f'checkpoint_{epoch:02d}.pth')
def train(args): train_path = args.dataset / args.train_dir val_path = args.dataset / args.val_dir logs_dir = args.logs_dir / args.name logs_dir.mkdir(exist_ok=True, parents=True) num_classes = len(cfg.class_weights) train_loader, val_loader = data_loaders( args.dataset, args.dataset_sampling, batch_size=args.batch_size, num_workers=args.num_workers, pin_memory=True, ) d_in = next(iter(train_loader))[0].size(-1) model = RandLANet( d_in, num_classes, num_neighbors=args.neighbors, decimation=args.decimation, device=args.gpu, ) print("Computing weights...", end="\t") samples_per_class = np.array(cfg.class_weights) n_samples = torch.tensor(cfg.class_weights, dtype=torch.float, device=args.gpu) ratio_samples = n_samples / n_samples.sum() weights = 1 / (ratio_samples + 0.02) print("Done.") print("Weights:", weights) criterion = nn.CrossEntropyLoss(weight=weights) optimizer = torch.optim.Adam(model.parameters(), lr=args.adam_lr) scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, args.scheduler_gamma) first_epoch = 1 if args.load: path = max(list((args.logs_dir / args.load).glob("*.pth"))) print(f"Loading {path}...") checkpoint = torch.load(path) first_epoch = checkpoint["epoch"] + 1 model.load_state_dict(checkpoint["model_state_dict"]) optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) with SummaryWriter(logs_dir) as writer: for epoch in range(first_epoch, args.epochs + 1): print(f"=== EPOCH {epoch:d}/{args.epochs:d} ===") t0 = time.time() # Train model.train() # metrics losses = [] accuracies = [] ious = [] # iterate over dataset for points, labels in tqdm(train_loader, desc="Training", leave=False): points = points.to(args.gpu) labels = labels.to(args.gpu) optimizer.zero_grad() scores = model(points) logp = torch.distributions.utils.probs_to_logits( scores, is_binary=False) loss = criterion(logp, labels) # logpy = torch.gather(logp, 1, labels) # loss = -(logpy).mean() loss.backward() optimizer.step() losses.append(loss.cpu().item()) accuracies.append(accuracy(scores, labels)) ious.append(intersection_over_union(scores, labels)) scheduler.step() accs = np.nanmean(np.array(accuracies), axis=0) ious = np.nanmean(np.array(ious), axis=0) val_loss, val_accs, val_ious = evaluate(model, val_loader, criterion, args.gpu) loss_dict = { "Training loss": np.mean(losses), "Validation loss": val_loss } acc_dicts = [{ "Training accuracy": acc, "Validation accuracy": val_acc } for acc, val_acc in zip(accs, val_accs)] iou_dicts = [{ "Training accuracy": iou, "Validation accuracy": val_iou } for iou, val_iou in zip(ious, val_ious)] t1 = time.time() d = t1 - t0 # Display results for k, v in loss_dict.items(): print(f"{k}: {v:.7f}", end="\t") print() print( "Accuracy ", *[f"{i:>5d}" for i in range(num_classes)], " OA", sep=" | ", ) print( "Training: ", *[ f"{acc:.3f}" if not np.isnan(acc) else " nan" for acc in accs ], sep=" | ", ) print( "Validation: ", *[ f"{acc:.3f}" if not np.isnan(acc) else " nan" for acc in val_accs ], sep=" | ", ) print( "IoU ", *[f"{i:>5d}" for i in range(num_classes)], " mIoU", sep=" | ", ) print( "Training: ", *[ f"{iou:.3f}" if not np.isnan(iou) else " nan" for iou in ious ], sep=" | ", ) print( "Validation: ", *[ f"{iou:.3f}" if not np.isnan(iou) else " nan" for iou in val_ious ], sep=" | ", ) print(f"Time elapsed: {d}") # send results to tensorboard writer.add_scalars("Loss", loss_dict, epoch) for i in range(num_classes): writer.add_scalars(f"Per-class accuracy/{i+1:02d}", acc_dicts[i], epoch) writer.add_scalars(f"Per-class IoU/{i+1:02d}", iou_dicts[i], epoch) writer.add_scalars("Per-class accuracy/Overall", acc_dicts[-1], epoch) writer.add_scalars("Per-class IoU/Mean IoU", iou_dicts[-1], epoch) if epoch % args.save_freq == 0: torch.save( dict( epoch=epoch, model_state_dict=model.state_dict(), optimizer_state_dict=optimizer.state_dict(), scheduler_state_dict=scheduler.state_dict(), ), args.logs_dir / args.name / f"checkpoint_{epoch:02d}.pth", )