def _freeze_batchnorm(model):
    """Put every ``nn.BatchNorm2d`` layer in eval mode and stop training its affine parameters."""
    for module in model.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.eval()
            module.weight.requires_grad = False
            module.bias.requires_grad = False


def _log_sample_images(writer, dataset, split, images, labels, logits,
                       class_names, step):
    """Write TensorBoard images for the first sample of a batch.

    Args:
        writer: summary writer receiving the images.
        dataset: dataset object providing ``decode_segmap``.
        split: tag prefix, ``'train'`` or ``'val'``.
        images: input batch as yielded by the data loader (before the
            transfer to the compute device).
        labels: label batch as yielded by the data loader.
        logits: raw model outputs for the same batch.
        class_names: names used to tag the per-class maps, one per channel.
        step: global step recorded with every image.
    """
    # Channel 0 of sample 0 in the batch.
    tb_original_image = vutils.make_grid(
        images[0][0], normalize=True, scale_each=True)
    writer.add_image(f'{split}/original_image', tb_original_image, step)

    label_decoded = dataset.decode_segmap(np.squeeze(labels.numpy()[0]))
    writer.add_image(f'{split}/original_label', np_to_tb(label_decoded), step)

    probabilities = F.softmax(logits.detach(), dim=1)
    # max over the class dimension: values are the confidence, indices the
    # predicted channel number.
    confidence, prediction = probabilities.max(1)
    decoded = dataset.decode_segmap(np.squeeze(prediction.cpu().numpy()[0]))
    tb_confidence = vutils.make_grid(
        confidence.cpu()[0], normalize=True, scale_each=True)
    writer.add_image(f'{split}/predicted', np_to_tb(decoded), step)
    writer.add_image(f'{split}/confidence', tb_confidence, step)

    # Min-max rescale the raw outputs over the whole batch before plotting
    # each class channel of sample 0.
    unary = logits.detach().cpu()
    unary_min, unary_max = torch.min(unary), torch.max(unary)
    unary = (unary - unary_min) / (unary_max - unary_min)
    for channel, class_name in enumerate(class_names):
        tb_channel = vutils.make_grid(
            unary[0][channel], normalize=True, scale_each=True)
        writer.add_image(f'{split}_classes/_{class_name}', tb_channel, step)


def _append_csv(frame, path, **to_csv_kwargs):
    """Append *frame* to the CSV at *path*, writing the header only for a new file."""
    frame.to_csv(path, mode='a', header=not os.path.isfile(path),
                 **to_csv_kwargs)


def _save_checkpoint(model, optimizer, epoch, model_path, state_path):
    """Save the whole model to *model_path* and a state-dict checkpoint to *state_path*."""
    torch.save(model, model_path)
    state_dir = os.path.dirname(state_path)
    if state_dir:
        os.makedirs(state_dir, exist_ok=True)
    torch.save({'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()},
               state_path)


def train(args):
    """Train a patch-based segmentation model and log progress to TensorBoard.

    Builds the train/validation patch loaders, creates (or resumes) the
    model, then runs ``args.n_epoch`` epochs. After every epoch the training
    metrics are written to TensorBoard. When ``args.per_val != 0`` a
    validation pass also runs, its metrics are appended to CSV files, and the
    model is saved whenever the validation mean IoU does not get worse.
    Otherwise the model is saved every fifth epoch.

    Args:
        args: namespace of run options. Attributes read here: ``per_val``,
            ``arch``, ``aug``, ``stride``, ``patch_size``, ``batch_size``,
            ``resume``, ``scratch``, ``groups``, ``weight_std``, ``beta``,
            ``base_lr``, ``train``, ``freeze_bn``, ``class_weights``,
            ``exp``, ``n_epoch`` and ``clip``.

    Raises:
        FileNotFoundError: if ``args.resume`` is set but is not a file.
    """
    # Function-scope import so the block does not depend on how (or whether)
    # the module imports ``datetime``.
    from datetime import datetime

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Generate the train and validation sets for the model:
    split_train_val(args, per_val=args.per_val)

    current_time = datetime.now().strftime('%b%d_%H%M%S')
    log_dir = os.path.join('runs', current_time + "_{}".format(args.arch))
    writer = SummaryWriter(log_dir=log_dir)

    # Setup Augmentations
    if args.aug:
        data_aug = Compose(
            [RandomRotate(10), RandomHorizontallyFlip(), AddNoise()])
    else:
        data_aug = None

    train_set = PatchLoader(is_transform=True,
                            split='train',
                            stride=args.stride,
                            patch_size=args.patch_size,
                            augmentations=data_aug)
    # Without Augmentation:
    val_set = PatchLoader(is_transform=True,
                          split='val',
                          stride=args.stride,
                          patch_size=args.patch_size)
    n_classes = train_set.n_classes

    trainloader = data.DataLoader(train_set,
                                  batch_size=args.batch_size,
                                  num_workers=1,
                                  shuffle=True)
    valloader = data.DataLoader(val_set,
                                batch_size=args.batch_size,
                                num_workers=1)

    # Setup Metrics
    running_metrics = runningScore(n_classes)
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            # Fail here with a clear message instead of continuing with an
            # unbound ``model``.
            raise FileNotFoundError(
                "No checkpoint found at '{}'".format(args.resume))
        print("Loading model and optimizer from checkpoint '{}'".format(
            args.resume))
        # NOTE(security): torch.load unpickles arbitrary objects; only resume
        # from checkpoints that come from a trusted source.
        model = torch.load(args.resume)
        # The whole-model files written below are already DataParallel
        # wrappers; unwrap so the model is not wrapped twice.
        if isinstance(model, torch.nn.DataParallel):
            model = model.module
    else:
        # NOTE(review): ``pretrained`` receives ``args.scratch`` as-is; an
        # earlier variant of this call passed ``not args.scratch`` - confirm
        # which one is intended for ResNet9.
        model = getattr(ResNet9, 'resnet9')(
            pretrained=(args.scratch),
            num_classes=n_classes,
            num_groups=args.groups,
            weight_std=args.weight_std,
            beta=args.beta)

    # Use as many GPUs as we can
    model = torch.nn.DataParallel(
        model, device_ids=range(torch.cuda.device_count()))
    model = model.to(device)  # Send to GPU

    # PYTORCH NOTE: ALWAYS CONSTRUCT OPTIMIZERS AFTER MODEL IS PUSHED TO GPU/CPU.

    # Check if model has custom optimizer / loss
    if hasattr(model.module, 'optimizer'):
        print('Using custom optimizer')
        optimizer = model.module.optimizer
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=args.base_lr,
                                    weight_decay=0.0001, momentum=0.9)

    # NOTE(review): the extent of this ``if args.train`` block is assumed to
    # end after the optimizer setup; confirm against the intended control flow.
    freeze_bn = bool(args.train and args.freeze_bn)
    if args.train:
        model.train()
        if freeze_bn:
            _freeze_batchnorm(model)
        # NOTE(review): this replaces whichever optimizer was selected above,
        # including a model-provided one - confirm that is intended.
        optimizer = torch.optim.Adam(model.parameters(), lr=args.base_lr,
                                     weight_decay=0.0001, amsgrad=True)

    loss_fn = core.loss.cross_entropy

    if args.class_weights:
        # weights are inversely proportional to the frequency of the classes
        # in the training set
        class_weights = torch.tensor(
            [0.7151, 0.8811, 0.5156, 0.9346, 0.9683, 0.9852],
            device=device, requires_grad=False)
    else:
        class_weights = None

    best_iou = -100.0
    class_names = ['upper_ns', 'middle_ns', 'lower_ns',
                   'rijnland_chalk', 'scruff', 'zechstein']
    class_columns = list(range(n_classes))

    for arg in vars(args):
        text = arg + ': ' + str(getattr(args, arg))
        writer.add_text('Parameters/', text)

    model_fname = ('data/deeplab_' + str(args.base_lr) + '_batch_size_'
                   + str(args.batch_size) + '_' + args.exp + '_epoch_%d.pth')
    val_fname = ('val_lr_' + str(args.base_lr) + '_batch_size_'
                 + str(args.batch_size) + '_' + args.exp)
    metrics_dir = os.path.join(val_fname, "metrics")
    if args.per_val != 0:
        # to_csv does not create missing directories.
        os.makedirs(os.path.join(metrics_dir, "confusion_matrix"),
                    exist_ok=True)

    for epoch in range(args.n_epoch):
        step = epoch + 1

        # Training Mode:
        model.train()
        if freeze_bn:
            # model.train() switches every submodule, BatchNorm included,
            # back to training mode, so the freeze is re-applied each epoch.
            _freeze_batchnorm(model)
        loss_train, total_iteration = 0, 0

        for i, (images, labels) in enumerate(trainloader):
            images_dev, labels_dev = images.to(device), labels.to(device)

            outputs = model(images_dev)

            pred = outputs.detach().max(1)[1].cpu().numpy()
            gt = labels_dev.detach().cpu().numpy()
            running_metrics.update(gt, pred)

            loss = loss_fn(input=outputs, target=labels_dev,
                           weight=class_weights)
            loss_train += loss.item()

            optimizer.zero_grad()
            loss.backward()
            if args.clip != 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            total_iteration += 1

            if i % 20 == 0:
                print('epoch: {0}/{1}\t\t'
                      'iter: {2}/{3}\t\t'
                      'training Loss:{4:.4f}'.format(epoch + 1, args.n_epoch,
                                                     i + 1, len(trainloader),
                                                     loss.item()))

            if i == 0:
                # Visualise only the first batch of every epoch.
                _log_sample_images(writer, train_set, 'train', images, labels,
                                   outputs, class_names, step)

        # Average metrics, and save in writer()
        loss_train /= max(total_iteration, 1)  # guard against an empty loader
        score, class_iou = running_metrics.get_scores()
        writer.add_scalar('train/Pixel Acc', score['Pixel Acc: '], step)
        writer.add_scalar('train/Mean Class Acc',
                          score['Mean Class Acc: '], step)
        writer.add_scalar('train/Freq Weighted IoU',
                          score['Freq Weighted IoU: '], step)
        writer.add_scalar('train/Mean_IoU', score['Mean IoU: '], step)
        running_metrics.reset()
        writer.add_scalar('train/loss', loss_train, step)

        if args.per_val != 0:
            # Validation Mode:
            model.eval()
            loss_val, total_iteration_val = 0, 0
            with torch.no_grad():  # operations inside don't track history
                for i_val, (images_val, labels_val) in enumerate(valloader):
                    images_val_dev = images_val.to(device)
                    labels_val_dev = labels_val.to(device)

                    outputs_val = model(images_val_dev)

                    pred = outputs_val.detach().max(1)[1].cpu().numpy()
                    gt = labels_val_dev.detach().cpu().numpy()
                    running_metrics_val.update(gt, pred)

                    loss = loss_fn(input=outputs_val, target=labels_val_dev)
                    loss_val += loss.item()
                    total_iteration_val += 1

                    if i_val % 20 == 0:
                        print("Epoch [%d/%d] validation Loss: %.4f" %
                              (epoch+1, args.n_epoch, loss.item()))

                    if i_val == 0:
                        # Uses the validation outputs (not the last training
                        # batch) and the same step as every other image.
                        _log_sample_images(writer, train_set, 'val',
                                           images_val, labels_val,
                                           outputs_val, class_names, step)

            loss_val /= max(total_iteration_val, 1)
            score, class_iou = running_metrics_val.get_scores()

            # One row per epoch is appended to each metric file.
            scalar_files = (("Pixel Acc: ", "pixel_acc.csv"),
                            ("Mean Class Acc: ", "mean_class_acc.csv"),
                            ("Freq Weighted IoU: ", "freq_weighted_iou.csv"),
                            ("Mean IoU: ", "mean_iou.csv"))
            for key, csv_name in scalar_files:
                _append_csv(pd.DataFrame([score[key]]),
                            os.path.join(metrics_dir, csv_name), index=False)

            cname = os.path.join(
                metrics_dir, "confusion_matrix",
                "confusion_matrix_" + str(epoch + 1) + ".csv")
            pd.DataFrame(score["confusion_matrix"]).to_csv(cname, index=False)

            _append_csv(
                pd.DataFrame(score["Class Accuracy: "].reshape((1, n_classes)),
                             columns=class_columns),
                os.path.join(metrics_dir, "class_acc.csv"), index=False)
            _append_csv(
                pd.DataFrame(class_iou, columns=class_columns, index=[0]),
                os.path.join(metrics_dir, "cls_iu.csv"))

            writer.add_scalar('val/Pixel Acc', score['Pixel Acc: '], step)
            writer.add_scalar('val/Mean IoU', score['Mean IoU: '], step)
            writer.add_scalar('val/Mean Class Acc',
                              score['Mean Class Acc: '], step)
            writer.add_scalar('val/Freq Weighted IoU',
                              score['Freq Weighted IoU: '], step)
            writer.add_scalar('val/loss', loss_val, step)
            running_metrics_val.reset()

            if score['Mean IoU: '] >= best_iou:
                best_iou = score['Mean IoU: ']
                model_dir = os.path.join(log_dir, f"{args.arch}_model.pkl")
                _save_checkpoint(model, optimizer, epoch + 1, model_dir,
                                 model_fname % (epoch + 1))
        else:
            # validation is turned off:
            # just save the latest model:
            if (epoch+1) % 5 == 0:
                model_dir = os.path.join(
                    log_dir, f"{args.arch}_ep{epoch+1}_model.pkl")
                _save_checkpoint(model, optimizer, epoch + 1, model_dir,
                                 model_fname % (epoch + 1))

        writer.add_scalar('train/epoch_lr',
                          optimizer.param_groups[0]["lr"], step)

    writer.close()
