import albumentations as alb
import torch
from albumentations.pytorch import ToTensorV2
from ignite import engine, metrics
from torch import nn
from torch.utils.data import DataLoader

# AerialDataset, init_loss, LossWithAux, init_optimizer, thresholded_transform,
# config, model, device, finetune_imgs and finetune_masks are project-local and
# assumed to be in scope.

finetune_transform = alb.Compose([
    alb.Resize(512, 512),
    ToTensorV2(),
])
finetune_dataset = AerialDataset(
    "train",
    "gen",
    finetune_imgs,
    finetune_masks,
    transform=finetune_transform,
)
finetune_loader = DataLoader(
    finetune_dataset,
    batch_size=16,
    pin_memory=True,
    drop_last=True,
)

eval_imgs = "/home/admin/segmentation/task2/data/vaihingen/train/cropped/images/val"
eval_masks = "/home/admin/segmentation/task2/data/vaihingen/train/cropped/masks/val"
eval_dataset = AerialDataset("val", "vaihingen", eval_imgs, eval_masks)
eval_loader = DataLoader(
    eval_dataset,
    batch_size=16,
    pin_memory=True,
    drop_last=True,
)

_loss_fn = init_loss(config["loss_fn"])
loss_fn = LossWithAux(_loss_fn)
_optimizer = init_optimizer(config)
optimizer = _optimizer(model.parameters(), lr=config["learning_rate"])

trainer = engine.create_supervised_trainer(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
    non_blocking=True,
)
evaluator = engine.create_supervised_evaluator(
    model=model,
    metrics={
        "Loss": metrics.Loss(nn.CrossEntropyLoss()),
        "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
        "accuracy@0.5": metrics.Accuracy(thresholded_transform(0.5)),
        "IOU": metrics.IoU(metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        "mIOU": metrics.mIoU(metrics.ConfusionMatrix(num_classes=config["n_classes"])),
    },
    device=device,
    non_blocking=True,
    output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
)
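# `thresholded_transform` above is project-local and not defined in this file.
# A minimal sketch of what it plausibly is (an assumption, not the repo's
# actual implementation): a factory returning an ignite-style output_transform
# that binarizes predicted probabilities at the given threshold, so Accuracy
# compares 0/1 labels rather than raw scores.

def thresholded_transform(threshold=0.5):
    def transform(output):
        y_pred, y = output
        # binarize predicted probabilities at `threshold`
        return (y_pred > threshold).long(), y
    return transform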
import sys

import torch

# project-local modules, assumed importable from this repo
import dataset.dataset_360D
import models
import utils

# parse_arguments is a project-local helper assumed to be in scope
args, unknown = parse_arguments(sys.argv)
gpus = [int(id) for id in args.gpu.split(',') if int(id) >= 0]

# device & visualizers
device, visualizers, model_params = utils.initialize(args)
plot_viz = visualizers[0]
img_viz = visualizers[1]

# model
model = models.get_model(args.model, model_params)
utils.init.initialize_weights(model, args.weight_init, pred_bias=args.pred_bias)
if len(gpus) > 1:
    model = torch.nn.parallel.DataParallel(model, gpus)
model = model.to(device)

# optimizer
optimizer = utils.init_optimizer(model, args)

# train data
train_data = dataset.dataset_360D.Dataset360D(
    args.train_path, " ", args.configuration, [256, 512])
train_data_iterator = torch.utils.data.DataLoader(
    train_data,
    batch_size=args.batch_size,
    num_workers=args.batch_size // len(gpus) // len(gpus),
    pin_memory=False,
    shuffle=True,
)

# test data
test_data = dataset.dataset_360D.Dataset360D(
    args.test_path, " ", args.configuration, [256, 512])
test_data_iterator = torch.utils.data.DataLoader(
    test_data,
    batch_size=args.test_batch_size,
    num_workers=args.batch_size // len(gpus) // len(gpus),
    pin_memory=False,
    shuffle=True,
)

print("Data size : {0} | Test size : {1}".format(
    args.batch_size * len(train_data_iterator),
    args.test_batch_size * len(test_data_iterator)))
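# `parse_arguments` above is project-local. A hypothetical sketch based only on
# the attributes the script reads (flag names, types and defaults are
# assumptions), using argparse's parse_known_args to yield the (args, unknown)
# pair the caller unpacks:

import argparse

def parse_arguments(argv):
    parser = argparse.ArgumentParser(description="360D training script")
    parser.add_argument("--gpu", default="0", help="comma-separated GPU ids")
    parser.add_argument("--model", default="default")
    parser.add_argument("--weight_init", default="xavier")
    parser.add_argument("--pred_bias", default=None)
    parser.add_argument("--train_path", required=True)
    parser.add_argument("--test_path", required=True)
    parser.add_argument("--configuration", default="mono")
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--test_batch_size", type=int, default=8)
    return parser.parse_known_args(argv[1:])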
import os
import shutil

import torch
from ignite import engine, metrics
from torch import nn
from torch.utils import tensorboard

# parse_args, read_config, init_logging, logger, AerialDataset,
# create_data_loaders, BiSeNetV2, init_optimizer, init_loss, LossWithAux,
# thresholded_transform and the attach_* helpers are project-local imports.


def train():
    # initiate command line arguments, configuration file and logging block
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")
    init_logging(log_path=f"./logs/{args.name}")

    # determine whether to train the model on cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    if device == 'cuda':
        device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset
    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model
    logger.info(
        f'creating BiSeNetV2 and optimizer with initial lr of {config["learning_rate"]}'
    )
    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(model,
                            device_ids=list(range(args.main_cuda, 4))).to(device)

    # initiate loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')
    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )
    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            'loss': metrics.Loss(nn.CrossEntropyLoss()),
            "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "accuracy@0.5": metrics.Accuracy(thresholded_transform(0.5)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listeners to do post-processing after each iteration and epoch
    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
    attach_training_logger(trainer, writer=writer)
    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader, writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])
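# `LossWithAux` is project-local and not defined here. A plausible sketch under
# the assumption (suggested by the evaluator's output_transform above) that the
# model returns a dict with a main "out" head plus BiSeNetV2 auxiliary booster
# heads, summing the base loss over all heads with the auxiliary ones
# down-weighted; the aux_weight default is an assumption, not the repo's value.

import torch.nn as nn

class LossWithAux(nn.Module):
    def __init__(self, loss_fn, aux_weight=0.4):
        super().__init__()
        self.loss_fn = loss_fn
        self.aux_weight = aux_weight

    def forward(self, y_pred, y):
        loss = self.loss_fn(y_pred["out"], y)  # main segmentation head
        for name, aux_out in y_pred.items():
            if name != "out":  # auxiliary (booster) heads
                loss = loss + self.aux_weight * self.loss_fn(aux_out, y)
        return loss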