Example #1
    finetune_transform = alb.Compose([
        alb.Resize(512, 512),
        ToTensorV2(),
    ])
    finetune_dataset = AerialDataset("train", "gen", finetune_imgs, finetune_masks, transform=finetune_transform)
    finetune_loader = DataLoader(finetune_dataset, batch_size=16, pin_memory=True, drop_last=True)

    eval_imgs = f"/home/admin/segmentation/task2/data/vaihingen/train/cropped/images/val"
    eval_masks = f"/home/admin/segmentation/task2/data/vaihingen/train/cropped/masks/val"
    eval_dataset = AerialDataset("val", "vaihingen", eval_imgs, eval_masks)
    eval_loader = DataLoader(eval_dataset, batch_size=16, pin_memory=True, drop_last=True)


    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)
    _optimizer = init_optimizer(config)
    optimizer = _optimizer(model.parameters(), lr=config["learning_rate"])

    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )

    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            "Loss": metrics.Loss(nn.CrossEntropyLoss()),
            "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
Example #2

args, unknown = parse_arguments(sys.argv)
gpus = [int(id) for id in args.gpu.split(',') if int(id) >= 0]
# device & visualizers
device, visualizers, model_params = utils.initialize(args)
plot_viz = visualizers[0]
img_viz = visualizers[1]
# model
model = models.get_model(args.model, model_params)
utils.init.initialize_weights(model,
                              args.weight_init,
                              pred_bias=args.pred_bias)
if len(gpus) > 1:
    model = torch.nn.parallel.DataParallel(model, gpus)
model = model.to(device)
# optimizer
optimizer = utils.init_optimizer(model, args)
# train data
train_data = dataset.dataset_360D.Dataset360D(args.train_path, " ",
                                              args.configuration,
                                              [256, 512])
train_data_iterator = torch.utils.data.DataLoader(
    train_data, batch_size=args.batch_size,
    num_workers=args.batch_size // len(gpus) // len(gpus),
    pin_memory=False, shuffle=True)
# test data
test_data = dataset.dataset_360D.Dataset360D(args.test_path, " ",
                                             args.configuration,
                                             [256, 512])
test_data_iterator = torch.utils.data.DataLoader(
    test_data, batch_size=args.test_batch_size,
    num_workers=args.batch_size // len(gpus) // len(gpus),
    pin_memory=False, shuffle=True)
print("Data size : {0} | Test size : {1}".format(
    args.batch_size * len(train_data_iterator),
    args.test_batch_size * len(test_data_iterator)))
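Example #2 only constructs the model, optimizer, and data iterators; the training loop itself is not part of the snippet. The sketch below shows how these pieces are typically consumed. It assumes the dataset yields (image, target) pairs and uses a stand-in L1 loss; the real Dataset360D may return a different batch structure, and the project defines its own objective.

criterion = torch.nn.L1Loss()  # stand-in loss, not the project's objective
model.train()
for epoch in range(args.epochs):  # `args.epochs` is an assumed argument
    for image, target in train_data_iterator:  # assumed (image, target) pairs
        image, target = image.to(device), target.to(device)
        optimizer.zero_grad()
        prediction = model(image)
        loss = criterion(prediction, target)
        loss.backward()
        optimizer.step()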
Example #3

def train():
    # initiate command line arguments, configuration file and logging block
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")

    init_logging(log_path=f"./logs/{args.name}")

    # determine whether to run training on cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    if device == 'cuda':
        device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset

    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model
    logger.info(
        f'creating BiSeNetV2 and optimizer with initial lr of {config["learning_rate"]}'
    )

    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(
        model, device_ids=list(range(args.main_cuda, 4))).to(device)

    # initiate loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])

    logger.info('creating trainer and evaluator engines')

    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )

    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            "loss": metrics.Loss(nn.CrossEntropyLoss()),
            "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "accuracy@0.5": metrics.Accuracy(thresholded_transform(0.5)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listeners to post-process results after each iteration and epoch

    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module},
                            args.name)
    attach_training_logger(trainer, writer=writer)

    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader,
                         writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])
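
Examples #1 and #3 also depend on the LossWithAux wrapper, which is not shown on this page. One plausible implementation is sketched below, assuming the model returns a dict with an "out" key plus auxiliary logits during training (as the evaluator's output_transform suggests); the class body and the 0.4 auxiliary weight are assumptions, not the original code.

class LossWithAux(nn.Module):
    # Assumed wrapper: applies the base loss to the main output and adds a
    # down-weighted term for every auxiliary head returned by the model.
    def __init__(self, loss_fn, aux_weight=0.4):
        super().__init__()
        self.loss_fn = loss_fn
        self.aux_weight = aux_weight

    def forward(self, y_pred, y):
        main_loss = self.loss_fn(y_pred["out"], y)
        aux_loss = sum(
            self.loss_fn(logits, y)
            for name, logits in y_pred.items() if name != "out"
        )
        return main_loss + self.aux_weight * aux_loss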