      trf.RandomVerticalFlip(p=0.5),
      trf.RandomTranspose(p=0.5),  # RandomTranspose is not in torchvision.transforms; assumed to be a custom transform
  ]))
  train_loader = DataLoader(train_dataset, batch_size=cfg.batch_size, shuffle=True)
  validation_loader = DataLoader(validation_dataset, batch_size=cfg.batch_size, shuffle=True)
  print('Dataset loaded!')

  # Set up model
  model = UNet().to(device)

  # Set up loss function
  loss_func = nn.L1Loss()
  # loss_func = perceptual_loss(perceptual_model='vgg16', dist_func=nn.MSELoss(), device=device) # Perceptual loss
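  # perceptual_loss is assumed to be defined elsewhere in this repo; a minimal
  # sketch would compare VGG16 feature activations of output and target, e.g.:
  #   vgg_feats = torchvision.models.vgg16(pretrained=True).features[:16].to(device).eval()
  #   perceptual = dist_func(vgg_feats(output), vgg_feats(target))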

  # Set up optimizer
  optimizer = optim.Adam(model.parameters(), lr=cfg.initial_learning_rate)


  # Optional: mixed (16-bit) precision training via NVIDIA Apex
  model, optimizer = amp.initialize(model, optimizer, opt_level='O2')
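  # Note: with Apex AMP, the backward pass in the training loop must go through
  # the scaled loss rather than calling loss.backward() directly, e.g.:
  #   with amp.scale_loss(loss, optimizer) as scaled_loss:
  #       scaled_loss.backward()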

  # Learning rate scheduling
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=cfg.epochs // 2, gamma=0.1)
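  # e.g. with cfg.epochs = 100, the learning rate is multiplied by 0.1 at epoch 50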

  # Set up TensorBoard writer
  writer = SummaryWriter('runs/'+cfg.run_name, flush_secs=1)

  # Load a checkpoint (if applicable) - by default, load the latest one
  start_epoch = 0
  if os.path.exists(cfg.checkpoint_to_load):
    checkpoint = torch.load(cfg.checkpoint_to_load)
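    # The restore step presumably continues here; a sketch (key names assumed):
    #   model.load_state_dict(checkpoint['state_dict'])
    #   optimizer.load_state_dict(checkpoint['optimizer'])
    #   start_epoch = checkpoint['epoch']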
Example #2
def main_worker(train_loader, val_loader, args):
    global best_loss

    # create model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f'=> device used: {device}')

    norm_kwargs = {
        'mode': args.norm_mode,
        'alpha_fwd': args.afwd,
        'alpha_bkw': args.abkw,
        'ecm': args.ecm
    }

    print("=> creating model...")
    model = UNet(args.classes, norm_layer=norm_layer,
                 norm_kwargs=norm_kwargs).to(device)
    print(model)

    print("=> creating optimizer...")
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    print("=> setting up learning rate scheduler...")
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=args.lr_milestone,
                                    gamma=args.lr_multiplier)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            scheduler.load_state_dict(checkpoint['scheduler'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # disable cudnn autotuning when a fixed seed is set, for reproducibility
    cudnn.benchmark = not args.seed

    if args.evaluate:
        validate(val_loader, model, args.start_epoch, device, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        # step the LR scheduler once per epoch (skipping the very first epoch)
        if epoch:
            scheduler.step()

        # train for one epoch
        train(train_loader, model, optimizer, epoch, device, args)

        # evaluate on validation set
        eval_loss = validate(val_loader, model, epoch, device, args)

        # remember best loss and save checkpoint
        is_best = eval_loss < best_loss
        best_loss = min(eval_loss, best_loss)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
            }, is_best, args)
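        # save_checkpoint is assumed to be defined elsewhere; a minimal sketch:
        #   path = os.path.join(args.model_dir, 'checkpoint.pth.tar')
        #   torch.save(state, path)
        #   if is_best:
        #       shutil.copyfile(path, os.path.join(args.model_dir, 'model_best.pth.tar'))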

    print('best val loss: {:.4f}'.format(best_loss))

    # load best model weights
    model_best_file = os.path.join(args.model_dir, 'model_best.pth.tar')
    if os.path.isfile(model_best_file):
        print("=> loading checkpoint '{}'".format(model_best_file))
        checkpoint = torch.load(model_best_file)
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            model_best_file, checkpoint['epoch']))

    return model
Example #3
    epochs = args.epoch

    tag = 'Unet'
    if args.model == 'Unet':
        model = UNet(start_fm=args.startfm).to(device)
    else:
        tag = 'UnetRes'
        model = UNet_ResNet(dropout=args.dropout,
                            start_fm=args.startfm).to(device)

    run.tags = [tag]

    criterion = nn.SmoothL1Loss()

    # criterion = Weighted_Cross_Entropy_Loss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # wandb watch
    run.watch(models=model, criterion=criterion, log='all', log_freq=10)

    # training
    best_iou = -1

    for epoch in range(epochs):
        t0 = time.time()
        train_loss, train_iou = train(model, device, trainloader, optimizer,
                                      criterion)
        t1 = time.time()
        print(
            f'Epoch: {epoch} | Train loss: {train_loss:.3f} | Train IoU: {train_iou:.3f} | Time: {(t1-t0):.1f}s'
        )
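        # The loop presumably continues with validation and best-IoU checkpointing;
        # a sketch (function and loader names assumed):
        #   val_loss, val_iou = test(model, device, validloader, criterion)
        #   if val_iou > best_iou:
        #       best_iou = val_iou
        #       torch.save(model.state_dict(), f'{tag}_best.pth')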