import os
from collections import OrderedDict

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter  # the repo may use tensorboardX instead

# Project-local helpers (cfg, env, to_device, DataParallelWithCallback,
# train_epoch, test_epoch, log_results, backup_code, set_seed, LIDCSegDataset,
# FCNResNet, FCNVGG, FCNDenseNet) are assumed to be importable from the repo.


def train(model, train_set, test_set, save, valid_set, n_epochs):
    """
    Main training function
    """
    # Dataloaders
    train_loader = DataLoader(
        train_set, batch_size=cfg.batch_size, shuffle=True, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    test_loader = DataLoader(
        test_set, batch_size=cfg.batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(
            valid_set, batch_size=cfg.batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
        )
    # Model on cuda
    model = to_device(model)
    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        if cfg.use_syncbn:
            print("Using sync-bn")
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()
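    # DataParallelWithCallback (presumably from the Synchronized-BatchNorm-PyTorch
    # package) keeps BatchNorm statistics synchronized across GPU replicas,
    # whereas plain DataParallel computes them independently on each GPU.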
    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)
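    # MultiStepLR multiplies the learning rate by cfg.gamma each time the epoch
    # index crosses one of cfg.milestones; the concrete values live in cfg.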
    # Start logging
    logs = ["loss", "iou", "dice", "iou0", "iou1", "dice0", "dice1", "dice_global"]
    train_logs = ["train_" + log for log in logs]
    test_logs = ["test_" + log for log in logs]

    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, "logs.csv"), "w") as f:
        f.write("epoch,")
        for key in log_dict.keys():
            f.write(key + ",")
        f.write("\n")
    with open(os.path.join(save, "loss_logs.csv"), "w") as f:
        f.write("iter,train_loss,\n")
    writer = SummaryWriter(log_dir=os.path.join(save, "Tensorboard_Results"))

    # train and test the model
    best_dice_global = 0
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(save, "epoch_{}".format(epoch)), exist_ok=True)
        print("learning rate: ", scheduler.get_last_lr())
        # train epoch
        train_meters = train_epoch(
            model=model_wrapper, loader=train_loader, optimizer=optimizer, epoch=epoch, n_epochs=n_epochs, writer=writer
        )
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch, is_test=True, writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, "epoch_{}".format(epoch), "model.dat"))

        if log_dict["test_dice_global"] > best_dice_global:
            torch.save(model.state_dict(), os.path.join(save, "model.dat"))
            best_dice_global = log_dict["test_dice_global"]
            print("New best global dice: %.4f" % log_dict["test_dice_global"])
        else:
            print("Current best global dice: %.4f" % best_dice_global)
    # end
    writer.close()
    with open(os.path.join(save, "logs.csv"), "a") as f:
        f.write(",,,,best global dice,%0.5f\n" % (best_dice_global))
    print("best global dice: ", best_dice_global)
def main(save_path=cfg.save_path):
    # back up your code 
    backup_code(save_path)
    # set seed
    set_seed(cfg.seed)
    # acceleration
    torch.backends.cudnn.benchmark = True

    # Datasets
    train_set = LIDCSegDataset(crop_size=48, move=5, data_path=env.data, train=True)
    test_set = LIDCSegDataset(crop_size=48, move=5, data_path=env.data, train=False)
    train_loader = DataLoader(train_set, batch_size=cfg.batch_size, shuffle=True,
                                pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                                pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Define model
    model_dict = {'resnet18': FCNResNet, 'vgg16': FCNVGG, 'densenet121': FCNDenseNet}
    model = model_dict[cfg.backbone](pretrained=cfg.pretrained, num_classes=2, backbone=cfg.backbone)

    print(model)
    torch.save(model.state_dict(), os.path.join(save_path, 'model.dat'))

    # Model on cuda and then wrap model for multi-GPUs, if necessary
    model = to_device(model)
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:       
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model

    # optimizer and scheduler
    optimizer = getattr(torch.optim, cfg.optimizer_choice)(model_wrapper.parameters(), lr=cfg.optimizer_lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.scheduler_milestones,
                                                     gamma=cfg.scheduler_gamma)
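    # The getattr call above resolves the optimizer class from its name in
    # cfg.optimizer_choice (e.g. 'Adam' or 'SGD'), so the choice is config-driven.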

    results_logger = ResultsLogger(save_path, train_log_items=[], test_log_items=[])

    # train and test the model
    best_dice_global = 0
    global iteration
    iteration = 0
    for epoch in range(cfg.n_epochs):  # n_epochs is not defined in main(); cfg.n_epochs is an assumed fix
        print('learning rate: ', scheduler.get_last_lr())

        train_results = train_epoch(model=model_wrapper, loader=train_loader, optimizer=optimizer,
                                    epoch=epoch, results_logger=results_logger)
        test_results = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch, results_logger=results_logger)
        scheduler.step()

        results_logger.log_epoch(train_results, test_results)

        # save model checkpoint
        if cfg.save_all:
            epoch_dir = os.path.join(save_path, 'epoch_{}'.format(epoch))
            os.makedirs(epoch_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(epoch_dir, 'model.dat'))

        # track the best global dice on the test set; test_results is assumed
        # to be a dict keyed by metric name
        if test_results['dice_global'] > best_dice_global:
            torch.save(model.state_dict(), os.path.join(save_path, 'best_model.dat'))
            best_dice_global = test_results['dice_global']
            print('New best global dice: %.4f' % best_dice_global)
        else:
            print('Current best global dice: %.4f' % best_dice_global)

    results_logger.close(best_result=best_dice_global)
    print('best global dice: ', best_dice_global)
    print('Done!')
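

# ResultsLogger is referenced in main() but not defined in this snippet. Below
# is a minimal sketch of the interface main() relies on; every detail is an
# assumption for illustration, not the repo's actual implementation, and
# train_epoch/test_epoch may call additional methods not sketched here.
class ResultsLogger:
    def __init__(self, save_path, train_log_items, test_log_items):
        self.path = os.path.join(save_path, 'logs.csv')
        self.epoch = 0
        with open(self.path, 'w') as f:
            f.write(','.join(['epoch'] + train_log_items + test_log_items) + '\n')

    def log_epoch(self, train_results, test_results):
        # assumes the epoch results are dicts mapping metric name -> value
        row = [self.epoch] + list(train_results.values()) + list(test_results.values())
        with open(self.path, 'a') as f:
            f.write(','.join(str(v) for v in row) + '\n')
        self.epoch += 1

    def close(self, best_result):
        with open(self.path, 'a') as f:
            f.write('best,%0.5f\n' % best_result)
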
Example #3
def train(model, train_set, test_set, save, valid_set, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones,
    #                                                  gamma=cfg.gamma)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=cfg.max_lr,
        epochs=n_epochs,
        steps_per_epoch=len(train_loader))
    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    train_logs = ['train_' + log for log in logs] + [
        'train_auc',
    ]
    valid_logs = ['valid_' + log
                  for log in logs] + ['valid_auc', 'valid_auc_pat']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(train_logs + valid_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(save, 'epoch_{}'.format(epoch)), exist_ok=True)
        print('learning rate: ', scheduler.get_last_lr())
        # train epoch
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        # valid epoch (this variant assumes valid_set is not None)
        valid_meters = test_epoch(model=model_wrapper,
                                  loader=valid_loader,
                                  epoch=epoch,
                                  is_test=False,
                                  writer=writer)
        # test epoch
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)
        # scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(valid_logs):
            log_dict[key] = valid_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if cfg.save_all:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))

        if log_dict['valid_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['valid_auc']
            print('New best auc: %.4f' % log_dict['valid_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)
    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
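

# In this variant the epoch-level scheduler.step() is commented out and the
# OneCycleLR scheduler is passed into train_epoch, which implies one step per
# batch. train_epoch itself is not shown; this loop is an assumed sketch of
# the shape it would need (criterion is a placeholder, and to_device is
# assumed to also accept tensors).
def train_epoch_sketch(model, loader, optimizer, scheduler, criterion):
    model.train()
    for x, y in loader:
        x, y = to_device(x), to_device(y)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        scheduler.step()  # OneCycleLR advances once per batch, not per epoch
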
Example #4
def train(model, test_set, save, n_epochs):
    '''
    Evaluation-only variant: runs a single test epoch, no training
    '''
    # Dataloaders
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                                pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler (defined for parity with the training variants;
    # neither is stepped in this evaluation-only function)
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=cfg.factor, patience=cfg.patience,
                                                           min_lr=cfg.min_lr, eps=cfg.eps)
    # scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=cfg.max_lr, epochs=n_epochs,
    #                                                 steps_per_epoch=len(train_loader),
    #                                                 div_factor=cfg.div_factor, final_div_factor=cfg.final_div_factor)
    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # evaluate the model (this variant only runs the test epoch)
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(1):
        print('learning rate: ', optimizer.state_dict()['param_groups'][0]['lr'])
        # test epoch
        test_meters = test_epoch(
            model=model_wrapper,
            loader=test_loader,
            epoch=epoch,
            is_test=True,
            writer=writer
        )

        # Log results
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
    # end 
    writer.close()
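

# Usage sketch (an illustration, not from the original source): this last
# variant only evaluates, so a caller would first load weights saved by one of
# the training runs above. The model class and paths mirror the earlier examples.
def run_evaluation_example():
    model = FCNResNet(pretrained=False, num_classes=2, backbone='resnet18')
    state = torch.load(os.path.join(cfg.save_path, 'model.dat'), map_location='cpu')
    model.load_state_dict(state)
    test_set = LIDCSegDataset(crop_size=48, move=5, data_path=env.data, train=False)
    train(model, test_set, save=cfg.save_path, n_epochs=1)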