def train(model, train_set, test_set, save, valid_set, n_epochs, canal):
    """Train `model` on `train_set`, evaluating on `test_set` every epoch.

    Logs (CSV + TensorBoard) and checkpoints are written under `save`,
    namespaced by `canal`. The checkpoint with the best test dice is kept
    at `save`/canal_{canal}/model.dat.

    Args:
        model: network to train (moved to the available device here).
        train_set, test_set: datasets wrapped in DataLoaders below.
        save: output directory for logs and checkpoints.
        valid_set: optional validation dataset (loader is built but not
            used in this variant).
        n_epochs: number of training epochs.
        canal: channel index used to namespace output files.
    """
    # Data loaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.train_batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.test_batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer and LR schedule
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=cfg.milestones,
                                                     gamma=cfg.gamma)

    # Start log: global loss/iou/dice plus per-class (6 classes) iou/dice
    logs = (['loss', 'iou', 'dice']
            + ['iou{}'.format(i) for i in range(6)]
            + ['dice{}'.format(i) for i in range(6)])
    train_logs = ['train_' + log for log in logs]
    test_logs = ['test_' + log for log in logs]
    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'canal_{}'.format(canal),
                                                'Tensorboard_Results'))

    # Train model, tracking the best test dice seen so far
    best_dice = 0

    for epoch in range(n_epochs):
        # FIX: create the output dir under `save` (where torch.save below
        # writes), not cfg.save; exist_ok so restarted runs do not crash.
        os.makedirs(os.path.join(save, 'canal_{}'.format(canal)),
                    exist_ok=True)
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)
        scheduler.step()

        # Log results (meters are positionally aligned with the log keys)
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]

        log_results(save, epoch, log_dict, writer=writer)

        if cfg.save_all:
            # FIX: use a per-epoch filename; the original wrote to the same
            # 'model.dat' as the best-model save below, clobbering it.
            torch.save(
                model.state_dict(),
                os.path.join(save, 'canal_{}'.format(canal),
                             'model_epoch_{}.dat'.format(epoch)))

        if log_dict['test_dice'] > best_dice:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'canal_{}'.format(canal), 'model.dat'))
            best_dice = log_dict['test_dice']
            print('New best dice: %.4f' % log_dict['test_dice'])
        else:
            print('Current best dice: %.4f' % best_dice)
    writer.close()

    # FIX: append to the same logs.csv whose header was written above
    # (the original appended to save/canal_{}/logs.csv, a different file).
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best dice,%0.5f\n' % (best_dice))
    print('best dice: ', best_dice)
def train(model, train_set, test_set, save, valid_set, n_epochs):
    """
    Main training function.

    Trains `model` on `train_set`, evaluates on `test_set` every epoch, and
    keeps the checkpoint with the best test global dice at `save`/model.dat.
    Per-epoch checkpoints are written when cfg.save_all is set.

    Args:
        model: network to train (moved to the available device here).
        train_set, test_set: datasets wrapped in DataLoaders below.
        save: output directory for CSV logs, TensorBoard events, checkpoints.
        valid_set: optional validation dataset (loader built but unused here).
        n_epochs: number of epochs to run.
    """
    # Dataloaders
    train_loader = DataLoader(
        train_set, batch_size=cfg.batch_size, shuffle=True, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    test_loader = DataLoader(
        test_set, batch_size=cfg.batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(
            valid_set, batch_size=cfg.batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
        )
    # Model on cuda
    model = to_device(model)
    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        if cfg.use_syncbn:
            print("Using sync-bn")
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()
    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)
    # Start logging
    logs = ["loss", "iou", "dice", "iou0", "iou1", "dice0", "dice1", "dice_global"]
    train_logs = ["train_" + log for log in logs]
    test_logs = ["test_" + log for log in logs]

    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, "logs.csv"), "w") as f:
        f.write("epoch,")
        for key in log_dict.keys():
            f.write(key + ",")
        f.write("\n")
    with open(os.path.join(save, "loss_logs.csv"), "w") as f:
        f.write("iter,train_loss,\n")
    writer = SummaryWriter(log_dir=os.path.join(save, "Tensorboard_Results"))

    # train and test the model
    best_dice_global = 0
    # iteration is shared with the epoch helpers for loss_logs.csv rows
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        # FIX: create the per-epoch dir under `save` (where torch.save below
        # writes), not cfg.save; exist_ok so restarted runs do not crash.
        os.makedirs(os.path.join(save, "epoch_{}".format(epoch)), exist_ok=True)
        # get_last_lr() replaces the deprecated get_lr(), which reports
        # misleading values when called outside scheduler.step().
        print("learning rate: ", scheduler.get_last_lr())
        # train epoch
        train_meters = train_epoch(
            model=model_wrapper, loader=train_loader, optimizer=optimizer, epoch=epoch, n_epochs=n_epochs, writer=writer
        )
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch, is_test=True, writer=writer)
        scheduler.step()

        # Log results (meters are positionally aligned with the log keys)
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, "epoch_{}".format(epoch), "model.dat"))

        if log_dict["test_dice_global"] > best_dice_global:
            torch.save(model.state_dict(), os.path.join(save, "model.dat"))
            best_dice_global = log_dict["test_dice_global"]
            print("New best global dice: %.4f" % log_dict["test_dice_global"])
        else:
            print("Current best global dice: %.4f" % best_dice_global)
    # end
    writer.close()
    with open(os.path.join(save, "logs.csv"), "a") as f:
        f.write(",,,,best global dice,%0.5f\n" % (best_dice_global))
    print("best global dice: ", best_dice_global)
# Example #3 (scraped-page artifact, commented out so the module parses)
def train(model, train_set, test_set, save, valid_set, n_epochs):
    """Train `model` on `train_set`, evaluating on `test_set` every epoch.

    Tracks loss/accuracy, keeps the checkpoint with the best test iou at
    `save`/model.dat, and optionally saves per-epoch checkpoints when
    cfg.save_all is set.

    Args:
        model: network to train (moved to the available device here).
        train_set, test_set: datasets wrapped in DataLoaders below.
        save: output directory for logs and checkpoints.
        valid_set: optional validation dataset (loader built but unused here).
        n_epochs: number of epochs to run.
    """
    # Data loaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer and LR schedule
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=cfg.milestones,
                                                     gamma=cfg.gamma)

    # Start log
    logs = ['loss', 'acc']
    train_logs = ['train_' + log for log in logs]
    test_logs = ['test_' + log for log in logs]

    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # Train model, tracking the best test iou seen so far
    best_iou = 0

    for epoch in range(n_epochs):
        # FIX: create the per-epoch dir under `save` (where torch.save below
        # writes), not cfg.save; exist_ok so restarted runs do not crash.
        os.makedirs(os.path.join(save, 'epoch_{}'.format(epoch)),
                    exist_ok=True)
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)
        scheduler.step()

        # Log results (meters are positionally aligned with the log keys)
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]

        log_results(save, epoch, log_dict, writer=writer)

        if cfg.save_all:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))

        if log_dict['test_iou'] > best_iou:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_iou = log_dict['test_iou']
            print('New best iou: %.4f' % log_dict['test_iou'])
    writer.close()

    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best iou,%0.5f\n' % (best_iou))
    print('best iou: ', best_iou)
def train(model, train_set, test_set, save, valid_set, n_epochs):
    '''
    Main training function.

    Trains `model` with a OneCycle LR schedule (stepped inside train_epoch,
    hence the scheduler is passed down and not stepped here), evaluates on
    validation and test sets every epoch, and keeps the checkpoint with the
    best validation AUC at `save`/model.dat.

    Args:
        model: network to train (moved to the available device here).
        train_set, test_set: datasets wrapped in DataLoaders below.
        save: output directory for logs and checkpoints.
        valid_set: validation dataset used for model selection.
            NOTE(review): if None, valid_loader is None and the valid
            test_epoch call below would fail — confirm callers always
            pass a validation set to this variant.
        n_epochs: number of epochs to run.
    '''
    # Dataloaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)  # modified
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and per-batch OneCycle scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=cfg.max_lr,
        epochs=n_epochs,
        steps_per_epoch=len(train_loader))
    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    train_logs = ['train_' + log for log in logs] + [
        'train_auc',
    ]
    valid_logs = ['valid_' + log
                  for log in logs] + ['valid_auc', 'valid_auc_pat']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(train_logs + valid_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model, tracking the best validation AUC
    best_auc = 0
    # iteration is shared with the epoch helpers for loss_logs.csv rows
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        # FIX: create the per-epoch dir under `save` (where torch.save below
        # writes), not cfg.save; exist_ok so restarted runs do not crash.
        os.makedirs(os.path.join(save, 'epoch_{}'.format(epoch)),
                    exist_ok=True)
        # get_last_lr() replaces the deprecated get_lr(), which reports
        # misleading values for OneCycleLR outside scheduler.step().
        print('learning rate: ', scheduler.get_last_lr())
        # train epoch (scheduler is stepped per batch inside train_epoch)
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        # valid epoch
        valid_meters = test_epoch(model=model_wrapper,
                                  loader=valid_loader,
                                  epoch=epoch,
                                  is_test=False,
                                  writer=writer)
        # test epoch
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)

        # Log results (meters are positionally aligned with the log keys)
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(valid_logs):
            log_dict[key] = valid_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if cfg.save_all:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))

        if log_dict['valid_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['valid_auc']
            print('New best auc: %.4f' % log_dict['valid_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)
    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
def train(model, test_set, save, valid_set, n_epochs):
    '''
    Evaluation-only variant: runs a single test epoch on `test_set`,
    logs the metrics, and saves the model if its test AUC beats 0.

    Args:
        model: network to evaluate (moved to the available device here).
        test_set: dataset wrapped in a DataLoader below.
        save: output directory for logs and the checkpoint.
        valid_set: optional validation dataset (loader built but unused here).
        n_epochs: unused in this variant (single evaluation pass).
    '''
    # Dataloaders

    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # Start logging (test metrics only)
    logs = ['loss', 'acc', 'acc0', 'acc1']

    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # single evaluation pass
    best_auc = 0
    # iteration is shared with the epoch helpers for loss_logs.csv rows
    global iteration
    iteration = 0
    for epoch in range(1):
        # FIX: exist_ok so a rerun does not crash on the existing directory.
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)),
                    exist_ok=True)
        # test epoch
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)

        # Log results (meters are positionally aligned with the log keys)
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if log_dict['test_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['test_auc']
            print('New best auc: %.4f' % log_dict['test_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)
    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
# Example #6 (scraped-page artifact, commented out so the module parses)
def train(model,  test_set, save, n_epochs):
    '''
    Main training function
    '''
    # Evaluation loader — this variant runs a single test pass, no training.
    eval_loader = DataLoader(
        test_set,
        batch_size=cfg.batch_size,
        shuffle=False,
        pin_memory=torch.cuda.is_available(),
        num_workers=cfg.num_workers,
    )

    # Move model onto the available device.
    model = to_device(model)

    # Wrap for multi-GPU execution when several devices are present.
    net = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            net = DataParallelWithCallback(model).cuda()
        else:
            net = torch.nn.DataParallel(model).cuda()

    # Optimizer and scheduler (constructed for parity with the training
    # variants; only the optimizer's current LR is reported below).
    optimizer = torch.optim.Adam(net.parameters(), lr=cfg.lr)
    lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        factor=cfg.factor,
        patience=cfg.patience,
        min_lr=cfg.min_lr,
        eps=cfg.eps,
    )

    # Metric names tracked on the test set.
    metric_names = ['loss', 'acc', 'acc0', 'acc1']
    test_logs = ['test_' + m for m in metric_names] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        # One header row: epoch column followed by every metric column.
        f.write('epoch,' + ','.join(log_dict.keys()) + ',\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # Single evaluation pass.
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(1):

        print('learning rate: ', optimizer.state_dict()['param_groups'][0]['lr'])

        # Run the test epoch and collect its meters.
        test_meters = test_epoch(
            model=net,
            loader=eval_loader,
            epoch=epoch,
            is_test=True,
            writer=writer,
        )

        # Copy meters into the log dict (positionally aligned with keys).
        for idx, key in enumerate(test_logs):
            log_dict[key] = test_meters[idx]
        log_results(save, epoch, log_dict, writer=writer)

    # Done — flush TensorBoard events.
    writer.close()