# NOTE: project-specific pieces (cfg, env, the FCN* models, ResultsLogger,
# log_results, backup_code, set_seed, and DataParallelWithCallback from the
# synchronized-BatchNorm package) are defined elsewhere in the project.
import os
import time
from collections import OrderedDict

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter


def train_epoch(model, loader, optimizer, epoch, results_logger):
    '''
    One training epoch
    '''
    meters = AverageMeter()
    # Model on train mode
    model.train()
    global iteration
    intersection = 0
    union = 0
    end = time.time()  # per-batch timing; logged as the last meter entry
    for batch_idx, (x, y) in enumerate(loader):
        x = to_device(x)
        y = to_device(y)
        # forward and backward
        pred_logit = model(x)
        y_one_hot = categorical_to_one_hot(y, dim=1, expand_dim=False)

        loss = soft_dice_loss(pred_logit, y_one_hot)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # calculate metrics
        pred_classes = pred_logit.argmax(1)
        # accumulate foreground overlap for the epoch-level (global) Dice
        intersection += ((pred_classes == 1) * (y[:, 0] == 1)).sum().item()
        union += ((pred_classes == 1).sum() + y[:, 0].sum()).item()
        batch_size = y.size(0)

        iou = cal_batch_iou(pred_logit, y_one_hot)
        dice = cal_batch_dice(pred_logit, y_one_hot)
        # log (assumes a module-level SummaryWriter named `writer`)
        writer.add_scalar('train_loss_logs', loss.item(), iteration)
        with open(os.path.join(cfg.save, 'loss_logs.csv'), 'a') as f:
            f.write('%09d,%0.6f,\n' % (iteration + 1, loss.item()))
        iteration += 1

        logs = [loss.item(), iou[1:].mean().item(), dice[1:].mean().item()] + \
               [iou[i].item() for i in range(len(iou))] + \
               [dice[i].item() for i in range(len(dice))] + \
               [time.time() - end]  # batch time goes last, so meters.val[-1] is the 'Time' printed below
        meters.update(logs, batch_size)
        end = time.time()

        # print stats
        print_freq = int(2 // meters.val[-1]) + 1  # print roughly every 2 seconds
        if batch_idx % print_freq == 0:
            res = '\t'.join([
                'Epoch: [%d/%d]' % (epoch + 1, cfg.n_epochs),
                'Iter: [%d/%d]' % (batch_idx + 1, len(loader)),
                'Time %.3f (%.3f)' % (meters.val[-1], meters.avg[-1]),
                'Loss %.4f (%.4f)' % (meters.val[0], meters.avg[0]),
                'IOU %.4f (%.4f)' % (meters.val[1], meters.avg[1]),
                'DICE %.4f (%.4f)' % (meters.val[2], meters.avg[2]),
            ])
            print(res)
    dice_global = 2. * intersection / union  # 'union' here is |pred| + |target|, so this is the global Dice
    return meters.avg[:-1] + [dice_global]
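
# -----------------------------------------------------------------------------
# The helpers used above (AverageMeter, to_device, categorical_to_one_hot) live
# elsewhere in the project. The sketches below are plausible reconstructions
# inferred from the call sites; treat them as assumptions, not the original
# implementations.
# -----------------------------------------------------------------------------
class AverageMeter:
    """Tracks the latest values and running (batch-size-weighted) averages of a
    list of metrics, matching the meters.val / meters.avg usage above."""

    def __init__(self):
        self.val, self.sum, self.avg = [], [], []
        self.count = 0

    def update(self, vals, n=1):
        if not self.sum:
            self.sum = [0.0] * len(vals)
        self.val = list(vals)
        self.count += n
        self.sum = [s + v * n for s, v in zip(self.sum, vals)]
        self.avg = [s / self.count for s in self.sum]


def to_device(x):
    # move a tensor or module to the GPU when one is available
    return x.cuda() if torch.cuda.is_available() else x


def categorical_to_one_hot(y, dim=1, expand_dim=False, n_classes=2):
    # y carries integer class ids; scatter them into a one-hot channel at `dim`
    if expand_dim:
        y = y.unsqueeze(dim)
    shape = list(y.shape)
    shape[dim] = n_classes
    return torch.zeros(shape, device=y.device).scatter_(dim, y.long(), 1)
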
def test_epoch(model, loader, epoch, results_logger, optimizer=None):  # optimizer unused at eval time
    '''
    One test epoch
    '''
    meters = AverageMeter()
    model.eval()
    intersection = 0
    union = 0
    end = time.time()
    with torch.no_grad():
        for batch_idx, (x, y) in enumerate(loader):
            x = to_device(x)
            y = to_device(y)
            # forward
            pred_logit = model(x)
            # calculate metrics
            y_one_hot = categorical_to_one_hot(y, dim=1, expand_dim=False)
            pred_classes = pred_logit.argmax(1)
            intersection += ((pred_classes==1) * (y[:,0]==1)).sum().item()
            union += ((pred_classes==1).sum() + y[:,0].sum()).item()

            loss = soft_dice_loss(pred_logit, y_one_hot)
            batch_size = y.size(0)

            iou = cal_batch_iou(pred_logit, y_one_hot)
            dice = cal_batch_dice(pred_logit, y_one_hot)

            logs = [loss.item(), iou[1:].mean().item(), dice[1:].mean().item()] + \
                   [iou[i].item() for i in range(len(iou))] + \
                   [dice[i].item() for i in range(len(dice))] + \
                   [time.time() - end]
            meters.update(logs, batch_size)
            end = time.time()

            print_freq = int(2 // meters.val[-1]) + 1  # print roughly every 2 seconds
            if batch_idx % print_freq == 0:
                res = '\t'.join([
                    'Test',
                    'Iter: [%d/%d]' % (batch_idx + 1, len(loader)),
                    'Time %.3f (%.3f)' % (meters.val[-1], meters.avg[-1]),
                    'Loss %.4f (%.4f)' % (meters.val[0], meters.avg[0]),
                    'IOU %.4f (%.4f)' % (meters.val[1], meters.avg[1]),
                    'DICE %.4f (%.4f)' % (meters.val[2], meters.avg[2]),
                ])
                print(res)
    dice_global = 2. * intersection / union

    return meters.avg[:-1] + [dice_global]
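
# -----------------------------------------------------------------------------
# Hedged sketches of the loss/metric helpers used by both epoch functions. They
# assume logits of shape (N, C, ...) and a one-hot target of the same shape,
# which matches the call sites above; treat them as plausible reconstructions,
# not the project's exact code.
# -----------------------------------------------------------------------------
import torch.nn.functional as F


def soft_dice_loss(pred_logit, y_one_hot, eps=1e-6):
    prob = F.softmax(pred_logit, dim=1)
    dims = tuple(range(2, prob.dim()))            # reduce over spatial dims only
    intersection = (prob * y_one_hot).sum(dims)   # (N, C)
    cardinality = prob.sum(dims) + y_one_hot.sum(dims)
    dice = (2.0 * intersection + eps) / (cardinality + eps)
    return 1.0 - dice.mean()                      # averaged over batch and classes


def _confusion(pred_logit, y_one_hot):
    # hard per-class overlap counts, pooled over the whole batch
    # (hypothetical helper introduced for these sketches)
    n_classes = pred_logit.size(1)
    pred = F.one_hot(pred_logit.argmax(1), n_classes).movedim(-1, 1).float()
    dims = (0,) + tuple(range(2, pred.dim()))
    inter = (pred * y_one_hot).sum(dims)          # (C,)
    total = pred.sum(dims) + y_one_hot.sum(dims)  # (C,)
    return inter, total


def cal_batch_iou(pred_logit, y_one_hot, eps=1e-6):
    inter, total = _confusion(pred_logit, y_one_hot)
    return (inter + eps) / (total - inter + eps)  # per-class IoU, shape (C,)


def cal_batch_dice(pred_logit, y_one_hot, eps=1e-6):
    inter, total = _confusion(pred_logit, y_one_hot)
    return (2 * inter + eps) / (total + eps)      # per-class Dice, shape (C,)
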
def train(model, train_set, test_set, save, valid_set, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders

    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)
    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # train and test the model
    best_dice_global = 0
    global iteration
    iteration = 0
    for epoch in range(1):  # evaluation-only variant: a single pass

        # test epoch
        test_meters = test_epoch(
            model=model_wrapper,
            loader=test_loader,
            epoch=epoch,
            is_test=True,  # valid
            writer=None)
def train(model, train_set, test_set, save, valid_set, n_epochs, canal):

    # Data loaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.train_batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.test_batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    #optimizer= torch.optim.SGD(model_wrapper.parameters(), lr=cfg.lr, momentum=0.9)
    #optimizer = adabound.AdaBound(model_wrapper.parameters(), lr=1e-3, final_lr=0.1)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=cfg.milestones,
                                                     gamma=cfg.gamma)

    # Start log
    logs = ['loss', 'iou', 'dice'] + \
           ['iou{}'.format(i) for i in range(6)] + \
           ['dice{}'.format(i) for i in range(6)]
    train_logs = ['train_' + log for log in logs]
    test_logs = ['test_' + log for log in logs]
    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'canal_{}'.format(canal),
                                                'Tensorboard_Results'))

    # Train model
    best_dice = 0

    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, 'canal_{}'.format(canal)),
                    exist_ok=True)
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        # if (epoch+1)%5==0:
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]

        log_results(save, epoch, log_dict, writer=writer)

        if cfg.save_all:
            # save a per-epoch snapshot; writing to 'model.dat' here would
            # overwrite the best-model checkpoint saved below
            torch.save(
                model.state_dict(),
                os.path.join(save, 'canal_{}'.format(canal),
                             'model_epoch_{}.dat'.format(epoch)))

        if log_dict['test_dice'] > best_dice:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'canal_{}'.format(canal), 'model.dat'))
            best_dice = log_dict['test_dice']
            print('New best dice: %.4f' % log_dict['test_dice'])
            #print(2.*intersection/union)
        else:
            print('Current best dice: %.4f' % best_dice)
            #print(2.*intersection/union)
    writer.close()

    with open(os.path.join(save, 'canal_{}'.format(canal), 'logs.csv'),
              'a') as f:
        f.write(',,,,best dice,%0.5f\n' % (best_dice))
    # Final test of the best model on test set
    print('best dice: ', best_dice)
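
# A plausible log_results sketch matching the calls above (an assumption, not
# the original): append one CSV row per epoch and mirror each metric to
# TensorBoard.
def log_results(save, epoch, log_dict, writer=None):
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write('%03d,' % (epoch + 1))
        for value in log_dict.values():
            f.write('%0.6f,' % value)
        f.write('\n')
    if writer is not None:
        for key, value in log_dict.items():
            writer.add_scalar(key, value, epoch)
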
def train(model, train_set, test_set, save, valid_set, n_epochs):
    """
    Main training function
    """
    # Dataloaders
    train_loader = DataLoader(
        train_set, batch_size=cfg.batch_size, shuffle=True, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    test_loader = DataLoader(
        test_set, batch_size=cfg.batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
    )
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(
            valid_set, batch_size=cfg.batch_size, shuffle=False, pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers
        )
    # Model on cuda
    model = to_device(model)
    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        if cfg.use_syncbn:
            print("Using sync-bn")
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()
    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones, gamma=cfg.gamma)
    # Start logging
    logs = ["loss", "iou", "dice", "iou0", "iou1", "dice0", "dice1", "dice_global"]
    train_logs = ["train_" + log for log in logs]
    test_logs = ["test_" + log for log in logs]

    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, "logs.csv"), "w") as f:
        f.write("epoch,")
        for key in log_dict.keys():
            f.write(key + ",")
        f.write("\n")
    with open(os.path.join(save, "loss_logs.csv"), "w") as f:
        f.write("iter,train_loss,\n")
    writer = SummaryWriter(log_dir=os.path.join(save, "Tensorboard_Results"))

    # train and test the model
    best_dice_global = 0
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, "epoch_{}".format(epoch)), exist_ok=True)
        print("learning rate: ", scheduler.get_last_lr())
        # train epoch
        train_meters = train_epoch(
            model=model_wrapper, loader=train_loader, optimizer=optimizer, epoch=epoch, n_epochs=n_epochs, writer=writer
        )
        # test epoch
        test_meters = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch, is_test=True, writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if cfg.save_all:
            torch.save(model.state_dict(), os.path.join(save, "epoch_{}".format(epoch), "model.dat"))

        if log_dict["test_dice_global"] > best_dice_global:
            torch.save(model.state_dict(), os.path.join(save, "model.dat"))
            best_dice_global = log_dict["test_dice_global"]
            print("New best global dice: %.4f" % log_dict["test_dice_global"])
        else:
            print("Current best global dice: %.4f" % best_dice_global)
    # end
    writer.close()
    with open(os.path.join(save, "logs.csv"), "a") as f:
        f.write(",,,,best global dice,%0.5f\n" % (best_dice_global))
    print("best global dice: ", best_dice_global)
def train(model, train_set, test_set, save, valid_set, n_epochs):

    # Data loaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()

    # Optimizer
    # optimizer = torch.optim.SGD(model_wrapper.parameters(), lr=cfg.lr, weight_decay=cfg.wd, momentum=cfg.momentum)
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=cfg.milestones,
                                                     gamma=cfg.gamma)

    # Start log
    logs = ['loss', 'iou']  # keys must match the 'test_iou' lookup below
    train_logs = ['train_' + log for log in logs]
    test_logs = ['test_' + log for log in logs]

    log_dict = OrderedDict.fromkeys(train_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # Train model
    best_iou = 0

    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)), exist_ok=True)
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)
        scheduler.step()

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]

        log_results(save, epoch, log_dict, writer=writer)

        if cfg.save_all:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))

        if log_dict['test_iou'] > best_iou:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_iou = log_dict['test_iou']
            print('New best iou: %.4f' % log_dict['test_iou'])
    writer.close()

    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best iou,%0.5f\n' % (best_iou))
    # Final test of the best model on test set
    print('best iou: ', best_iou)
def train(model, train_set, test_set, save, valid_set, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders
    train_loader = DataLoader(train_set,
                              batch_size=cfg.batch_size,
                              shuffle=True,
                              pin_memory=(torch.cuda.is_available()),
                              num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)  # modified
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones,
    #                                                  gamma=cfg.gamma)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=cfg.max_lr,
        epochs=n_epochs,
        steps_per_epoch=len(train_loader))
    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    train_logs = ['train_' + log for log in logs] + ['train_auc']
    valid_logs = ['valid_' + log for log in logs] + ['valid_auc', 'valid_auc_pat']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(train_logs + valid_logs + test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(n_epochs):
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)), exist_ok=True)
        print('learning rate: ', scheduler.get_last_lr())
        # train epoch
        train_meters = train_epoch(model=model_wrapper,
                                   loader=train_loader,
                                   optimizer=optimizer,
                                   scheduler=scheduler,
                                   epoch=epoch,
                                   n_epochs=n_epochs,
                                   writer=writer)
        # valid epoch (this variant assumes valid_set was provided)
        valid_meters = test_epoch(model=model_wrapper,
                                  loader=valid_loader,
                                  epoch=epoch,
                                  is_test=False,
                                  writer=writer)
        # test epoch
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)
        # scheduler.step()  # not needed per epoch: OneCycleLR is stepped per batch inside train_epoch

        # Log results
        for i, key in enumerate(train_logs):
            log_dict[key] = train_meters[i]
        for i, key in enumerate(valid_logs):
            log_dict[key] = valid_meters[i]
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if cfg.save_all:
            torch.save(
                model.state_dict(),
                os.path.join(save, 'epoch_{}'.format(epoch), 'model.dat'))

        if log_dict['valid_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['valid_auc']
            print('New best auc: %.4f' % log_dict['valid_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)
    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
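
# When OneCycleLR drives the learning rate (as in the variant above), the
# scheduler has to be stepped once per batch rather than once per epoch, which
# is why it is passed into train_epoch and the per-epoch scheduler.step() is
# commented out. A minimal sketch of that inner-loop pattern (an assumption
# about the matching train_epoch, which is not shown here):
#
#     for batch_idx, (x, y) in enumerate(loader):
#         ...
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         scheduler.step()  # one LR step per batch for OneCycleLR
#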
def main(save_path=cfg.save_path):
    # back up the code for reproducibility
    backup_code(save_path)
    # set seed
    set_seed(cfg.seed)
    # acceleration
    torch.backends.cudnn.benchmark = True

    # Datasets
    train_set = LIDCSegDataset(crop_size=48, move=5, data_path=env.data, train=True)
    test_set = LIDCSegDataset(crop_size=48, move=5, data_path=env.data, train=False)
    train_loader = DataLoader(train_set, batch_size=cfg.batch_size, shuffle=True,
                                pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                                pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)

    # Define model
    model_dict = {'resnet18': FCNResNet, 'vgg16': FCNVGG, 'densenet121': FCNDenseNet}
    model = model_dict[cfg.backbone](pretrained=cfg.pretrained, num_classes=2, backbone=cfg.backbone)

    print(model)
    torch.save(model.state_dict(), os.path.join(save_path, 'model.dat'))

    # Model on cuda and then wrap model for multi-GPUs, if necessary
    model = to_device(model)
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:       
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model

    # optimizer and scheduler
    optimizer = getattr(torch.optim, cfg.optimizer_choice)(model_wrapper.parameters(), lr=cfg.optimizer_lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.scheduler_milestones,
                                                     gamma=cfg.scheduler_gamma)

    results_logger = ResultsLogger(save_path, train_log_items=[], test_log_items=[])

    # train and test the model
    best_dice_global = 0
    global iteration
    iteration = 0
    for epoch in range(cfg.n_epochs):  # n_epochs isn't a parameter of main(), so read it from cfg
        # os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)))
        print('learning rate: ', scheduler.get_last_lr())

        train_results = train_epoch(model=model_wrapper, loader=train_loader, optimizer=optimizer,
                                    epoch=epoch, results_logger=results_logger)
        test_results = test_epoch(model=model_wrapper, loader=test_loader, epoch=epoch, results_logger=results_logger)
        scheduler.step()

        results_logger.log_epoch(train_results, test_results)

        # save model checkpoint
        if cfg.save_all:
            epoch_dir = os.path.join(save_path, 'epoch_{}'.format(epoch))  # main() has save_path, not save
            os.makedirs(epoch_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(epoch_dir, 'model.dat'))

        dice_global = test_results[-1]  # the epoch functions return the global dice as the last entry
        if dice_global > best_dice_global:
            torch.save(model.state_dict(), os.path.join(save_path, 'best_model.dat'))
            best_dice_global = dice_global
            print('New best global dice: %.4f' % dice_global)
        else:
            print('Current best global dice: %.4f' % best_dice_global)

    results_logger.close(best_result=best_dice_global)
    print('best global dice: ', best_dice_global)
    print('Done!')
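
# Hedged sketches of the setup utilities main() relies on; these are
# assumptions about their behavior, not the project's actual code.
import random
import shutil

import numpy as np


def set_seed(seed):
    # seed every RNG the training loop touches, for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def backup_code(save_path):
    # snapshot the current script next to the results so runs stay auditable
    os.makedirs(save_path, exist_ok=True)
    shutil.copy(os.path.abspath(__file__),
                os.path.join(save_path, os.path.basename(__file__)))
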
def train(model, test_set, save, valid_set, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders

    test_loader = DataLoader(test_set,
                             batch_size=cfg.batch_size,
                             shuffle=False,
                             pin_memory=(torch.cuda.is_available()),
                             num_workers=cfg.num_workers)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = DataLoader(valid_set,
                                  batch_size=cfg.batch_size,
                                  shuffle=False,
                                  pin_memory=(torch.cuda.is_available()),
                                  num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']

    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(1):  # evaluation-only: a single pass
        os.makedirs(os.path.join(cfg.save, 'epoch_{}'.format(epoch)), exist_ok=True)
        # test epoch
        test_meters = test_epoch(model=model_wrapper,
                                 loader=test_loader,
                                 epoch=epoch,
                                 is_test=True,
                                 writer=writer)

        # Log results
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint
        if log_dict['test_auc'] > best_auc:
            torch.save(model.state_dict(), os.path.join(save, 'model.dat'))
            best_auc = log_dict['test_auc']
            print('New best auc: %.4f' % log_dict['test_auc'])
        else:
            print('Current best auc: %.4f' % best_auc)
    # end
    writer.close()
    with open(os.path.join(save, 'logs.csv'), 'a') as f:
        f.write(',,,,best auc,%0.5f\n' % (best_auc))
    print('best auc: ', best_auc)
def train(model, test_set, save, n_epochs):
    '''
    Main training function
    '''
    # Dataloaders
    test_loader = DataLoader(test_set, batch_size=cfg.batch_size, shuffle=False,
                                pin_memory=(torch.cuda.is_available()), num_workers=cfg.num_workers)
    # Model on cuda
    model = to_device(model)

    # Wrap model for multi-GPUs, if necessary
    model_wrapper = model
    print('num_of_cuda:', torch.cuda.device_count())
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print('multi-gpus')
        if cfg.use_syncbn:
            print('Using sync-bn')
            model_wrapper = DataParallelWithCallback(model).cuda()
        else:
            model_wrapper = torch.nn.DataParallel(model).cuda()

    # optimizer and scheduler
    optimizer = torch.optim.Adam(model_wrapper.parameters(), lr=cfg.lr)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg.milestones,gamma=cfg.gamma) 
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=cfg.factor, patience=cfg.patience, min_lr=cfg.min_lr, eps=cfg.eps)
    # scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=cfg.max_lr, epochs=n_epochs, steps_per_epoch=len(train_loader), 
    #                                                div_factor=cfg.div_factor, final_div_factor=cfg.final_div_factor)
    # Start logging
    logs = ['loss', 'acc', 'acc0', 'acc1']
    test_logs = ['test_' + log for log in logs] + ['test_auc', 'test_auc_pat']

    log_dict = OrderedDict.fromkeys(test_logs, 0)
    with open(os.path.join(save, 'logs.csv'), 'w') as f:
        f.write('epoch,')
        for key in log_dict.keys():
            f.write(key + ',')
        f.write('\n')
    with open(os.path.join(save, 'loss_logs.csv'), 'w') as f:
        f.write('iter,train_loss,\n')
    writer = SummaryWriter(log_dir=os.path.join(save, 'Tensorboard_Results'))

    # train and test the model
    best_auc = 0
    global iteration
    iteration = 0
    for epoch in range(1):  # evaluation-only: a single pass
        print('learning rate: ', optimizer.state_dict()['param_groups'][0]['lr'])

        # test epoch
        test_meters = test_epoch(
            model=model_wrapper,
            loader=test_loader,
            epoch=epoch,
            is_test=True,
            writer=writer)

        # Log results
        for i, key in enumerate(test_logs):
            log_dict[key] = test_meters[i]
        log_results(save, epoch, log_dict, writer=writer)
        # save model checkpoint: skipped, this variant only evaluates
        # if cfg.save_all: ...

    # end
    writer.close()