Example #1
def run(criterion, net_name, use_dataset_index, use_mask, save_img_pred: bool):
    path_data = config.path_data[use_dataset_index]

    # File paths
    path_data_root = path_data["dataset"]
    path_checkpoints = path_data["checkpoints"]
    path_result = os.path.join(path_data["result"], net_name)
    os.makedirs(path_result, exist_ok=True)

    net = UNet().to(config.device)
    for param in net.parameters():
        param.requires_grad = False

    path_pert = "/home/pengzx/deepLearning/result/Glaucoma/UNet/pert/"

    iou_total = []
    for index in config.checkpoints_indexes:
        path_pert_save = os.path.join(path_pert, str(index))

        attacked_dataset = MyAttackedDataset(path_data_root=path_data_root,
                                             phase="train",
                                             path_pert=path_pert_save,
                                             transform_list=transform_compose)
        attacked_data_loader = DataLoader(attacked_dataset,
                                          batch_size=1,
                                          shuffle=False)

        for index_2 in config.checkpoints_indexes:
            net.load_state_dict(
                torch.load(os.path.join(path_checkpoints,
                                        "{}_{}.pth".format(net_name, index_2)),
                           map_location=config.device))

            iou_list = []
            for i, (img, label, mask, pert,
                    name) in enumerate(attacked_data_loader):
                img, pert = img.to(config.device), pert.to(config.device)

                img_pert = img.clone() + pert

                pred = net(img_pert)
                pred[pred > 0] = 1.
                pred[pred < 1.] = 0.

                label[label > 0.5] = 1.
                label[label < 1.] = 0.
                iou = get_iou(pred[0].data.cpu().numpy(),
                              label.data.cpu().numpy())
                iou_list.append(iou)
            iou_list = np.array(iou_list)
            print("模型[{}]产生的扰动,模型[{}]的预测结果iou={}", index, index_2,
                  iou_list.mean())
            iou_total.append(iou_list)
    iou_total = np.array(iou_total)
    np.save("./iou_total.npy", iou_total)
Example #2
def main(config):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    train_dataset, valid_dataset = generate_datasets(
        config['data_dir'], valid_ids=config['val_ids'])
    # TODO: define and add data augmentation + image normalization
    # train_dataset.transform = train_transform
    # valid_dataset.transform = valid_transform
    transforms = A.Compose([
        A.Normalize(),  # TODO: change values
        ToTensorV2()
    ])
    train_dataset.transform = transforms
    valid_dataset.transform = transforms

    train_loader = DataLoader(train_dataset,
                              batch_size=config['batch_size'],
                              shuffle=True,
                              num_workers=config['num_workers'])
    valid_loader = DataLoader(valid_dataset,
                              batch_size=config['batch_size'],
                              shuffle=False,
                              num_workers=config['num_workers'])
    model = UNet()
    model = model.to(device)

    criterion = config['criterion']
    optimizer = torch.optim.Adam(params=model.parameters(), lr=3e-4)

    trainer = Trainer(model=model,
                      criterion=criterion,
                      optimizer=optimizer,
                      config=config,
                      train_loader=train_loader,
                      val_loader=valid_loader,
                      device=device)
    trainer.train()

    return model
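
A minimal invocation sketch for main: the config keys are inferred from the function body, the values and the BCEWithLogitsLoss criterion are assumptions, and Trainer may read further keys not shown here:

config = {
    'data_dir': './data',      # root passed to generate_datasets (assumed layout)
    'val_ids': [0, 1],         # ids held out for validation
    'batch_size': 8,
    'num_workers': 4,
    'criterion': torch.nn.BCEWithLogitsLoss(),
}
model = main(config)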
Example #3
def train(fold_idx=1):

    # 1. Load dataset
    dataset_train = ICH_CT_32(
        ROOT=config['dataset_root'],
        transform=T.Compose([T.ToTensor(),
                             T.Normalize([0.5], [0.5])]),
        is_train=True,
        fold_idx=fold_idx)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=config['batch_size'],
                                  shuffle=True,
                                  num_workers=1)

    dataset_eval = ICH_CT_32(ROOT=config['dataset_root'],
                             transform=T.Compose(
                                 [T.ToTensor(),
                                  T.Normalize([0.5], [0.5])]),
                             is_train=False,
                             fold_idx=fold_idx)
    dataloader_eval = DataLoader(dataset_eval,
                                 batch_size=config['batch_size'],
                                 shuffle=False,
                                 num_workers=1)

    # 2. Build model
    net = UNet()
    # net.finetune_from('pretrained_weights/vgg16-397923af.pth')
    net = nn.DataParallel(net, device_ids=[0])
    print(net)

    # 3. Criterion
    criterion = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 85.0]))

    # 4. Optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=config['lr'],
                          momentum=config['momentum'],
                          weight_decay=config['weight_decay'])
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.8)

    # 5. Tensorboard logger
    logger_train = Logger(logdir=os.path.join(config['log_folder'],
                                              'fold_{}'.format(fold_idx),
                                              'train'),
                          flush_secs=2)
    logger_eval = Logger(logdir=os.path.join(config['log_folder'],
                                             'fold_{}'.format(fold_idx),
                                             'eval'),
                         flush_secs=2)

    # 6. Train loop
    DSC_MAX, IOU1_MAX, sensitivity_MAX, specificity_MAX = -1.0, -1.0, -1.0, -1.0
    for epoch in range(config['num_epoch']):

        train_op(net, dataloader_train, criterion, optimizer, scheduler, epoch,
                 logger_train)
        DSC, IOU1, sensitivity, specificity = eval_op(net, dataloader_eval,
                                                      criterion, epoch,
                                                      logger_eval)

        torch.save(net.state_dict(),
                   os.path.join(config['save_folder'], 'UNet.newest.pkl'))

        if DSC_MAX <= DSC:
            DSC_MAX = DSC
            torch.save(net.state_dict(),
                       os.path.join(config['save_folder'], 'UNet.pkl'))
        if IOU1_MAX <= IOU1:
            IOU1_MAX = IOU1
        if sensitivity_MAX <= sensitivity:
            sensitivity_MAX = sensitivity
        if specificity_MAX <= specificity:
            specificity_MAX = specificity

    return DSC_MAX, IOU1_MAX, sensitivity_MAX, specificity_MAX, DSC, IOU1, sensitivity, specificity
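
A sketch of how train could be driven over the folds to report cross-validated scores (the five-fold count is an assumption):

if __name__ == '__main__':
    fold_results = []
    for fold_idx in range(1, 6):  # assuming five folds
        # train returns (DSC_MAX, IOU1_MAX, sensitivity_MAX, specificity_MAX,
        #                DSC, IOU1, sensitivity, specificity)
        fold_results.append(train(fold_idx))
    mean_best_dsc = sum(r[0] for r in fold_results) / len(fold_results)
    print('Mean best DSC over folds: {:.4f}'.format(mean_best_dsc))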
Example #4

def main():
    torch.backends.cudnn.benchmark = True
    args = getArgs()
    torch.manual_seed(args.seed)
    args.cuda = torch.cuda.is_available()
    if args.cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    # Initialize Horovod
    hvd.init()
    # Print the configuration used for training
    if hvd.rank() == 0:
        print("Training with configure: ")
        for arg in vars(args):
            print("{}:\t{}".format(arg, getattr(args, arg)))
        if not osp.exists(args.save_model_path):
            os.makedirs(args.save_model_path)
        # Save the training configuration
        with open(osp.join(args.save_model_path, 'train-config.json'),
                  'w') as f:
            json.dump(args.__dict__, f, indent=4)
    # Set the random seed so that weight initialization is identical on every GPU
    if args.cuda:
        # Pin GPU to local rank
        torch.cuda.set_device(hvd.local_rank())
        # This line may be redundant, but the Horovod maintainers recommend keeping it.
        torch.cuda.manual_seed(args.seed)
    # data
    dataset_train = SpineDataset(root=args.data, transform=my_transform)
    # Distributed training requires this sampler
    sampler_train = DistributedSampler(dataset_train,
                                       num_replicas=hvd.size(),
                                       rank=hvd.rank())
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=1,
                                  sampler=sampler_train,
                                  num_workers=args.num_workers,
                                  pin_memory=True)
    # model
    if args.network == 'DeepLab':
        if args.voc:
            model = gcv.models.get_deeplab_resnet101_voc(pretrained=True)
        elif args.ade:
            model = gcv.models.get_deeplab_resnet101_ade(pretrained=True)
        else:
            model = gcv.models.DeepLabV3(nclass=args.num_classes,
                                         backbone=args.backbone)
        model.auxlayer.conv5[-1] = nn.Conv2d(256,
                                             args.num_classes,
                                             kernel_size=1)
        model.head.block[-1] = nn.Conv2d(256, args.num_classes, kernel_size=1)
    elif args.network == 'FCN':
        if args.voc:
            model = gcv.models.get_fcn_resnet101_voc(pretrained=True)
        elif args.ade:
            model = gcv.models.get_fcn_resnet101_ade(pretrained=True)
        else:
            model = gcv.models.FCN(nclass=args.num_classes,
                                   backbone=args.backbone)
        model.auxlayer.conv5[-1] = nn.Conv2d(256,
                                             args.num_classes,
                                             kernel_size=1)
        model.head.conv5[-1] = nn.Conv2d(512, args.num_classes, kernel_size=1)
    elif args.network == 'PSPNet':
        if args.voc:
            model = gcv.models.get_psp_resnet101_voc(pretrained=True)
        elif args.ade:
            model = gcv.models.get_psp_resnet101_ade(pretrained=True)
        else:
            model = gcv.models.PSP(nclass=args.num_classes,
                                   backbone=args.backbone)
        model.auxlayer.conv5[-1] = nn.Conv2d(256,
                                             args.num_classes,
                                             kernel_size=1)
        model.head.conv5[-1] = nn.Conv2d(512, args.num_classes, kernel_size=1)
    elif args.network == 'UNet':
        model = UNet(n_class=args.num_classes,
                     backbone=args.backbone,
                     pretrained=True)
    model = convert_syncbn_model(model)
    model = model.to(device)

    # Wrap the optimizer with Horovod's distributed version
    # optimizer = torch.optim.Adam(model.parameters(), args.learning_rate * hvd.size())
    # Use different learning rates for different layer groups
    if args.network == 'UNet':
        optimizer = torch.optim.SGD([
            {
                'params': model.down_blocks.parameters(),
                'lr': args.learning_rate * 0.5
            },
            {
                'params': model.bridge.parameters()
            },
            {
                'params': model.head.parameters()
            },
        ],
                                    lr=args.learning_rate,
                                    momentum=0.9,
                                    weight_decay=0.0001)
    elif args.network in ['FCN', 'PSPNet', 'DeepLab']:
        optimizer = optim.SGD([{
            'params': model.pretrained.parameters(),
            'lr': args.learning_rate * 0.5
        }, {
            'params': model.auxlayer.parameters()
        }, {
            'params': model.head.parameters()
        }],
                              lr=args.learning_rate,
                              momentum=0.9,
                              weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9,
                              weight_decay=0.0001)
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())
    # Broadcast the model and optimizer state from rank 0 to all GPUs
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    # lr scheduler
    def poly_lr_scheduler(epoch, num_epochs=args.num_epochs, power=args.power):
        return (1 - epoch / num_epochs)**power

    lr_scheduler = LambdaLR(optimizer=optimizer, lr_lambda=poly_lr_scheduler)

    def train(epoch):
        model.train()
        # Horovod: set epoch to sampler for shuffling.
        sampler_train.set_epoch(epoch)
        loss_fn = nn.CrossEntropyLoss()
        for batch_idx, (data, target) in enumerate(dataloader_train):
            data = data.to(device).squeeze()
            target = target.to(device).squeeze()
            for batch_data, batch_target in zip(
                    torch.split(data, args.batch_size),
                    torch.split(target, args.batch_size)):
                optimizer.zero_grad()
                output = model(batch_data)
                if args.network in ['FCN', 'PSPNet', 'DeepLab']:
                    # Auxiliary head contributes with a smaller weight
                    loss = loss_fn(output[0], batch_target) \
                           + 0.2 * loss_fn(output[1], batch_target)
                elif args.network == 'UNet':
                    loss = loss_fn(output, batch_target)
                loss.backward()
                optimizer.step()
            if hvd.rank() == 0 and batch_idx % args.log_interval == 0:
                print("Train loss: ", loss.item())
        # Step the poly LR schedule once per epoch, after the optimizer updates
        lr_scheduler.step()

    for epoch in range(args.num_epochs):
        train(epoch)
        if hvd.rank() == 0:
            print("Saving model to {}".format(
                osp.join(args.save_model_path,
                         "checkpoint-{:0>3d}.pth".format(epoch))))
            torch.save({'state_dict': model.state_dict()},
                       osp.join(args.save_model_path,
                                "checkpoint-{:0>3d}.pth".format(epoch)))