def find_2d_configuration():
    """Run a dummy 2D training loop used to calibrate the VRAM reference of FabiansUNet.

    A ``FabiansUNet`` with 2D kernels is built on the GPU and trained for 20 SGD
    steps on random inputs and random labels. Afterwards the value of
    ``FabiansUNet.compute_approx_vram_consumption`` for the very same
    hyperparameters is printed, so that it can be stored as
    ``FabiansUNet.use_this_for_batch_size_computation_2D``.

    The function takes no arguments and returns ``None``; all results are
    printed. A CUDA device is required because the network and the dummy
    tensors are moved to the GPU with ``.cuda()``.
    """
    # Let's compute a reference for 2D.
    # We select hyperparameters here so that we get approximately the same patch size as we would get with the
    # regular UNet. This is just my choice. You can do whatever you want.
    # These default hyperparameters will then be used by the experiment planner.

    # Since this is more parameter intensive than the UNet, we will test a configuration that has a lot of
    # parameters. Therefore we copy the UNet configuration for Task003_Liver.
    cudnn.deterministic = False
    cudnn.benchmark = True

    patch_size = (512, 512)
    max_num_features = 512
    num_modalities = 1
    num_classes = 3
    batch_size = 12

    # Now we fiddle with the network specific hyperparameters until everything just barely fits into a Titan X.
    blocks_per_stage_encoder = FabiansUNet.default_blocks_per_stage_encoder
    blocks_per_stage_decoder = FabiansUNet.default_blocks_per_stage_decoder
    initial_num_features = 30

    # We need to add a leading [1, 1] for the res unet because in this implementation all stages of the encoder
    # can have a stride (the first stage therefore gets an explicit stride of 1).
    pool_op_kernel_sizes = [[1, 1],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2],
                            [2, 2]]

    conv_op_kernel_sizes = [[3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3],
                            [3, 3]]

    # One encoder entry per stage and one decoder entry fewer than that. The same slices are used for the
    # network and for the VRAM estimate so that both always describe the same architecture.
    num_stages = len(conv_op_kernel_sizes)
    encoder_blocks = blocks_per_stage_encoder[:num_stages]
    decoder_blocks = blocks_per_stage_decoder[:num_stages - 1]

    # NOTE(review): the positional `2` is presumably the feature map multiplier on downscale and the two
    # `False` flags presumably control deep supervision / logit upscaling - confirm against FabiansUNet.
    unet = FabiansUNet(num_modalities, initial_num_features, encoder_blocks, 2,
                       pool_op_kernel_sizes, conv_op_kernel_sizes,
                       get_default_network_config(2, dropout_p=None), num_classes,
                       decoder_blocks, False, False,
                       max_features=max_num_features).cuda()

    optimizer = SGD(unet.parameters(), lr=0.1, momentum=0.95)
    loss = DC_and_CE_loss({'batch_dice': True, 'smooth': 1e-5, 'do_bg': False}, {})

    dummy_input = torch.rand((batch_size, num_modalities, *patch_size)).cuda()
    # Random integer labels. rand() * num_classes rounds up to num_classes in rare cases, hence the clamp to
    # the last valid class index (derived from num_classes instead of being hard-coded).
    dummy_gt = (torch.rand((batch_size, 1, *patch_size)) * num_classes).round().clamp_(
        0, num_classes - 1).cuda().long()

    for step in range(20):
        optimizer.zero_grad()
        skips = unet.encoder(dummy_input)
        print([skip.shape for skip in skips])
        output = unet.decoder(skips)

        loss_value = loss(output, dummy_gt)
        loss_value.backward()

        optimizer.step()
        if step == 0:
            # Release cached blocks after the first iteration only - presumably to drop temporary
            # allocations made during the cudnn benchmark warm-up (TODO confirm).
            torch.cuda.empty_cache()

    # That should do. Now take the network hyperparameters and insert them in
    # FabiansUNet.compute_approx_vram_consumption. Whatever number this spits out, save it to
    # FabiansUNet.use_this_for_batch_size_computation_2D
    print(FabiansUNet.compute_approx_vram_consumption(patch_size, initial_num_features, max_num_features,
                                                      num_modalities, num_classes, pool_op_kernel_sizes,
                                                      encoder_blocks, decoder_blocks, 2, batch_size))
Example #2
0
def find_2d_configuration(graph_output_path="/home/fabian/test_arch.pdf"):
    """Run a dummy mixed-precision 2D training loop for FabiansPreActUNet and export its graph.

    A ``FabiansPreActUNet`` with 2D kernels is built on the GPU, its approximate
    VRAM consumption (``compute_approx_vram_consumption``) is printed, and the
    network is trained for 10 SGD steps on random inputs and random labels using
    ``autocast`` and ``GradScaler``. Finally the network graph is built with
    ``hiddenlayer`` and saved to ``graph_output_path``.

    A CUDA device is required because the network and the dummy tensors are
    moved to the GPU with ``.cuda()``. ``hiddenlayer`` is imported lazily and is
    only needed for the final graph export.

    Args:
        graph_output_path: File the hiddenlayer graph is saved to. Defaults to
            the path that was previously hard-coded.

    Returns:
        None. All results are printed or written to ``graph_output_path``.
    """
    cudnn.benchmark = True
    cudnn.deterministic = False

    conv_op_kernel_sizes = ((3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3),
                            (3, 3))
    pool_op_kernel_sizes = ((1, 1), (2, 2), (2, 2), (2, 2), (2, 2), (2, 2),
                            (2, 2))

    patch_size = (256, 256)
    base_num_features = 32
    input_modalities = 4
    blocks_per_stage_encoder = (1, 3, 4, 6, 6, 6, 6)
    blocks_per_stage_decoder = (2, 2, 2, 2, 2, 2)
    feat_map_mult_on_downscale = 2
    num_classes = 5
    max_features = 512
    batch_size = 50

    # NOTE(review): the positional True/False flags presumably enable deep
    # supervision and disable logit upscaling - confirm against
    # FabiansPreActUNet. The decoder output is indexed with [0] below.
    unet = FabiansPreActUNet(input_modalities,
                             base_num_features,
                             blocks_per_stage_encoder,
                             feat_map_mult_on_downscale,
                             pool_op_kernel_sizes,
                             conv_op_kernel_sizes,
                             get_default_network_config(2, dropout_p=None),
                             num_classes,
                             blocks_per_stage_decoder,
                             True,
                             False,
                             max_features=max_features).cuda()

    scaler = GradScaler()
    optimizer = SGD(unet.parameters(), lr=0.1, momentum=0.95)

    print(
        unet.compute_approx_vram_consumption(
            patch_size, base_num_features, max_features, input_modalities,
            num_classes, pool_op_kernel_sizes, blocks_per_stage_encoder,
            blocks_per_stage_decoder, feat_map_mult_on_downscale, batch_size))

    loss_fn = DC_and_CE_loss({
        'batch_dice': True,
        'smooth': 1e-5,
        'do_bg': False
    }, {})

    dummy_input = torch.rand(
        (batch_size, input_modalities, *patch_size)).cuda()
    # Random integer labels; the clamp keeps the rare rounded-up value of
    # num_classes inside the valid class index range.
    dummy_gt = (torch.rand(
        (batch_size, 1, *patch_size)) * num_classes).round().clamp_(
            0, num_classes - 1).cuda().long()

    for _ in range(10):
        optimizer.zero_grad()

        # Only the forward pass and the loss computation belong under
        # autocast.
        with autocast():
            skips = unet.encoder(dummy_input)
            print([skip.shape for skip in skips])
            output = unet.decoder(skips)[0]

            loss_value = loss_fn(output, dummy_gt)

        print(loss_value.item())

        # Backward pass and optimizer step run outside of autocast, as
        # recommended by the PyTorch AMP documentation.
        scaler.scale(loss_value).backward()
        scaler.step(optimizer)
        scaler.update()

    # Lazy import: hiddenlayer is only required for the graph export.
    import hiddenlayer as hl
    with autocast():
        g = hl.build_graph(unet, dummy_input, transforms=None)
        g.save(graph_output_path)