Example 1
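# Assumed imports for this snippet: os comes from the standard library, while
# init_model, load_model, test and mode_level_to_tsvs are helpers defined
# elsewhere in the surrounding project.
import os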
def test_cnn(data_loader, subset_name, split, criterion, options):

    for selection in ["best_acc", "best_loss"]:
        # Load the best model saved during training for this selection metric
        model = init_model(options.model,
                           gpu=options.gpu,
                           dropout=options.dropout)
        model, best_epoch = load_model(model,
                                       os.path.join(options.output_dir,
                                                    'best_model_dir',
                                                    "fold_%i" % split, 'CNN',
                                                    selection),
                                       gpu=options.gpu,
                                       filename='model_best.pth.tar')

        results_df, metrics = test(model, data_loader, options.gpu, criterion,
                                   options.mode)
        print("Slice level balanced accuracy is %f" %
              metrics['balanced_accuracy'])

        mode_level_to_tsvs(options.output_dir,
                           results_df,
                           metrics,
                           split,
                           selection,
                           options.mode,
                           dataset=subset_name)
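
A minimal usage sketch, assuming training has already produced the checkpoints; valid_loader and options are hypothetical stand-ins for the DataLoader and the parsed options built by the training code (see Example 2), and fold 0 is evaluated here:

import torch

criterion = torch.nn.CrossEntropyLoss()
test_cnn(valid_loader, "validation", 0, criterion, options)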
Example 2
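# Assumed imports for this snippet: the standard-library and PyTorch parts
# follow directly from the calls below, while load_data, mix_slices,
# MRIDatasetSlice, MinMaxNormalization, init_model, train and test_cnn are
# helpers defined elsewhere in the surrounding project.
import os
from time import time

import torch
from torch.utils.data import DataLoader
from torchvision import transforms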
def train_CNN_bad_data_split(params):

    # Transfer learning setup
    print('Performing transfer learning with a model pretrained on ImageNet!\n')
    print('The chosen network is %s!' % params.model)

    # Most ImageNet-pretrained models expect this input size
    trg_size = (224, 224)

    # All pretrained models expect input images normalized in the same way:
    # mini-batches of 3-channel RGB images of shape (3 x H x W), where H and
    # W are at least 224. The images have to be loaded into the range [0, 1]
    # and then normalized with mean = [0.485, 0.456, 0.406] and
    # std = [0.229, 0.224, 0.225].
    transformations = transforms.Compose([
        MinMaxNormalization(),
        transforms.ToPILImage(),
        transforms.Resize(trg_size),
        transforms.ToTensor()
    ])
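    # NOTE: the comment above describes the usual ImageNet normalization,
    # but transforms.Normalize is not part of this pipeline; to match it
    # exactly one would append, for example:
    #   transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                        std=[0.229, 0.224, 0.225])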
    params.dropout = 0.8

    total_time = time()

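    # Select the folds to run: a single explicit fold, every fold of the
    # cross-validation, or a single pass when no split is configured.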
    if params.split is None:
        if params.n_splits is None:
            fold_iterator = range(1)
        else:
            fold_iterator = range(params.n_splits)
    else:
        fold_iterator = [params.split]

    for fi in fold_iterator:
        print("Running for the %d-th fold" % fi)

        training_sub_df, valid_sub_df = load_data(params.tsv_path,
                                                  params.diagnoses,
                                                  fi,
                                                  n_splits=params.n_splits,
                                                  baseline=params.baseline)

        # Redistribute the training + validation data at the slice level
        training_df, valid_df = mix_slices(training_sub_df,
                                           valid_sub_df,
                                           mri_plane=params.mri_plane)
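        # Presumably the "bad data split" of the function name: slices are
        # redistributed across the two sets, so slices from the same subject
        # can end up in both training and validation (data leakage).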

        data_train = MRIDatasetSlice(params.caps_directory,
                                     training_df,
                                     transformations=transformations,
                                     mri_plane=params.mri_plane,
                                     prepare_dl=params.prepare_dl,
                                     mixed=True)

        data_valid = MRIDatasetSlice(params.caps_directory,
                                     valid_df,
                                     transformations=transformations,
                                     mri_plane=params.mri_plane,
                                     prepare_dl=params.prepare_dl,
                                     mixed=True)

        # Shuffle only the training loader; validation keeps a fixed order
        train_loader = DataLoader(data_train,
                                  batch_size=params.batch_size,
                                  shuffle=True,
                                  num_workers=params.num_workers,
                                  pin_memory=True)

        valid_loader = DataLoader(data_valid,
                                  batch_size=params.batch_size,
                                  shuffle=False,
                                  num_workers=params.num_workers,
                                  pin_memory=True)

        # Initialize the model
        print('Initialization of the model')
        model = init_model(params.model,
                           gpu=params.gpu,
                           dropout=params.dropout)

        # Define criterion and optimizer
        criterion = torch.nn.CrossEntropyLoss()
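        # Look up the optimizer class by name (e.g. "Adam") and optimize only
        # the parameters that require gradients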
        optimizer = getattr(torch.optim, params.optimizer)(
            filter(lambda x: x.requires_grad, model.parameters()),
            lr=params.learning_rate,
            weight_decay=params.weight_decay)
        params.beginning_epoch = 0

        # Define output directories
        log_dir = os.path.join(params.output_dir, 'fold-%i' % fi,
                               'tensorboard_logs')
        model_dir = os.path.join(params.output_dir, 'fold-%i' % fi, 'models')

        print('Beginning the training task')
        train(model, train_loader, valid_loader, criterion, optimizer, False,
              log_dir, model_dir, params)

        test_cnn(train_loader, "train", fi, criterion, params)
        test_cnn(valid_loader, "validation", fi, criterion, params)

    total_time = time() - total_time
    print("Total time of computation: %d s" % total_time)