Ejemplo n.º 1
0
def main(options):
    """Test every trained fold of a CNN model and aggregate the results.

    The options used at training time are read back from
    ``commandline_cnn.json`` inside ``options.model_path``. For each fold
    found under ``<model_path>/best_model_dir`` every CNN (``num_cnn`` of
    them) is evaluated with ``test_cnn``; the outputs are then combined with
    ``soft_voting_to_tsvs`` for the ``best_acc`` and ``best_loss`` selections.

    Args:
        options: namespace of command-line options. The attributes read here
            are ``model_path``, ``input_dir``, ``tsv_path``, ``diagnoses``,
            ``preprocessing``, ``batch_size``, ``num_workers``, ``dataset``,
            ``gpu`` and ``mode``. When ``diagnoses`` is ``None`` it is
            replaced in place by the value stored in the training options.
    """

    # Read json: recover the options that were used at training time.
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)
    num_cnn = compute_num_cnn(options.input_dir,
                              options.tsv_path,
                              model_options,
                              data="test")

    # Load test data
    if options.diagnoses is None:
        options.diagnoses = model_options.diagnoses

    test_df = load_data_test(options.tsv_path, options.diagnoses)
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = torch.nn.CrossEntropyLoss()

    # Collect the index of every trained fold.
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    splits = []
    for fold_dir in os.listdir(best_model_dir):
        # Use *all* trailing digits of the entry name: taking only the last
        # character would turn "fold_10" into fold 0.
        trailing_digits = fold_dir[len(fold_dir.rstrip('0123456789')):]
        if trailing_digits:
            splits.append(int(trailing_digits))
        # Entries without a numeric suffix are not fold directories and are
        # ignored instead of crashing the whole evaluation.

    # Loop on folds, in ascending order so that runs are reproducible
    # (os.listdir returns entries in arbitrary order).
    for split in sorted(splits):
        print("Fold %i" % split)

        for cnn_index in range(num_cnn):
            dataset = return_dataset(model_options.mode,
                                     options.input_dir,
                                     test_df,
                                     options.preprocessing,
                                     transformations,
                                     options,
                                     cnn_index=cnn_index)

            test_loader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     shuffle=False,
                                     num_workers=options.num_workers,
                                     pin_memory=True)

            test_cnn(options.model_path, test_loader, options.dataset, split,
                     criterion, cnn_index, model_options, options.gpu)

        for selection in ['best_acc', 'best_loss']:
            soft_voting_to_tsvs(
                options.model_path,
                split,
                selection,
                mode=options.mode,
                dataset=options.dataset,
                num_cnn=num_cnn,
                selection_threshold=model_options.selection_threshold)
Ejemplo n.º 2
0
# Option controlling parallel data loading (see the help text below).
parser.add_argument("--num_workers",
                    '-w',
                    default=8,
                    type=int,
                    help='the number of batch being loaded in parallel')

if __name__ == "__main__":
    # Parse the command line once: ret[0] is the namespace of known options,
    # ret[1] the list of arguments that were not recognised.
    ret = parser.parse_known_args()
    options = ret[0]
    if ret[1]:
        # Reuse the first parse result instead of parsing a second time.
        print("unknown arguments: %s" % ret[1])

    # Read json: recover the options that were used at training time.
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)
    num_cnn = compute_num_cnn(model_options.input_dir,
                              model_options.tsv_path,
                              model_options,
                              data="train")

    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = nn.CrossEntropyLoss()

    # Loop on all folds trained
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    for fold_dir in folds_dir:
        # Use all trailing digits of the name: the last character alone would
        # turn "fold_10" into fold 0.
        trailing_digits = fold_dir[len(fold_dir.rstrip('0123456789')):]
        if not trailing_digits:
            # Not a fold directory (no numeric suffix): nothing to process.
            continue
        split = int(trailing_digits)
Ejemplo n.º 3
0
def main(options):
    """Resume the training of a convolutional autoencoder from a checkpoint.

    The options are completed with ``read_json``; the training/validation
    datasets and their loaders are rebuilt; the model is reloaded from
    ``checkpoint.pth.tar`` in ``options.model_path`` and the optimizer is
    obtained from ``load_optimizer`` with ``optimizer.pth.tar`` of the same
    folder. ``train`` is then called with ``options.beginning_epoch`` set to
    the epoch following the checkpointed one. When ``options.visualization``
    is set, the ``model_best.pth.tar`` stored under ``best_loss`` is used to
    visualize 3 images from the validation and the training loaders.

    Args:
        options: namespace of command-line options; it is updated in place
            (``beginning_epoch``).

    Raises:
        Exception: if ``evaluation_steps`` is different from 1 and is not a
            multiple of ``accumulation_steps``.
    """
    options = read_json(options)

    eval_steps = options.evaluation_steps
    accum_steps = options.accumulation_steps
    misaligned = eval_steps % accum_steps != 0
    if misaligned and eval_steps != 1:
        raise Exception(
            'Evaluation steps %d must be a multiple of accumulation steps %d' %
            (eval_steps, accum_steps))

    transformations = (MinMaxNormalization()
                       if options.minmaxnormalization else None)

    start_time = time()

    # Get the data.
    training_tsv, valid_tsv = load_data(options.diagnosis_path,
                                        options.diagnoses, options.split,
                                        options.n_splits, options.baseline)

    def build_dataset(tsv):
        # Both subsets share the same input folder, transform and
        # preprocessing; only the tsv differs.
        return MRIDataset(options.input_dir,
                          tsv,
                          transform=transformations,
                          preprocessing=options.preprocessing)

    data_train = build_dataset(training_tsv)
    data_valid = build_dataset(valid_tsv)

    # Settings common to both loaders; only the training one is shuffled.
    loader_settings = dict(batch_size=options.batch_size,
                           num_workers=options.num_workers,
                           pin_memory=True,
                           drop_last=options.drop_last)
    train_loader = DataLoader(data_train, shuffle=True, **loader_settings)
    valid_loader = DataLoader(data_valid, shuffle=False, **loader_settings)

    # Initialize the model and reload its checkpointed state
    print('Initialization of the model')
    decoder, current_epoch = load_model(create_autoencoder(options.model),
                                        options.model_path,
                                        options.gpu,
                                        'checkpoint.pth.tar',
                                        device_index=options.device)
    if options.gpu:
        decoder = decoder.to(
            torch.device('cuda:{}'.format(options.device)))

    options.beginning_epoch = current_epoch + 1

    # Define criterion and optimizer
    criterion = torch.nn.MSELoss()
    optimizer = load_optimizer(
        path.join(options.model_path, 'optimizer.pth.tar'), decoder)

    # Define output directories (one sub-folder per fold)
    fold_name = 'fold_%i' % options.split
    log_dir = path.join(options.output_dir, 'log_dir', fold_name,
                        'ConvAutoencoder')
    visualization_dir = path.join(options.output_dir, 'visualize', fold_name)
    model_dir = path.join(options.output_dir, 'best_model_dir', fold_name,
                          'ConvAutoencoder')

    print('Resuming the training task')
    train(decoder, train_loader, valid_loader, criterion, optimizer, False,
          log_dir, model_dir, options)

    if options.visualization:
        print("Visualization of autoencoder reconstruction")
        best_decoder, _ = load_model(decoder,
                                     path.join(model_dir, "best_loss"),
                                     options.gpu,
                                     filename='model_best.pth.tar',
                                     device_index=options.device)
        # Validation images first, then training images.
        for loader, subset in ((valid_loader, "validation"),
                               (train_loader, "train")):
            visualize_image(best_decoder,
                            loader,
                            path.join(visualization_dir, subset),
                            nb_images=3,
                            device_index=options.device)
    del decoder
    torch.cuda.empty_cache()

    total_time = time() - start_time
    print("Total time of computation: %d s" % total_time)
Ejemplo n.º 4
0
def inference_from_model(caps_dir,
                         tsv_path,
                         model_path=None,
                         json_file=None,
                         prefix=None,
                         no_labels=False,
                         gpu=True,
                         prepare_dl=False):
    """
    Inference from previously trained model.

    This function uses a previously trained model to classify the input(s).
    The model is stored in the folder model_path and it assumes the folder
    structure given by the training stage: one ``fold-*`` sub-folder per fold,
    each containing a ``models`` folder. Particularly, to have a prediction at
    image level, it assumes that results of the validation set are stored in
    the model_path folder in order to perform soft-voting at the slice/patch
    level and also for multicnn.

    Only the model selected on ``best_balanced_accuracy`` is evaluated.

    Args:
        caps_dir: folder containing the tensor files (.pt version of MRI)
        tsv_path: file with the name of the MRIs to process (single or
            multiple)
        model_path: folder of the trained model (the one containing the
            ``fold-*`` sub-folders).
        json_file: file containing the training parameters.
        prefix: converted with ``str`` and passed as the dataset name to
            ``mode_level_to_tsvs`` and ``soft_voting_to_tsvs``.
        no_labels: currently not used by this function.
        gpu: if true, it uses gpu.
        prepare_dl: if true, uses extracted patches/slices otherwise extract
            them on-the-fly.

    Returns:
        None. Prediction results and metrics are written by
        ``mode_level_to_tsvs`` / ``soft_voting_to_tsvs``; a
        ``cnn_classification/best_balanced_accuracy`` folder is created inside
        every fold folder.

    Raises:
        FileNotFoundError: if ``model_best.pth.tar`` is missing for a fold
            (or for one of the CNNs of a fold in the multicnn case).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("model_path",
                        type=str,
                        help="Path to the trained model folder.")
    options = parser.parse_args([model_path])
    options = read_json(options, json_path=json_file)
    num_cnn = compute_num_cnn(caps_dir, tsv_path, options, "classify")
    print("Load model with these options:")
    print(options)

    # Overwrite options with user input
    options.use_cpu = not gpu
    options.prepare_dl = prepare_dl
    # Define the path
    currentDirectory = pathlib.Path(model_path)
    # Search for 'fold-*' pattern
    currentPattern = "fold-*"

    best_model = {
        'best_acc': 'best_balanced_accuracy',
        'best_loss': 'best_loss'
    }
    selection = best_model['best_acc']

    # Soft-voting threshold: does not depend on the fold, so compute it once.
    selection_thresh = getattr(options, 'selection_threshold', 0.8)

    # Prepare outputs
    usr_prefix = str(prefix)

    # loop depending the number of folds found in the model folder
    # (sorted: glob yields entries in arbitrary order)
    for fold_dir in sorted(currentDirectory.glob(currentPattern)):
        fold = int(fold_dir.name.split("-")[-1])
        # glob() already returns paths prefixed with model_path, so the fold
        # path must not be joined with model_path again (that duplicated the
        # prefix for relative paths). A dedicated variable is used for the
        # models folder so that the `model_path` argument is not overwritten
        # from one fold to the next.
        fold_path = str(fold_dir)
        fold_model_path = join(fold_path, 'models')

        # Check that the selected model exists before running anything.
        if options.mode_task == 'multicnn':
            for cnn_dir in listdir(fold_model_path):
                cnn_model_file = join(fold_model_path, cnn_dir, selection,
                                      'model_best.pth.tar')
                if not exists(cnn_model_file):
                    raise FileNotFoundError(errno.ENOENT,
                                            strerror(errno.ENOENT),
                                            cnn_model_file)

        else:
            model_file = join(fold_model_path, selection,
                              'model_best.pth.tar')
            if not exists(model_file):
                raise FileNotFoundError(errno.ENOENT, strerror(errno.ENOENT),
                                        model_file)

        performance_dir = join(fold_path, 'cnn_classification', selection)
        makedirs(performance_dir, exist_ok=True)

        # It launches the corresponding function, depending on the mode.
        infered_classes, metrics = inference_from_model_generic(
            caps_dir, tsv_path, fold_model_path, options, num_cnn=num_cnn)

        # Write output files at %mode level
        print("Prediction results and metrics are written in the "
              "following folder: %s" % performance_dir)

        mode_level_to_tsvs(currentDirectory,
                           infered_classes,
                           metrics,
                           fold,
                           selection,
                           options.mode,
                           dataset=usr_prefix)

        # Write files at the image level (for patch, roi and slice).
        # It assumes the existence of validation files to perform soft-voting
        if options.mode in ["patch", "roi", "slice"]:
            soft_voting_to_tsvs(currentDirectory,
                                fold,
                                selection,
                                options.mode,
                                usr_prefix,
                                num_cnn=num_cnn,
                                selection_threshold=selection_thresh)
Ejemplo n.º 5
0
def inference_from_model(caps_dir,
                         tsv_path,
                         model_path=None,
                         json_file=None,
                         prefix=None,
                         labels=True,
                         gpu=True,
                         num_workers=0,
                         batch_size=1,
                         prepare_dl=False,
                         selection_metrics=None,
                         diagnoses=None,
                         logger=None):
    """
    Inference from previously trained model.

    This function uses a previously trained model to classify the input(s).
    The model is stored in the folder model_path and it assumes the folder
    structure given by the training stage: one ``fold-*`` sub-folder per fold,
    each containing a ``models`` folder. Particularly, to have a prediction at
    image level, it assumes that results of the validation set are stored in
    the model_path folder in order to perform soft-voting at the slice/patch
    level and also for multicnn.

    Args:
        caps_dir: folder containing the tensor files (.pt version of MRI)
        tsv_path: file with the name of the MRIs to process (single or
            multiple)
        model_path: folder of the trained model (the one containing the
            ``fold-*`` sub-folders).
        json_file: file containing the training parameters.
        prefix: prefix of all classification outputs.
        labels: by default is True. If False no metrics tsv files will be
            written.
        gpu: if true, it uses gpu.
        num_workers: num_workers used in DataLoader
        batch_size: batch size of the DataLoader
        prepare_dl: if true, uses extracted patches/slices otherwise extract
            them on-the-fly.
        selection_metrics: list of metrics to find best models to be
            evaluated; each metric ``m`` selects the ``best_<m>`` folder.
            A single string is accepted as a one-element list. When None,
            ``["balanced_accuracy"]`` is used (NOTE(review): chosen to match
            the ``best_balanced_accuracy`` folder naming; confirm this is the
            desired default).
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        logger: Logger instance. The ``logging`` module is used when None.

    Returns:
        None. Files are written in the model folder with prediction results
        and metrics; a ``cnn_classification/best_<metric>`` folder is created
        inside every fold folder.

    Raises:
        FileNotFoundError: if ``model_best.pth.tar`` is missing for a fold
            and metric (or for one of the CNNs in the multicnn case).
    """
    import argparse
    import logging

    if logger is None:
        logger = logging

    # The default (None) used to be iterated directly, raising a TypeError.
    if selection_metrics is None:
        selection_metrics = ["balanced_accuracy"]
    elif isinstance(selection_metrics, str):
        # A bare string would otherwise be iterated character by character.
        selection_metrics = [selection_metrics]

    parser = argparse.ArgumentParser()
    parser.add_argument("model_path",
                        type=str,
                        help="Path to the trained model folder.")
    options = parser.parse_args([model_path])
    options = read_json(options, json_path=json_file)

    logger.debug("Load model with these options:")
    logger.debug(options)

    # Overwrite options with user input
    options.use_cpu = not gpu
    options.nproc = num_workers
    options.batch_size = batch_size
    options.prepare_dl = prepare_dl
    if diagnoses is not None:
        options.diagnoses = diagnoses

    options = translate_parameters(options)

    if options.mode_task == "multicnn":
        num_cnn = compute_num_cnn(caps_dir, tsv_path, options, "test")
    else:
        num_cnn = None
    # Define the path
    currentDirectory = pathlib.Path(model_path)
    # Search for 'fold-*' pattern
    currentPattern = "fold-*"

    # Soft-voting threshold: independent of fold and metric, compute it once.
    selection_thresh = getattr(options, 'selection_threshold', 0.8)

    # loop depending the number of folds found in the model folder
    # (sorted: glob yields entries in arbitrary order)
    for fold_dir in sorted(currentDirectory.glob(currentPattern)):
        fold = int(fold_dir.name.split("-")[-1])
        # glob() already returns paths prefixed with model_path, so the fold
        # path must not be joined with model_path again (that duplicated the
        # prefix for relative paths). A dedicated variable is used for the
        # models folder so that the `model_path` argument is not overwritten
        # from one fold to the next.
        fold_path = str(fold_dir)
        fold_model_path = join(fold_path, 'models')

        for selection_metric in selection_metrics:
            selection = "best_%s" % selection_metric

            # Check that the selected model exists before running anything.
            if options.mode_task == 'multicnn':
                for cnn_dir in listdir(fold_model_path):
                    cnn_model_file = join(fold_model_path, cnn_dir,
                                          selection, 'model_best.pth.tar')
                    if not exists(cnn_model_file):
                        raise FileNotFoundError(errno.ENOENT,
                                                strerror(errno.ENOENT),
                                                cnn_model_file)

            else:
                model_file = join(fold_model_path, selection,
                                  'model_best.pth.tar')
                if not exists(model_file):
                    raise FileNotFoundError(errno.ENOENT,
                                            strerror(errno.ENOENT),
                                            model_file)

            performance_dir = join(fold_path, 'cnn_classification',
                                   selection)

            makedirs(performance_dir, exist_ok=True)

            # It launches the corresponding function, depending on the mode.
            inference_from_model_generic(caps_dir,
                                         tsv_path,
                                         fold_model_path,
                                         options,
                                         prefix,
                                         currentDirectory,
                                         fold,
                                         selection,
                                         labels=labels,
                                         num_cnn=num_cnn,
                                         logger=logger)

            # Write files at the image level (for patch, roi and slice).
            # It assumes the existence of validation files to perform
            # soft-voting
            if options.mode in ["patch", "roi", "slice"]:
                soft_voting_to_tsvs(currentDirectory,
                                    fold,
                                    selection,
                                    options.mode,
                                    prefix,
                                    num_cnn=num_cnn,
                                    selection_threshold=selection_thresh,
                                    use_labels=labels,
                                    logger=logger)

            logger.info("Prediction results and metrics are written in the "
                        "following folder: %s" % performance_dir)
Ejemplo n.º 6
0
def main(options):
    """Resume the training of a CNN from its checkpoint, then evaluate it.

    The options are completed with ``read_json``; the training/validation
    datasets and loaders are rebuilt; the architecture named by
    ``options.model`` is created with the keyword arguments its family needs
    and reloaded from ``checkpoint.pth.tar`` in
    ``<model_path>/best_model_dir/CNN/fold_<split>``. ``train`` is called with
    ``options.beginning_epoch`` set to the epoch following the checkpointed
    one, after which ``test_cnn`` is run on the training and validation
    loaders with ``options.model_path`` pointing to ``options.output_dir``.

    Args:
        options: namespace of command-line options; it is updated in place
            (``beginning_epoch``, ``model_path``).

    Raises:
        Exception: if ``evaluation_steps`` is different from 1 and is not a
            multiple of ``accumulation_steps``.
    """
    options = read_json(options)

    eval_steps = options.evaluation_steps
    accum_steps = options.accumulation_steps
    misaligned = eval_steps % accum_steps != 0
    if misaligned and eval_steps != 1:
        raise Exception(
            'Evaluation steps %d must be a multiple of accumulation steps %d' %
            (eval_steps, accum_steps))

    transformations = (MinMaxNormalization()
                       if options.minmaxnormalization else None)

    start_time = time()

    # Get the data.
    training_tsv, valid_tsv = load_data(options.diagnosis_path,
                                        options.diagnoses, options.split,
                                        options.n_splits, options.baseline)

    def build_dataset(tsv):
        # Both subsets share the same input folder, transform and
        # preprocessing; only the tsv differs.
        return MRIDataset(options.input_dir,
                          tsv,
                          transform=transformations,
                          preprocessing=options.preprocessing)

    data_train = build_dataset(training_tsv)
    data_valid = build_dataset(valid_tsv)

    # Settings common to both loaders; only the training one is shuffled.
    loader_settings = dict(batch_size=options.batch_size,
                           num_workers=options.num_workers,
                           pin_memory=True,
                           drop_last=options.drop_last)
    train_loader = DataLoader(data_train, shuffle=True, **loader_settings)
    valid_loader = DataLoader(data_valid, shuffle=False, **loader_settings)

    def from_options(*names):
        # Forward option attributes whose keyword name in create_model is
        # identical to the attribute name.
        return {name: getattr(options, name) for name in names}

    # Initialize the model
    print('Initialization of the model')
    model_name = options.model
    banner = '********** init {} model for test! **********'
    unet_family = ('UNet3D', 'ResidualUNet3D', 'UNet3D_add_more_fc',
                   'ResidualUNet3D_add_more_fc')
    plain_cnns = ('VoxCNN', 'ConvNet3D')
    gnn_names = ('gnn_type', 'gnn_dropout', 'gnn_dropout_adj',
                 'gnn_non_linear', 'gnn_undirected', 'gnn_self_loop',
                 'gnn_threshold')

    # The tests below are made in the same order as the architectures were
    # originally checked; each branch only reads the options it needs.
    if model_name in unet_family:
        print(banner.format(model_name))
        model_kwargs = dict(gpu=options.gpu,
                            dropout=options.dropout,
                            device_index=options.device,
                            **from_options('in_channels', 'out_channels',
                                           'f_maps', 'layer_order',
                                           'num_groups', 'num_levels'))
    elif model_name in plain_cnns:
        print(banner.format(model_name))
        model_kwargs = dict(gpu=options.gpu, device_index=options.device)
    elif 'gcn' in model_name:
        print('********** init {}-{} model for test! **********'.format(
            model_name, options.gnn_type))
        model_kwargs = dict(gpu=options.gpu,
                            device_index=options.device,
                            **from_options(*gnn_names))
    elif model_name == 'ROI_GCN':
        print(banner.format(model_name))
        model_kwargs = dict(gpu=options.gpu,
                            device_index=options.device,
                            **from_options(*gnn_names,
                                           'nodel_vetor_layer',
                                           'classify_layer',
                                           'num_node_features',
                                           'num_class',
                                           'roi_size',
                                           'num_nodes',
                                           'gnn_pooling_layers',
                                           'global_sort_pool_k',
                                           'layers',
                                           'shortcut_type',
                                           'use_nl',
                                           'dropout',
                                           'device'))
    elif model_name == 'SwinTransformer3d':
        print(banner.format(model_name))
        model_kwargs = dict(gpu=options.gpu,
                            dropout=options.dropout,
                            device_index=options.device,
                            **from_options('sw_patch_size', 'window_size',
                                           'mlp_ratio', 'drop_rate',
                                           'attn_drop_rate',
                                           'drop_path_rate', 'qk_scale',
                                           'embed_dim', 'depths',
                                           'num_heads', 'qkv_bias', 'ape',
                                           'patch_norm'))
    else:
        # Any other architecture: generic arguments only, no banner.
        model_kwargs = dict(gpu=options.gpu,
                            dropout=options.dropout,
                            device_index=options.device)
    model = create_model(model_name, **model_kwargs)

    # Reload the checkpointed state of the selected fold.
    checkpoint_dir = path.join(options.model_path, "best_model_dir", "CNN",
                               "fold_" + str(options.split))
    model, current_epoch = load_model(model,
                                      checkpoint_dir,
                                      options.gpu,
                                      'checkpoint.pth.tar',
                                      device_index=options.device)

    options.beginning_epoch = current_epoch + 1

    # Define criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = load_optimizer(
        path.join(options.model_path, 'optimizer.pth.tar'), model)

    # Define output directories (one sub-folder per fold)
    fold_name = 'fold_%i' % options.split
    log_dir = path.join(options.output_dir, 'log_dir', fold_name, 'CNN')
    model_dir = path.join(options.output_dir, 'best_model_dir', fold_name,
                          'CNN')

    print('Resuming the training task')
    train(model, train_loader, valid_loader, criterion, optimizer, True,
          log_dir, model_dir, options)

    # Evaluate from the folder in which the resumed training wrote its output.
    options.model_path = options.output_dir
    for loader, subset in ((train_loader, "train"),
                           (valid_loader, "validation")):
        test_cnn(loader, subset, options.split, criterion, options)

    total_time = time() - start_time
    print("Total time of computation: %d s" % total_time)
Ejemplo n.º 7
0
def main(options):
    """Resume the training of a single CNN from its checkpoint, then test it.

    The options are completed with ``read_json``; the training/validation
    datasets and loaders are rebuilt; the model is created with
    ``create_model`` and reloaded from ``checkpoint.pth.tar`` in
    ``<model_path>/best_model_dir/CNN/fold_<split>``. ``train`` is called with
    ``options.beginning_epoch`` set to the epoch following the checkpointed
    one, after which ``test_single_cnn`` is run on the training and validation
    loaders with ``options.model_path`` pointing to ``options.output_dir``.

    Args:
        options: namespace of command-line options; it is updated in place
            (``beginning_epoch``, ``model_path``).

    Raises:
        Exception: if ``evaluation_steps`` is different from 1 and is not a
            multiple of ``accumulation_steps``.
    """
    options = read_json(options)

    eval_steps = options.evaluation_steps
    accum_steps = options.accumulation_steps
    misaligned = eval_steps % accum_steps != 0
    if misaligned and eval_steps != 1:
        raise Exception(
            'Evaluation steps %d must be a multiple of accumulation steps %d' %
            (eval_steps, accum_steps))

    transformations = (MinMaxNormalization()
                       if options.minmaxnormalization else None)

    start_time = time()

    # Get the data.
    training_tsv, valid_tsv = load_data(options.diagnosis_path,
                                        options.diagnoses, options.split,
                                        options.n_splits, options.baseline)

    def build_dataset(tsv):
        # Both subsets share the same input folder, transform and
        # preprocessing; only the tsv differs.
        return MRIDataset(options.input_dir,
                          tsv,
                          transform=transformations,
                          preprocessing=options.preprocessing)

    data_train = build_dataset(training_tsv)
    data_valid = build_dataset(valid_tsv)

    # Settings common to both loaders; only the training one is shuffled.
    loader_settings = dict(batch_size=options.batch_size,
                           num_workers=options.num_workers,
                           pin_memory=True)
    train_loader = DataLoader(data_train, shuffle=True, **loader_settings)
    valid_loader = DataLoader(data_valid, shuffle=False, **loader_settings)

    # Initialize the model and reload the checkpoint of the selected fold
    print('Initialization of the model')
    model = create_model(options.model, options.gpu, dropout=options.dropout)
    checkpoint_dir = path.join(options.model_path, "best_model_dir", "CNN",
                               "fold_" + str(options.split))
    model, current_epoch = load_model(model, checkpoint_dir, options.gpu,
                                      'checkpoint.pth.tar')

    options.beginning_epoch = current_epoch + 1

    # Define criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = load_optimizer(
        path.join(options.model_path, 'optimizer.pth.tar'), model)

    # Define output directories (one sub-folder per fold)
    fold_name = 'fold_%i' % options.split
    log_dir = path.join(options.output_dir, 'log_dir', fold_name, 'CNN')
    model_dir = path.join(options.output_dir, 'best_model_dir', fold_name,
                          'CNN')

    print('Resuming the training task')
    train(model, train_loader, valid_loader, criterion, optimizer, True,
          log_dir, model_dir, options)

    # Evaluate from the folder in which the resumed training wrote its output.
    options.model_path = options.output_dir
    for loader, subset in ((train_loader, "train"),
                           (valid_loader, "validation")):
        test_single_cnn(loader, subset, options.split, criterion, options)

    total_time = time() - start_time
    print("Total time of computation: %d s" % total_time)