def evaluate_segmentation(patch_size, num_epochs, batch_size, lr, dropout,
                          num_patients_train, num_patients_val,
                          patients_segmentation, threshold, data_dirs, dataset,
                          executable_path, csv_path_per_patient, csv_path,
                          measures):
    """
    Evaluate the stored segmentations of one training run, patient by patient.

    For each patient the label file and the saved probability map are handed
    to ``segment_comparison`` (which writes an xml), the xml is post-processed
    by ``sensibility_conformity_to_xml`` and then appended to the per-patient
    csv. Once all patients are done, every collected xml path is passed to
    ``parse_xml_to_csv_avg_for_patients`` for the averaged csv.

    :param patch_size: training patch size.
    :param num_epochs: number of epochs in training.
    :param batch_size: training batch size.
    :param lr: training learning rate.
    :param dropout: training dropout rate.
    :param num_patients_train: number of patients in training set.
    :param num_patients_val: number of patients in validation set.
    :param patients_segmentation: iterable of patient identifiers (strings that
        are concatenated into file names) of the set being evaluated.
    :param threshold: threshold value forwarded to ``segment_comparison``.
    :param data_dirs: mapping indexed by ``dataset`` that yields the path
        prefix under which ``<patient>_label.nii`` is found.
    :param dataset: train/val/test.
    :param executable_path: path to EvaluateSegmentation.exe.
    :param csv_path_per_patient: path to csv file with results per patient.
    :param csv_path: path to csv file with results averaged over all patients.
    :param measures: string of measures to calculate.
    """
    # name identifying the current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr,
                                   dropout, num_patients_train,
                                   num_patients_val)
    print(run_name)

    # hyper-parameters written next to every csv entry
    shared_params = {
        'patch size': patch_size,
        'num epochs': num_epochs,
        'batch size': batch_size,
        'learning rate': lr,
        'dropout': dropout,
    }

    collected_xmls = []
    for patient in patients_segmentation:
        print(
            '________________________________________________________________________________'
        )
        for caption, value in (('patient:', patient),
                               ('patch size', patch_size),
                               ('batch size', batch_size),
                               ('learning rate', lr),
                               ('dropout', dropout),
                               ('threshold', threshold)):
            print(caption, value)

        # input files: ground-truth label and the saved probability map
        ground_truth_file = data_dirs[dataset] + patient + '_label.nii'
        probs_file = config.get_probs_filepath(run_name, patient, dataset)

        # xml receiving the comparison results for this patient
        patient_xml = config.get_eval_segment_dataset_xmlpath(
            run_name, patient, dataset)
        collected_xmls.append(patient_xml)

        # run the comparison, which stores its results in the xml
        segment_comparison(ground_truth_file, probs_file, executable_path,
                           patient_xml, threshold, measures)

        # extend the xml with two more metrics: Sensibility and Conformity
        sensibility_conformity_to_xml(patient_xml)

        # per-patient csv entry: shared parameters plus the patient id
        patient_params = dict(shared_params)
        patient_params['patient'] = patient
        parse_xml_to_csv(patient_xml, csv_path_per_patient, patient_params)

    # csv entry averaged over all evaluated patients
    parse_xml_to_csv_avg_for_patients(collected_xmls, csv_path, shared_params)
コード例 #2
0
for patch_size in patch_size_list:
    for batch_size in batch_size_list:
        for lr in learning_rate_list:
            for dropout in dropout_list:
                print(
                    '________________________________________________________________________________'
                )
                print('patch size', patch_size)
                print('batch size', batch_size)
                print('learning rate', lr)
                print('dropout', dropout)

                # create the name of current run
                run_name = config.get_run_name(patch_size, num_epochs,
                                               batch_size, lr, dropout,
                                               num_patients_train,
                                               num_patients_val)
                print(run_name)

                # -----------------------------------------------------------
                # LOADING MODEL DATA
                # -----------------------------------------------------------
                train_X, train_y, val_X, val_y, mean, std = create_training_datasets(
                    patch_size, config.NUM_PATCHES, config.PATIENTS)

                # -----------------------------------------------------------
                # LOADING MODEL
                # -----------------------------------------------------------
                model_filepath = config.get_model_filepath(run_name)
                model = load_model(model_filepath,
                                   custom_objects={
コード例 #3
0
def measure_performance_and_save_to_csv(patch_size, num_epochs, batch_size, lr,
                                        dropout, threshold, num_patients_train,
                                        num_patients_val,
                                        patients_segmentation, data_dirs,
                                        dataset, result_file):
    """
    Append one csv row with the training metrics and the patient-averaged
    metrics of a run.

    The row starts with the hyper-parameters and the train/val performance
    stored in the run's pickled training metadata. It is followed by the mean
    over all patients of TP, FN, FP, TN, AUC, accuracy, average class accuracy
    and Dice, computed from each patient's label and saved probability map.

    :param patch_size: training patch size.
    :param num_epochs: number of epochs in training.
    :param batch_size: training batch size.
    :param lr: training learning rate.
    :param dropout: training dropout rate.
    :param threshold: probabilities strictly above this value count as class 1.
    :param num_patients_train: number of patients in training set.
    :param num_patients_val: number of patients in validation set.
    :param patients_segmentation: iterable of patient identifiers (strings that
        are concatenated into file names).
    :param data_dirs: mapping indexed by ``dataset`` that yields the path
        prefix under which ``<patient>_label.nii`` is found.
    :param dataset: key into ``data_dirs``; also forwarded to
        ``config.get_probs_filepath``.
    :param result_file: path of the csv file the row is appended to.
    """
    print(
        '________________________________________________________________________________'
    )
    print('patch size', patch_size)
    print('batch size', batch_size)
    print('learning rate', lr)
    print('dropout', dropout)
    print('threshold', threshold)

    start_row = time.time()

    # create the name of current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr,
                                   dropout, num_patients_train,
                                   num_patients_val)
    print(run_name)

    # -----------------------------------------------------------
    # TRAINING RESULTS
    # -----------------------------------------------------------
    train_metadata_filepath = config.get_train_metadata_filepath(run_name)
    # NOTE: pickle executes arbitrary code on load; only read metadata files
    # that were written by this pipeline.
    with open(train_metadata_filepath, 'rb') as handle:
        train_metadata = pickle.load(handle)

    print('Train params:')
    print(train_metadata['params'])
    print('Train performance:')
    tr_perf = train_metadata['performance']
    print(tr_perf)

    row = [
        patch_size, num_epochs, batch_size, lr, dropout,
        tr_perf['train_true_positives'], tr_perf['train_false_negatives'],
        tr_perf['train_false_positives'], tr_perf['train_true_negatives'],
        tr_perf['train_auc'], tr_perf['train_acc'], tr_perf['train_avg_acc'],
        tr_perf['train_dice'], tr_perf['val_true_positives'],
        tr_perf['val_false_negatives'], tr_perf['val_false_positives'],
        tr_perf['val_true_negatives'], tr_perf['val_auc'], tr_perf['val_acc'],
        tr_perf['val_avg_acc'], tr_perf['val_dice']
    ]

    # -----------------------------------------------------------
    # VALIDATION / TEST RESULTS
    # -----------------------------------------------------------
    tp_list = []
    fn_list = []
    fp_list = []
    tn_list = []
    auc_list = []
    acc_list = []
    avg_acc_list = []
    dice_list = []

    for patient in patients_segmentation:
        print(patient)
        print('> Loading label...')
        label_mat = helper.load_nifti_mat_from_file(
            data_dirs[dataset] + patient + '_label.nii')  # values 0 or 1
        print('> Loading probability map...')
        prob_mat = helper.load_nifti_mat_from_file(
            config.get_probs_filepath(run_name, patient,
                                      dataset))  # values between 0 and 1
        pred_class = (prob_mat > threshold).astype(
            np.uint8)  # convert from boolean to int, values 0 or 1

        print()
        print('Computing performance measures...')
        # the metric functions are fed flat per-voxel vectors
        label_mat_f = np.asarray(label_mat).flatten()
        prob_mat_f = np.asarray(prob_mat).flatten()
        pred_classes_f = np.asarray(pred_class).flatten()
        val_auc = roc_auc_score(label_mat_f, prob_mat_f)
        val_acc = accuracy_score(label_mat_f, pred_classes_f)
        val_avg_acc, val_tn, val_fp, val_fn, val_tp = avg_class_acc(
            label_mat_f, pred_classes_f)
        val_dice = f1_score(label_mat_f, pred_classes_f)

        tp_list.append(val_tp)
        fn_list.append(val_fn)
        fp_list.append(val_fp)
        tn_list.append(val_tn)
        auc_list.append(val_auc)
        acc_list.append(val_acc)
        avg_acc_list.append(val_avg_acc)
        dice_list.append(val_dice)

    # average every measure over the evaluated patients
    row = row + [
        np.mean(tp_list),
        np.mean(fn_list),
        np.mean(fp_list),
        np.mean(tn_list),
        np.mean(auc_list),
        np.mean(acc_list),
        np.mean(avg_acc_list),
        np.mean(dice_list)
    ]
    print('Complete row:', row)

    print('Writing to csv...')
    # newline='' lets the csv module control line endings itself; without it
    # an extra blank line follows every row on Windows.
    with open(result_file, 'a', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(row)

    duration_row = int(time.time() - start_row)
    # hours are the top unit and must not wrap, so no modulo is applied to them
    print('performance assessment took:', duration_row // 3600, 'hours',
          (duration_row // 60) % 60, 'minutes', duration_row % 60, 'seconds')
コード例 #4
0
def _patch_starts(first, last, dim_size, patch_size):
    """
    Return the start offsets of the patches covering ``first..last`` along one axis.

    ``first`` and ``last`` are inclusive voxel indices. Patches are laid out
    back to back from ``first``; a patch that would run past ``dim_size`` is
    shifted back so that it ends exactly at the volume border.

    :raises ValueError: if a patch has to be shifted but the axis is shorter
        than ``patch_size``.
    """
    extent = int(last) - int(first) + 1  # inclusive number of voxels to cover
    num_patches = -(-extent // patch_size)  # ceil division on integers
    starts = []
    for idx in range(num_patches):
        start = int(first) + patch_size * idx
        if start + patch_size > dim_size:
            if dim_size < patch_size:
                raise ValueError(
                    'patch size %d exceeds volume dimension %d' % (patch_size, dim_size))
            start = dim_size - patch_size
        starts.append(start)
    return starts


def predict_and_save(patch_size, num_epochs, batch_size, lr, dropout, patient, num_patients_train, num_patients_val,
                     data_dirs, dataset):
    """
    Predict the probability map of one patient patch by patch and save it as nifti.

    The trained model and its pickled training metadata are loaded for the run
    described by the hyper-parameters. For every z slice that contains mask
    voxels, the bounding box of the mask in that slice is tiled with
    ``patch_size`` x ``patch_size`` patches. Each patch is normalised with the
    stored training mean/std, predicted, and written into the probability
    matrix, which is finally saved to ``config.get_probs_filepath(...)``.

    :param patch_size: edge length of the square patches the model predicts.
    :param num_epochs: number of epochs in training (part of the run name).
    :param batch_size: training batch size (part of the run name).
    :param lr: training learning rate (part of the run name).
    :param dropout: training dropout rate (part of the run name).
    :param patient: patient identifier, concatenated into the file names.
    :param num_patients_train: number of patients in training set.
    :param num_patients_val: number of patients in validation set.
    :param data_dirs: mapping indexed by ``dataset`` that yields the path
        prefix under which ``<patient>_img.nii`` / ``<patient>_mask.nii`` are found.
    :param dataset: key into ``data_dirs``; also forwarded to
        ``config.get_probs_filepath``.
    """
    print('________________________________________________________________________________')
    print('patch size', patch_size)
    print('batch size', batch_size)
    print('learning rate', lr)
    print('dropout', dropout)
    print('patient:', patient)

    # create the name of current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr, dropout, num_patients_train,
                                   num_patients_val)
    print(run_name)

    # -----------------------------------------------------------
    # LOADING MODEL, RESULTS AND WHOLE BRAIN MATRICES
    # -----------------------------------------------------------
    model_filepath = config.get_model_filepath(run_name)
    print(model_filepath)
    model = load_model(model_filepath,
                       custom_objects={'dice_coef_loss': dice_coef_loss, 'dice_coef': dice_coef})

    train_metadata_filepath = config.get_train_metadata_filepath(run_name)
    # NOTE: pickle executes arbitrary code on load; only read metadata files
    # that were written by this pipeline.
    with open(train_metadata_filepath, 'rb') as handle:
        train_metadata = pickle.load(handle)
    print('train setting: ', train_metadata['params'])
    # normalisation constants calculated over the training set
    train_mean = train_metadata['params']['mean']
    train_std = train_metadata['params']['std']

    print('> Loading image...')
    img_mat = helper.load_nifti_mat_from_file(
        data_dirs[dataset] + patient + '_img.nii')  # values between 0 and 255
    print('> Loading mask...')
    mask_mat = helper.load_nifti_mat_from_file(
        data_dirs[dataset] + patient + '_mask.nii')  # values 0 and 1

    # -----------------------------------------------------------
    # PREDICTION
    # -----------------------------------------------------------
    # the segmentation is going to be saved in this probability matrix
    prob_mat = np.zeros(img_mat.shape, dtype=np.float32)
    x_dim, y_dim, _ = prob_mat.shape

    # get the x, y and z coordinates where there is brain
    x, y, z = np.where(mask_mat)
    print('x shape:', x.shape)
    print('y shape:', y.shape)
    print('z shape:', z.shape)

    # get the z slices with brain
    z_slices = np.unique(z)

    # start cutting out and predicting the patches
    starttime_total = time.time()
    # proceed slice by slice
    for i in z_slices:
        print('Slice:', i)
        starttime_slice = time.time()
        in_slice = z == i
        # find all x and y coordinates with brain in given slice
        x_in_slice = x[in_slice]
        y_in_slice = y[in_slice]
        # find min and max x and y coordinates
        slice_x_min = x_in_slice.min()
        slice_x_max = x_in_slice.max()
        slice_y_min = y_in_slice.min()
        slice_y_max = y_in_slice.max()

        # start offsets of the patches in x and y direction in given slice; the
        # max coordinate is inclusive and the last patch is shifted back to the
        # volume border if it would exceed the probability matrix
        x_starts = _patch_starts(slice_x_min, slice_x_max, x_dim, patch_size)
        y_starts = _patch_starts(slice_y_min, slice_y_max, y_dim, patch_size)
        print('num x patches', len(x_starts))
        print('num y patches', len(y_starts))

        # predict patch by patch in given slice
        for patch_start_x in x_starts:
            patch_end_x = patch_start_x + patch_size
            for patch_start_y in y_starts:
                patch_end_y = patch_start_y + patch_size

                # get the patch with the found coordinates from the image matrix
                img_patch = img_mat[patch_start_x: patch_end_x, patch_start_y: patch_end_y, i]

                # normalize the patch with mean and standard deviation calculated over training set
                # (np.float64 replaces the alias np.float that NumPy 1.24 removed)
                img_patch = img_patch.astype(np.float64)
                img_patch -= train_mean
                img_patch /= train_std

                # predict the patch with the model and save to probability matrix
                prob_mat[patch_start_x: patch_end_x, patch_start_y: patch_end_y, i] = np.reshape(
                    model.predict(
                        np.reshape(img_patch,
                                   (1, patch_size, patch_size, 1)), batch_size=1, verbose=0),
                    (patch_size, patch_size))

        # how long does the prediction take for one slice
        # (hours are the top unit and must not wrap, so no modulo on them)
        duration_slice = time.time() - starttime_slice
        print('prediction in slice took:', duration_slice // 3600, 'hours',
              (duration_slice // 60) % 60, 'minutes',
              duration_slice % 60, 'seconds')

    # how long does the prediction take for a patient
    duration_total = time.time() - starttime_total
    print('prediction in total took:', duration_total // 3600, 'hours',
          (duration_total // 60) % 60, 'minutes',
          duration_total % 60, 'seconds')

    # -----------------------------------------------------------
    # SAVE AS NIFTI
    # -----------------------------------------------------------
    helper.create_and_save_nifti(prob_mat, config.get_probs_filepath(run_name, patient, dataset))
コード例 #5
0
def train_and_save(train_X, train_y, val_X, val_y, patch_size, num_epochs, batch_size, lr, dropout, num_channels,
                   activation, final_activation, optimizer, loss, metrics, num_patches, factor_train_samples, mean, std,
                   threshold, rotation_range, horizontal_flip, vertical_flip, shear_range, width_shift_range,
                   height_shift_range):
    """
    Train a model for one hyper-parameter setting (or load it if already saved),
    measure its performance and store everything in the run's metadata file.

    If no model file exists for the run, a model is built with ``get_unet``,
    trained on augmented patches and saved together with a metadata pickle.
    Otherwise the saved model and metadata are loaded. In both cases the train
    and validation patches are then predicted, AUC / accuracy / average class
    accuracy / Dice are computed, and the metadata pickle is rewritten with an
    added ``'performance'`` entry.

    :param train_X: training image patches.
    :param train_y: training label patches.
    :param val_X: validation image patches.
    :param val_y: validation label patches.
    :param patch_size: training patch size.
    :param num_epochs: number of training epochs.
    :param batch_size: training batch size.
    :param lr: learning rate.
    :param dropout: dropout rate.
    :param num_channels: forwarded to ``get_unet``.
    :param activation: forwarded to ``get_unet``.
    :param final_activation: forwarded to ``get_unet``.
    :param optimizer: forwarded to ``get_unet``.
    :param loss: forwarded to ``get_unet`` and stored in the metadata.
    :param metrics: forwarded to ``get_unet``.
    :param num_patches: divisor turning ``len(train_X)`` / ``len(val_X)`` into
        the patient counts used in the run name.
    :param factor_train_samples: multiplier on ``len(train_X)`` that sets the
        number of augmented samples drawn per epoch.
    :param mean: mean used for training data centering (stored in the metadata).
    :param std: std used for training data normalization (stored in the metadata).
    :param threshold: probabilities strictly above this value count as class 1.
    :param rotation_range: augmentation setting for ``ImageDataGenerator``.
    :param horizontal_flip: augmentation setting for ``ImageDataGenerator``.
    :param vertical_flip: augmentation setting for ``ImageDataGenerator``.
    :param shear_range: augmentation setting for ``ImageDataGenerator``.
    :param width_shift_range: augmentation setting for ``ImageDataGenerator``.
    :param height_shift_range: augmentation setting for ``ImageDataGenerator``.
    """
    print('patch size', patch_size)
    print('number of epochs', num_epochs)
    print('batch size', batch_size)
    print('learning rate', lr)
    print('dropout', dropout)

    # create the name of current run
    run_name = config.get_run_name(patch_size, num_epochs, batch_size, lr, dropout,
                                   len(train_X) // num_patches, len(val_X) // num_patches)
    model_filepath = config.get_model_filepath(run_name)
    train_metadata_filepath = config.get_train_metadata_filepath(run_name)

    # if model does not exist, train it
    if not os.path.isfile(model_filepath):
        # -----------------------------------------------------------
        # CREATING MODEL
        # -----------------------------------------------------------
        model = get_unet(patch_size, num_channels, activation, final_activation, optimizer, lr, dropout, loss, metrics)

        # -----------------------------------------------------------
        # CREATING DATA GENERATOR
        # -----------------------------------------------------------
        # transforming images and masks together
        data_gen_args = dict(rotation_range=rotation_range,
                             horizontal_flip=horizontal_flip,
                             vertical_flip=vertical_flip,
                             shear_range=shear_range,
                             width_shift_range=width_shift_range,
                             height_shift_range=height_shift_range,
                             fill_mode='constant')
        X_datagen = ImageDataGenerator(**data_gen_args)
        y_datagen = ImageDataGenerator(**data_gen_args)

        # Provide the same seed and keyword arguments to the fit and flow methods
        # so that images and labels receive identical random transformations
        seed = 1
        X_datagen.fit(train_X, augment=True, seed=seed)
        y_datagen.fit(train_y, augment=True, seed=seed)

        X_generator = X_datagen.flow(train_X, batch_size=batch_size, seed=seed, shuffle=True)
        y_generator = y_datagen.flow(train_y, batch_size=batch_size, seed=seed, shuffle=True)

        # combine generators into one which yields image and label
        train_generator = zip(X_generator, y_generator)

        # -----------------------------------------------------------
        # TRAINING MODEL
        # -----------------------------------------------------------
        start_train = time.time()
        # keras callback for saving the training history to csv file
        csv_logger = CSVLogger(config.get_train_history_filepath(run_name))
        # training
        # NOTE(review): fit_generator is deprecated in recent tf.keras releases in
        # favour of fit; kept because the targeted Keras version is not visible
        # here -- confirm before upgrading.
        history = model.fit_generator(train_generator, validation_data=(val_X, val_y),
                                      steps_per_epoch=factor_train_samples * len(train_X) // batch_size,
                                      epochs=num_epochs,
                                      verbose=2, shuffle=True, callbacks=[csv_logger])

        duration_train = int(time.time() - start_train)
        # hours are the top unit and must not wrap, so no modulo is applied to them
        print('training took:', duration_train // 3600, 'hours', (duration_train // 60) % 60,
              'minutes', duration_train % 60,
              'seconds')

        # -----------------------------------------------------------
        # SAVING MODEL
        # -----------------------------------------------------------
        print('Saving model to ', model_filepath)
        model.save(model_filepath)

        print('Saving params to ', train_metadata_filepath)
        history.params['batchsize'] = batch_size
        history.params['dropout'] = dropout
        history.params['patch_size'] = patch_size
        history.params['learning_rate'] = lr
        history.params['loss'] = loss
        history.params['mean'] = mean  # mean used for training data centering
        history.params['std'] = std  # std used for training data normalization
        history.params['samples'] = factor_train_samples * len(train_X)
        history.params['val_samples'] = len(val_X)
        history.params['total_time'] = duration_train
        history.params['rotation range'] = rotation_range
        history.params['horizontal_flip'] = horizontal_flip
        history.params['vertical_flip'] = vertical_flip
        history.params['shear_range'] = shear_range
        history.params['width_shift_range'] = width_shift_range
        history.params['height_shift_range'] = height_shift_range
        # same layout as the pickled file, so both branches hand over one structure
        train_metadata = {'params': history.params, 'history': history.history}
        with open(train_metadata_filepath, 'wb') as handle:
            pickle.dump(train_metadata, handle)

    else:  # model exists, load it
        # -----------------------------------------------------------
        # LOADING MODEL
        # -----------------------------------------------------------
        print('Loading  model from', model_filepath)
        model = load_model(model_filepath,
                           custom_objects={'dice_coef_loss': dice_coef_loss, 'dice_coef': dice_coef})
        model.summary()
        # NOTE: pickle executes arbitrary code on load; only read metadata files
        # that were written by this pipeline.
        with open(train_metadata_filepath, 'rb') as handle:
            train_metadata = pickle.load(handle)

    # -----------------------------------------------------------
    # PERFORMANCE MEASURES
    # -----------------------------------------------------------
    start_perf = time.time()
    train_y_pred_probs = model.predict(train_X, batch_size=batch_size, verbose=1)
    train_y_pred_class = (train_y_pred_probs > threshold).astype(
        np.uint8)  # convert from boolean to int, values 0 or 1
    val_y_pred_probs = model.predict(val_X, batch_size=batch_size, verbose=1)
    val_y_pred_class = (val_y_pred_probs > threshold).astype(
        np.uint8)  # convert from boolean to int, values 0 or 1

    print()
    print("Performance:")

    # the metric functions are fed flat per-pixel vectors
    train_y_f = train_y.flatten()
    train_y_pred_probs_f = train_y_pred_probs.flatten()
    train_y_pred_class_f = train_y_pred_class.flatten()

    val_y_f = val_y.flatten()
    val_y_pred_probs_f = val_y_pred_probs.flatten()
    val_y_pred_class_f = val_y_pred_class.flatten()

    train_auc = roc_auc_score(train_y_f, train_y_pred_probs_f)
    train_acc = accuracy_score(train_y_f, train_y_pred_class_f)
    train_avg_acc, train_tn, train_fp, train_fn, train_tp = avg_class_acc(train_y_f, train_y_pred_class_f)
    train_dice = f1_score(train_y_f, train_y_pred_class_f)

    val_auc = roc_auc_score(val_y_f, val_y_pred_probs_f)
    val_acc = accuracy_score(val_y_f, val_y_pred_class_f)
    val_avg_acc, val_tn, val_fp, val_fn, val_tp = avg_class_acc(val_y_f, val_y_pred_class_f)
    val_dice = f1_score(val_y_f, val_y_pred_class_f)

    print('train auc:', train_auc)
    print('train acc:', train_acc)
    print('train avg acc:', train_avg_acc)
    print('train dice:', train_dice)

    print('val auc:', val_auc)
    print('val acc:', val_acc)
    print('val avg acc:', val_avg_acc)
    print('val dice:', val_dice)

    duration_perf = int(time.time() - start_perf)
    print('performance assessment took:', duration_perf // 3600, 'hours', (duration_perf // 60) % 60,
          'minutes',
          duration_perf % 60,
          'seconds')
    # stored time (training, plus any earlier assessments) + this assessment
    duration_total = train_metadata['params']['total_time'] + duration_perf
    print('total time:', duration_total // 3600, 'hours', (duration_total // 60) % 60, 'minutes',
          duration_total % 60,
          'seconds')

    # -----------------------------------------------------------
    # SAVING RESULTS
    # -----------------------------------------------------------
    print('Saving training results to ', train_metadata_filepath)
    performance = {'train_true_positives': train_tp, 'train_true_negatives': train_tn,
                   'train_false_positives': train_fp, 'train_false_negatives': train_fn,
                   'train_auc': train_auc, 'train_acc': train_acc, 'train_avg_acc': train_avg_acc,
                   'train_dice': train_dice,
                   'val_true_positives': val_tp, 'val_true_negatives': val_tn,
                   'val_false_positives': val_fp, 'val_false_negatives': val_fn,
                   'val_auc': val_auc, 'val_acc': val_acc, 'val_avg_acc': val_avg_acc,
                   'val_dice': val_dice}
    train_metadata['params']['total_time'] = duration_total
    results = {'params': train_metadata['params'], 'history': train_metadata['history'],
               'performance': performance}
    with open(train_metadata_filepath, 'wb') as handle:
        pickle.dump(results, handle)
    print('________________________________________________________________________________')