def pad_dataset_to_shape(dataset_path: str, shape: tuple):
    """
    Pad the image volumes of a saved dataset to a desired shape.

    The dataset is read with ``load_saved_data``. Every CT input, CT lesion
    ground truth and brain mask is passed through ``pad_to_shape`` (per the
    original author's note, padding is applied equally on both sides). MRI
    inputs and MRI lesion ground truths are padded the same way when present
    and replaced by an empty list when their length is 0. Clinical inputs,
    ids and params are passed through unchanged.

    :param dataset_path: path to the saved dataset file
    :param shape: tuple, desired shape handed to ``pad_to_shape``
    :return: None; the padded dataset is saved next to the original file,
        using the original file name with a "padded_" prefix
    """
    data_dir, filename = os.path.split(dataset_path)
    (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT,
     brain_masks, ids, params) = load_saved_data(data_dir, filename)

    def _pad_all(volumes):
        # Pad each subject separately, then stack the results into one array.
        return np.array([pad_to_shape(volume, shape) for volume in volumes])

    def _pad_if_present(volumes):
        # Absent MRI data (length 0) is stored as an empty list.
        return _pad_all(volumes) if len(volumes) != 0 else []

    padded_ct = _pad_all(ct_inputs)
    padded_ct_lesions = _pad_all(ct_lesion_GT)
    padded_masks = _pad_all(brain_masks)
    padded_mri = _pad_if_present(mri_inputs)
    padded_mri_lesions = _pad_if_present(mri_lesion_GT)

    padded_dataset = (
        clinical_inputs,
        padded_ct,
        padded_ct_lesions,
        padded_mri,
        padded_mri_lesions,
        padded_masks,
        ids,
        params,
    )
    save_dataset(padded_dataset, data_dir, 'padded_' + filename)
def add_noise_to_channel(ct_dataset:[str, np.ndarray], channel_index=5, outfile=None):
    """
    Add sparse binary noise to one channel of the CT inputs.

    Each voxel of the selected channel independently receives +1 with
    probability 0.002 (and +0 otherwise). Voxels that reach the value 2 are
    reset to 1, so a channel holding only 0/1 values stays binary.

    :param ct_dataset: either the path to a saved dataset (loaded with
        ``dl.load_saved_data``) or an array of CT inputs whose last axis is
        the channel axis
    :param channel_index: index on the last axis of the channel to perturb
    :param outfile: name of the output file, only used when ``ct_dataset`` is
        a path; defaults to ``<dataset name>_noisy<channel_index>.npz``
    :return: the noised CT inputs. When an array is passed in, the result is
        a new array and the caller's array is left unmodified. When a path is
        passed in, the noised dataset is additionally saved in the directory
        of the original file.
    """
    loaded_from_file = isinstance(ct_dataset, str)

    if loaded_from_file:
        data_dir = os.path.dirname(ct_dataset)
        file_name = os.path.basename(ct_dataset)
        data = dl.load_saved_data(data_dir, file_name)
        clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, masks, ids, params = data
        # A mask array with at most one element is treated as "no masks".
        if masks.size <= 1:
            masks = None
    else:
        # Work on a copy: the noise is applied in place below and must not
        # leak into the array owned by the caller.
        ct_inputs = ct_dataset.copy()

    # Basic indexing yields a view, so edits to `channel` land in ct_inputs.
    channel = ct_inputs[..., channel_index]
    noise = np.random.choice([0, 1], channel.shape, p=[0.998, 0.002])
    channel += noise
    # 1 + 1 -> 2 would leave the binary range; clamp those voxels back to 1.
    channel[channel == 2] = 1

    if loaded_from_file:
        if outfile is None:
            outfile = os.path.basename(ct_dataset).split('.')[0] + f'_noisy{channel_index}.npz'
        params = params.item()

        dataset = (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, masks, ids, params)
        dl.save_dataset(dataset, os.path.dirname(ct_dataset), out_file_name=outfile)

    return ct_inputs
Ejemplo n.º 3
0
def downsample_dataset(data_dir, scale_factor: float, filename='data_set.npz'):
    """
    Rescale the CT volumes, CT lesion maps and brain masks of a dataset.

    Every subject's CT input is passed to ``rescale`` with ``scale_factor``
    on its first three axes and 1 on the fourth (channel) axis; the CT lesion
    ground truth and brain mask are rescaled by ``scale_factor`` on all three
    of their axes. Clinical inputs, MRI data, ids and params are passed
    through unchanged.

    :param data_dir: directory containing the dataset file
    :param scale_factor: factor handed to ``rescale`` for each spatial axis
    :param filename: name of the dataset file inside ``data_dir``
    :return: None; the result is saved in ``data_dir`` as
        ``scale<scale_factor>_<filename>``
    """
    loaded = load_saved_data(data_dir, filename)
    (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT,
     brain_masks, ids, params) = loaded

    spatial_scale = (scale_factor, ) * 3
    # The channel axis of the CT input keeps its size.
    ct_scale = spatial_scale + (1, )

    scaled_ct, scaled_lesions, scaled_masks = [], [], []
    for position, subject_id in enumerate(ids):
        print('Scaling', position, subject_id)
        scaled_ct.append(rescale(ct_inputs[position], ct_scale))
        scaled_lesions.append(rescale(ct_lesion_GT[position], spatial_scale))
        scaled_masks.append(rescale(brain_masks[position], spatial_scale))

    scaled_dataset = (
        clinical_inputs,
        np.array(scaled_ct),
        np.array(scaled_lesions),
        mri_inputs,
        mri_lesion_GT,
        np.array(scaled_masks),
        ids,
        params,
    )
    save_dataset(scaled_dataset, data_dir, f'scale{scale_factor}_' + filename)
Ejemplo n.º 4
0
def downsample_dataset_to_shape(data_dir,
                                target_shape: tuple,
                                filename='data_set.npz',
                                n_c_ct=4):
    """
    Resize the CT volumes, CT lesion maps and brain masks to a fixed shape.

    Every subject's CT input is passed to ``resize`` with the output shape
    ``target_shape + (n_c_ct,)``; the CT lesion ground truth and brain mask
    are resized to ``target_shape``. Clinical inputs, MRI data, ids and
    params are passed through unchanged.

    :param data_dir: directory containing the dataset file
    :param target_shape: tuple with the three target spatial dimensions
    :param filename: name of the dataset file inside ``data_dir``
    :param n_c_ct: size of the channel axis appended to ``target_shape`` for
        the CT inputs
    :return: None; the result is saved in ``data_dir`` as
        ``shape<x>x<y>x<z>_<filename>``
    """
    loaded = load_saved_data(data_dir, filename)
    (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT,
     brain_masks, ids, params) = loaded

    resized_ct, resized_lesions, resized_masks = [], [], []
    for position, subject_id in enumerate(ids):
        print('Scaling', position, subject_id)
        # CT inputs carry an extra trailing channel axis.
        ct_shape = target_shape + (n_c_ct, )
        resized_ct.append(resize(ct_inputs[position], ct_shape))
        resized_lesions.append(resize(ct_lesion_GT[position], target_shape))
        resized_masks.append(resize(brain_masks[position], target_shape))

    resized_dataset = (
        clinical_inputs,
        np.array(resized_ct),
        np.array(resized_lesions),
        mri_inputs,
        mri_lesion_GT,
        np.array(resized_masks),
        ids,
        params,
    )

    out_name = (
        f'shape{target_shape[0]}x{target_shape[1]}x{target_shape[2]}_' +
        filename)
    save_dataset(resized_dataset, data_dir, out_name)
Ejemplo n.º 5
0
def add_penumbra_map(ct_dataset:[str, np.ndarray], masks = None, tmax_channel = 0, outfile = None, one_hot_encode=False):
    """
    Append a thresholded penumbra map as extra channel(s) to the CT inputs.

    The penumbra map is 1 wherever the channel ``tmax_channel`` of the CT
    inputs is greater than 6 and 0 elsewhere. If masks are available the map
    is multiplied by them. With ``one_hot_encode`` the map is stored as two
    channels (``1 - map`` followed by ``map``) instead of one.

    :param ct_dataset: either the path to a saved dataset (loaded with
        ``dl.load_saved_data``) or a 5-dimensional array of CT inputs whose
        last axis is the channel axis
    :param masks: optional masks multiplied onto the penumbra map; replaced
        by the masks stored in the dataset when ``ct_dataset`` is a path
    :param tmax_channel: index on the last axis of the channel to threshold
    :param outfile: name of the output file, only used when ``ct_dataset`` is
        a path; defaults to the dataset name with a "_with_penumbra" or
        "_with_one_hot_encoded_core_penumbra" suffix
    :param one_hot_encode: store the map as two complementary channels
    :return: tuple ``(ct_inputs, penumbra_map)`` where ``ct_inputs`` has the
        penumbra channel(s) concatenated on its last axis. When a path is
        given, the extended dataset is also saved next to the original file
        and the new channel name(s) are appended to ``params['ct_sequences']``.
    """
    from_file = isinstance(ct_dataset, str)

    if from_file:
        loaded = dl.load_saved_data(os.path.dirname(ct_dataset),
                                    os.path.basename(ct_dataset))
        (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT,
         masks, ids, params) = loaded
        # A mask array with at most one element is treated as "no masks".
        masks = None if masks.size <= 1 else masks
    else:
        ct_inputs = ct_dataset

    # Unpacking five values also enforces a 5-dimensional input.
    n_subj, n_x, n_y, n_z, _ = ct_inputs.shape

    penumbra = np.zeros((n_subj, n_x, n_y, n_z))
    above_threshold = ct_inputs[..., tmax_channel] > 6
    penumbra[above_threshold] = 1

    # Restrict to the prior mask when one is defined.
    restricted = penumbra if masks is None else penumbra * masks
    restricted = restricted[..., np.newaxis]

    if one_hot_encode:
        restricted = np.concatenate((1 - restricted, restricted), axis=-1)

    ct_inputs = np.concatenate((ct_inputs, restricted), axis=-1)

    if not from_file:
        return ct_inputs, restricted

    stem = os.path.basename(ct_dataset).split('.')[0]
    if outfile is None and one_hot_encode:
        outfile = stem + '_with_one_hot_encoded_core_penumbra.npz'
    elif outfile is None:
        outfile = stem + '_with_penumbra.npz'

    params = params.item()
    if one_hot_encode:
        new_sequences = ['penumbra_Tmax_6_class0', 'penumbra_Tmax_6_class1']
    else:
        new_sequences = ['penumbra_Tmax_6']
    for sequence_name in new_sequences:
        params['ct_sequences'].append(sequence_name)

    dataset = (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, masks, ids, params)
    dl.save_dataset(dataset, os.path.dirname(ct_dataset), out_file_name=outfile)

    return ct_inputs, restricted
def filter_for_clinical_param(dataset_path,
                              clinical_path,
                              clinical_parameter,
                              id_parameter='anonymised_id'):
    """
    Drop subjects that lack a given clinical parameter and save the result.

    A subject is removed when its id does not occur in the column
    ``id_parameter`` of the clinical table, or when the first matching row
    holds a missing value (NaN/None) in the column ``clinical_parameter``.
    CT inputs, masks and ids are always filtered. CT lesion ground truth is
    filtered when it holds more than one element, MRI inputs and MRI lesion
    ground truth when MRI inputs are non-empty, and clinical inputs when
    they hold one row per subject, so that they stay aligned with ``ids``.

    :param dataset_path: path to the saved dataset file
    :param clinical_path: path to the Excel file with the clinical table
    :param clinical_parameter: column that must hold a value for a subject
        to be kept
    :param id_parameter: column of the clinical table holding subject ids
    :return: None; the filtered dataset is saved next to the original file
        as ``<dataset name>_with_<clinical_parameter>.npz``
    :raises AssertionError: if the filtered arrays disagree on the number of
        subjects
    """
    data_dir = os.path.dirname(dataset_path)
    file_name = os.path.basename(dataset_path)
    data = dl.load_saved_data(data_dir, file_name)
    clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, masks, ids, params = data

    clinical_df = pd.read_excel(clinical_path)
    # Build the id lookup once instead of once per subject.
    known_ids = set(clinical_df[id_parameter])
    n_subjects_before = len(ids)

    indices_to_remove = []
    for idx, subj_id in enumerate(ids):
        if subj_id not in known_ids:
            print(
                f'{subj_id} not found in clinical database. Will be removed.')
            indices_to_remove.append(idx)
            continue
        value = clinical_df.loc[clinical_df[id_parameter] == subj_id,
                                clinical_parameter].iloc[0]
        # pd.isna also copes with non-float columns (strings, objects),
        # where math.isnan would raise a TypeError.
        if pd.isna(value):
            print(
                f'{subj_id} has no parameter "{clinical_parameter}" in clinical database. Will be removed.'
            )
            indices_to_remove.append(idx)

    ct_inputs = np.delete(ct_inputs, indices_to_remove, axis=0)
    masks = np.delete(masks, indices_to_remove, axis=0)
    ids = np.delete(ids, indices_to_remove, axis=0)

    assert ct_inputs.shape[0] == masks.shape[0]
    assert ct_inputs.shape[0] == ids.shape[0]

    if not ct_lesion_GT.size <= 1:
        ct_lesion_GT = np.delete(ct_lesion_GT, indices_to_remove, axis=0)
        assert ct_inputs.shape[0] == ct_lesion_GT.shape[0]

    if not len(mri_inputs) == 0:
        mri_inputs = np.delete(mri_inputs, indices_to_remove, axis=0)
        mri_lesion_GT = np.delete(mri_lesion_GT, indices_to_remove, axis=0)

    # Keep per-subject clinical inputs aligned with the filtered ids. Only
    # touch them when their first axis really enumerates the subjects.
    if (len(clinical_inputs.shape) > 0 and clinical_inputs.size > 0
            and clinical_inputs.shape[0] == n_subjects_before):
        clinical_inputs = np.delete(clinical_inputs,
                                    indices_to_remove,
                                    axis=0)

    outfile = os.path.basename(dataset_path).split(
        '.')[0] + f'_with_{clinical_parameter}.npz'

    dataset = (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs,
               mri_lesion_GT, masks, ids, params)
    dl.save_dataset(dataset,
                    os.path.dirname(dataset_path),
                    out_file_name=outfile)
Ejemplo n.º 7
0
def group_to_file(minimal_common_shape,
                  data_dir,
                  temp_dir,
                  filename='data_set.npz',
                  n_c=4):
    """
    Assemble per-subject padded NIfTI volumes into one dataset file.

    Ids, clinical inputs, params and MRI data are read from the existing
    dataset archive. For every id the files ``<id>_padded_ct_input.nii``,
    ``<id>_padded_ct_lesion.nii`` and ``<id>_padded_brain_mask.nii`` are
    loaded from ``temp_dir`` and written into preallocated arrays.

    :param minimal_common_shape: tuple ``(n_x, n_y, n_z)``, spatial shape of
        the padded volumes
    :param data_dir: directory containing the dataset archive
    :param temp_dir: directory containing the per-subject NIfTI files
    :param filename: name of the dataset archive inside ``data_dir``
    :param n_c: size of the channel axis of the CT inputs
    :return: None; the result is saved in ``data_dir`` as
        ``comMin_<filename>``
    """
    dataset_file = os.path.join(data_dir, filename)
    # Open the archive once and close it deterministically; it was
    # previously reopened for every key and never closed.
    # NOTE(security): allow_pickle=True unpickles object arrays, which can
    # execute arbitrary code - only use this on trusted dataset files.
    with np.load(dataset_file, allow_pickle=True) as archive:
        ids = archive['ids']
        clinical_inputs = archive['clinical_inputs']
        params = archive['params']
        mri_inputs = archive['mri_inputs']
        mri_lesion_GT = archive['mri_lesion_GT']

    n_x, n_y, n_z = minimal_common_shape
    n_subjects = len(ids)
    final_ct_inputs = np.empty((n_subjects, n_x, n_y, n_z, n_c))
    final_ct_lesion_GT = np.empty((n_subjects, n_x, n_y, n_z))
    final_brain_masks = np.empty((n_subjects, n_x, n_y, n_z), dtype=bool)

    for subj_index, subj_id in enumerate(ids):
        print('Loading', subj_index, subj_id)

        final_ct_inputs[subj_index] = nib.load(
            os.path.join(temp_dir,
                         f'{subj_id}_padded_ct_input.nii')).get_fdata()
        final_ct_lesion_GT[subj_index] = nib.load(
            os.path.join(temp_dir,
                         f'{subj_id}_padded_ct_lesion.nii')).get_fdata()
        # Assignment into the boolean array casts the mask values to bool.
        final_brain_masks[subj_index] = nib.load(
            os.path.join(temp_dir,
                         f'{subj_id}_padded_brain_mask.nii')).get_fdata()

    # The arrays are already ndarrays; wrapping them in np.array() again
    # would only duplicate them in memory.
    dataset = (clinical_inputs, final_ct_inputs, final_ct_lesion_GT,
               mri_inputs, mri_lesion_GT, final_brain_masks, ids, params)

    save_dataset(dataset, data_dir, 'comMin_' + filename)
def mirror_dataset_images(dataset_path: str):
    """
    Augment a dataset with copies of all images flipped along axis 1.

    CT inputs, CT lesion ground truth and masks are each concatenated (along
    axis 0) with a copy of themselves flipped along axis 1. The ids of the
    flipped copies get a "flipped_" prefix. MRI inputs and MRI lesion ground
    truth are treated the same way when MRI inputs are non-empty, otherwise
    both are stored as empty lists.

    NOTE(review): clinical_inputs is passed through unchanged, i.e. it is not
    duplicated for the flipped subjects - confirm this is intended.

    :param dataset_path: path to the saved dataset file
    :return: None; the augmented dataset is saved next to the original file,
        using the original file name with a "flipped_" prefix
    """
    data_dir, file_name = os.path.split(dataset_path)
    (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT,
     masks, ids, params) = dl.load_saved_data(data_dir, file_name)

    def _mirrored(volumes):
        # Axis 0 enumerates subjects, axis 1 is the x axis being flipped.
        return np.flip(volumes, axis=1)

    def _stack(originals, mirrored):
        return np.concatenate((originals, mirrored), axis=0)

    mirrored_ct = _mirrored(ct_inputs)
    mirrored_ct_lesions = _mirrored(ct_lesion_GT)
    mirrored_masks = _mirrored(masks)
    mirrored_ids = ['flipped_' + subject_id for subject_id in ids]

    all_ct = _stack(ct_inputs, mirrored_ct)
    all_ct_lesions = _stack(ct_lesion_GT, mirrored_ct_lesions)
    all_masks = _stack(masks, mirrored_masks)
    all_ids = _stack(ids, mirrored_ids)

    if len(mri_inputs) != 0:
        mirrored_mri = _mirrored(mri_inputs)
        mirrored_mri_lesions = _mirrored(mri_lesion_GT)
        all_mri = _stack(mri_inputs, mirrored_mri)
        all_mri_lesions = _stack(mri_lesion_GT, mirrored_mri_lesions)
    else:
        all_mri = []
        all_mri_lesions = []

    augmented_dataset = (clinical_inputs, all_ct, all_ct_lesions, all_mri,
                         all_mri_lesions, all_masks, all_ids, params)

    save_dataset(augmented_dataset, data_dir, 'flipped_' + file_name)
Ejemplo n.º 9
0
def dataset_train_test_split(dataset_path,
                             test_size=0.33,
                             random_state=42,
                             stratification_data=None,
                             stratify_var=None,
                             shuffle=True):
    """
    Split a saved dataset into a train and a test dataset file.

    Subject indices are split with ``train_test_split``; every array of the
    dataset that has at least one dimension and one element is indexed with
    the resulting train/test indices, all other arrays are copied unchanged
    into both outputs. Params are shared by both outputs.

    :param dataset_path: path to the saved dataset file
    :param test_size: forwarded to ``train_test_split``
    :param random_state: forwarded to ``train_test_split``
    :param stratification_data: optional path to an Excel file; when given,
        the value of ``stratify_var`` in the first row whose 'anonymised_id'
        equals the subject id is used as stratification label
    :param stratify_var: column of the stratification table holding labels
    :param shuffle: forwarded to ``train_test_split``
    :return: None; the two parts are saved next to the original file with a
        "train_" and a "test_" prefix
    """
    data_dir, file_name = os.path.split(dataset_path)
    loaded = dl.load_saved_data(data_dir, file_name)
    (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT,
     masks, ids, params) = loaded
    params = params.item()
    all_indices = list(range(len(ids)))

    stratified = stratification_data is not None
    split_options = dict(test_size=test_size,
                         random_state=random_state,
                         shuffle=shuffle)

    if stratified:
        stratification_df = pd.read_excel(stratification_data)
        stratification_labels = []
        for subj_id in ids:
            rows = stratification_df.loc[stratification_df['anonymised_id'] ==
                                         subj_id, stratify_var]
            stratification_labels.append(rows.iloc[0])
        print(
            f'Initial positive label distribution: {sum(stratification_labels)/len(stratification_labels)}'
        )
        split_options['stratify'] = stratification_labels

    train_indices, test_indices = train_test_split(all_indices,
                                                   **split_options)

    def _split(array):
        # Scalars and empty arrays carry no per-subject data: share them.
        if len(array.shape) == 0 or array.size == 0:
            return array, array
        return array[train_indices], array[test_indices]

    train_clinical, test_clinical = _split(clinical_inputs)
    train_ct, test_ct = _split(ct_inputs)
    train_ct_gt, test_ct_gt = _split(ct_lesion_GT)
    train_mri, test_mri = _split(mri_inputs)
    train_mri_gt, test_mri_gt = _split(mri_lesion_GT)
    train_masks, test_masks = _split(masks)
    train_ids, test_ids = _split(ids)

    if stratified:
        train_labels = np.array(stratification_labels)[train_indices]
        print(f'Train positive label distribution: '
              f'{sum(train_labels)/len(train_labels)}')
    train_dataset = (train_clinical, train_ct, train_ct_gt, train_mri,
                     train_mri_gt, train_masks, train_ids, params)

    if stratified:
        test_labels = np.array(stratification_labels)[test_indices]
        print(f'Test positive label distribution: '
              f'{sum(test_labels)/len(test_labels)}')
    test_dataset = (test_clinical, test_ct, test_ct_gt, test_mri,
                    test_mri_gt, test_masks, test_ids, params)

    dl.save_dataset(train_dataset,
                    data_dir,
                    out_file_name='train_' + file_name)
    dl.save_dataset(test_dataset,
                    data_dir,
                    out_file_name='test_' + file_name)
def join_datasets(dataset1_path, dataset2_path, outfile=None):
    """
    Concatenate two saved datasets along the subject axis and save the result.

    Ids, CT inputs and masks are always joined. CT lesion ground truth and
    clinical inputs are joined only when both datasets hold more than one
    element for them, MRI inputs and MRI lesion ground truth only when both
    datasets have MRI inputs; otherwise the respective entry is replaced by
    an empty array. The params of the first dataset are kept.

    :param dataset1_path: path to the first saved dataset file
    :param dataset2_path: path to the second saved dataset file
    :param outfile: name of the output file; defaults to
        'joined_dataset.npz'
    :return: None; the joined dataset is saved in the directory of the first
        dataset
    :raises AssertionError: if the joined ids contain duplicates or the
        channel counts of the CT (or MRI) inputs differ
    """
    out_dir = os.path.dirname(dataset1_path)
    data1 = dl.load_saved_data(out_dir, os.path.basename(dataset1_path))
    data2 = dl.load_saved_data(os.path.dirname(dataset2_path),
                               os.path.basename(dataset2_path))

    (clinical_inputs1, ct_inputs1, ct_lesion_GT1, mri_inputs1, mri_lesion_GT1,
     masks1, ids1, params1) = data1
    (clinical_inputs2, ct_inputs2, ct_lesion_GT2, mri_inputs2, mri_lesion_GT2,
     masks2, ids2, _) = data2

    def _join(first, second):
        # Stack along axis 0, the subject axis.
        return np.concatenate((first, second), axis=0)

    out_ids = _join(ids1, ids2)
    assert not contains_duplicates(
        out_ids), 'Joined dataset would contain duplicated ids. Aborting.'

    assert ct_inputs2.shape[-1] == ct_inputs1.shape[
        -1], 'Datasets do not have the same number of channels. Aborting.'
    out_ct_inputs = _join(ct_inputs1, ct_inputs2)

    both_have_ct_gt = ct_lesion_GT1.size > 1 and ct_lesion_GT2.size > 1
    if both_have_ct_gt:
        out_ct_lesion_GT = _join(ct_lesion_GT1, ct_lesion_GT2)
    else:
        print(
            'Ignoring ct ground truth, as at least one dataset has no entries.'
        )
        out_ct_lesion_GT = np.array([])

    both_have_clinical = (clinical_inputs1.size > 1
                          and clinical_inputs2.size > 1)
    if both_have_clinical:
        out_clinical_inputs = _join(clinical_inputs1, clinical_inputs2)
    else:
        print(
            'Ignoring clinical input, as at least one dataset has no entries.')
        out_clinical_inputs = np.array([])

    both_have_mri = len(mri_inputs1) != 0 and len(mri_inputs2) != 0
    if both_have_mri:
        assert mri_inputs1.shape[-1] == mri_inputs2.shape[
            -1], 'Datasets do not have the same number of channels. Aborting.'
        out_mri_inputs = _join(mri_inputs1, mri_inputs2)
        out_mri_lesion_GT = _join(mri_lesion_GT1, mri_lesion_GT2)
    else:
        print('Ignoring mri input, as at least one dataset has no entries.')
        out_mri_inputs = np.array([])
        out_mri_lesion_GT = np.array([])

    out_masks = _join(masks1, masks2)

    # Params are taken from the first dataset only.
    out_params = params1

    print('Saving new dataset with: ', out_params)
    summary = (
        ('Ids:', out_ids),
        ('Clinical:', out_clinical_inputs),
        ('CT in:', out_ct_inputs),
        ('CT gt:', out_ct_lesion_GT),
        ('MRI in:', out_mri_inputs),
        ('MRI gt:', out_mri_lesion_GT),
        ('masks:', out_masks),
    )
    for label, array in summary:
        print(label, array.shape)

    if outfile is None:
        outfile = 'joined_dataset.npz'

    dataset = (out_clinical_inputs, out_ct_inputs, out_ct_lesion_GT,
               out_mri_inputs, out_mri_lesion_GT, out_masks, out_ids,
               out_params)
    dl.save_dataset(dataset, out_dir, out_file_name=outfile)
Ejemplo n.º 11
0
def add_core_map(ct_dataset:[str, np.ndarray], masks = None, cbf_channel = 1, ncct_channel = 4, outfile = None, one_hot_encode_core=False, dilation_dimension=3):
    """
    Append a thresholded infarct-core map as extra channel(s) to the CT inputs.

    The core map marks voxels whose smoothed, contralaterally normalised CBF
    is below 0.38. Voxels inside a dilated CSF mask, a dilated major-vessel
    mask or a dilated non-brain mask are removed from it, and the result is
    multiplied by ``masks`` when masks are available. With
    ``one_hot_encode_core`` the map is stored as two channels (``1 - map``
    followed by ``map``) instead of one.

    :param ct_dataset: either the path to a saved dataset (loaded with
        ``dl.load_saved_data``) or a 5-dimensional array of CT inputs whose
        last axis is the channel axis
    :param masks: optional masks multiplied onto the core map; replaced by
        the masks stored in the dataset when ``ct_dataset`` is a path
    :param cbf_channel: index of the CBF channel, used for both the vessel
        mask and the core threshold
    :param ncct_channel: index of the non-contrast CT channel, used for the
        CSF mask and the brain extraction
    :param outfile: name of the output file, only used when ``ct_dataset`` is
        a path; defaults to the dataset name with a "_with_core" or
        "_with_one_hot_encoded_core" suffix
    :param one_hot_encode_core: store the map as two complementary channels
    :param dilation_dimension: 2 or 3, selects ``dilation_structure_2d`` or
        ``dilation_structure_3d`` for the binary dilations
    :return: tuple ``(ct_inputs, core_map)`` where ``ct_inputs`` has the core
        channel(s) concatenated on its last axis. When a path is given, the
        extended dataset is also saved next to the original file and the new
        channel name(s) are appended to ``params['ct_sequences']``.
    :raises NotImplementedError: if ``dilation_dimension`` is neither 2 nor 3
    """
    if int(dilation_dimension) == 2:
        dilation_structure = dilation_structure_2d
    elif int(dilation_dimension) == 3:
        dilation_structure = dilation_structure_3d
    else:
        raise NotImplementedError

    if isinstance(ct_dataset, str):
        data_dir = os.path.dirname(ct_dataset)
        file_name = os.path.basename(ct_dataset)
        data = dl.load_saved_data(data_dir, file_name)
        clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, masks, ids, params = data
        # A mask array with at most one element is treated as "no masks".
        if masks.size <= 1:
            masks = None
    else:
        ct_inputs = ct_dataset

    # Unpacking five values also enforces a 5-dimensional input.
    n_subj, n_x, n_y, n_z, n_c = ct_inputs.shape

    # Create CSF mask: smoothed NCCT below the 5th percentile of all NCCT
    # values lying strictly between 0 and 100.
    low_bounded_ncct = ct_inputs[..., ncct_channel][ct_inputs[..., ncct_channel] > 0]
    up_and_low_bounded_ncct = low_bounded_ncct[low_bounded_ncct < 100]
    threshold = np.percentile(up_and_low_bounded_ncct, 5)
    csf_mask = gaussian_smoothing(ct_inputs[..., ncct_channel, None], kernel_width=3) < threshold
    enlarged_csf_mask = np.array(
        [ndimage.binary_dilation(csf_mask[idx, ..., 0], structure=dilation_structure(2)) for idx in range(csf_mask.shape[0])])
    # 1 where the voxel is outside the dilated CSF mask, 0 inside.
    inv_csf_mask = -1 * enlarged_csf_mask + 1

    # Create skull mask from the per-subject brain extraction of the NCCT.
    # NOTE(review): the FSL path is hard-coded - confirm it matches the
    # deployment environment.
    brain_mask = np.array([ct_brain_extraction(ct_inputs[subj, ..., ncct_channel], fsl_path='/usr/local/fsl/bin')[0]
                           for subj in range(n_subj)])
    not_brain_mask = 1 - brain_mask
    # Enlarge slightly.
    enlarged_not_brain_mask = np.array(
        [ndimage.binary_dilation(not_brain_mask[subj], dilation_structure(3)) for subj in range(n_subj)])
    inv_skull_mask = 1 - enlarged_not_brain_mask

    # Create major vessel mask: CBF above its 99th percentile, dilated.
    threshold = np.percentile(ct_inputs[..., cbf_channel], 99)
    vessel_mask = ct_inputs[..., cbf_channel] > threshold
    enlarged_vessel_mask = np.array(
        [ndimage.binary_dilation(vessel_mask[idx], structure=dilation_structure(2)) for idx in range(vessel_mask.shape[0])])
    vessel_mask = enlarged_vessel_mask
    inv_vessel_mask = -1 * vessel_mask + 1

    # Create core mask. The CBF channel is selected through `cbf_channel`
    # (it used to be hard-coded to 1, which ignored the parameter).
    smooth_rCBF = normalise_by_contralateral_median(
        gaussian_smoothing(ct_inputs[..., cbf_channel, None], kernel_width=2))
    smooth_core_masks = smooth_rCBF < 0.38
    corr_csf_core_masks = smooth_core_masks * inv_csf_mask[..., None]
    corr_vx_core_masks = corr_csf_core_masks * inv_vessel_mask[..., None]
    corr_skull_core_masks = corr_vx_core_masks * inv_skull_mask[..., None]

    if masks is not None:
        # Restrict to defined prior mask
        restr_core = corr_skull_core_masks * masks[..., None]
    else:
        restr_core = corr_skull_core_masks

    if one_hot_encode_core:
        class_0_core = 1 - restr_core
        class_1_core = restr_core
        restr_core = np.concatenate((class_0_core, class_1_core), axis=-1)

    ct_inputs = np.concatenate((ct_inputs, restr_core), axis=-1)

    if isinstance(ct_dataset, str):
        if outfile is None:
            if one_hot_encode_core:
                outfile = os.path.basename(ct_dataset).split('.')[0] + '_with_one_hot_encoded_core.npz'
            else:
                outfile = os.path.basename(ct_dataset).split('.')[0] + '_with_core.npz'
        params = params.item()
        if one_hot_encode_core:
            params['ct_sequences'].append('core_rCBF_0.38_class0')
            params['ct_sequences'].append('core_rCBF_0.38_class1')
        else:
            params['ct_sequences'].append('core_rCBF_0.38')
        dataset = (clinical_inputs, ct_inputs, ct_lesion_GT, mri_inputs, mri_lesion_GT, masks, ids, params)
        dl.save_dataset(dataset, os.path.dirname(ct_dataset), out_file_name=outfile)

    return ct_inputs, restr_core