Example #1
# Assumed imports for these snippets (they come from larger preprocessing
# modules): subfiles is batchgenerators' directory-listing helper and load is
# medpy's image reader. reshape is a project-local pad/crop utility; a sketch
# of it is given after Example #1.
import os
from collections import defaultdict

import numpy as np
from batchgenerators.utilities.file_and_folder_operations import subfiles
from medpy.io import load


def preprocess_data(root_dir, y_shape=256, z_shape=64):
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = 2

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii", join=False)

    # strip the macOS resource-fork prefix ("._") from any affected file names
    nii_files = [f[2:] if f.startswith("._") else f for f in nii_files]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        # label files carry no modality suffix, so drop '_0000' from the name
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))

        print(f)

        # accumulate per-class voxel counts for the dataset statistics
        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # min-max normalize intensities to [0, 1]
        image = (image - image.min()) / (image.max() - image.min())

        # pad/crop in-plane to 256x256, keeping the original number of slices
        image = reshape(image, append_value=0, new_shape=(256, 256, image.shape[2]))
        label = reshape(label, append_value=0, new_shape=(256, 256, label.shape[2]))

        # NOTE: the unused y_shape/z_shape parameters are left over from the
        # original version, which reshaped along the other axes instead:
        # image = reshape(image, append_value=0, new_shape=(image.shape[0], y_shape, z_shape))
        # label = reshape(label, append_value=0, new_shape=(label.shape[0], y_shape, z_shape))

        # stack image and label, then move the slice axis to the front:
        # (2, x, y, z) -> (2, z, x, y)
        result = np.stack((image, label))
        result = np.swapaxes(result, 1, 3)
        result = np.swapaxes(result, 2, 3)
        np.save(os.path.join(output_dir, f.split('.')[0]+'.npy'), result)
        print(f)


    # report the total voxel count and each class's absolute and relative share
    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)
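
All of these examples lean on a reshape() helper defined elsewhere in their repositories. For the call form used in Examples #1, #2 and #4 (Example #3 calls a crop_size variant instead), here is a minimal sketch under the assumption that it pads with append_value and center-crops any axis that is too large:

def reshape(image, append_value=0, new_shape=(256, 256, 64)):
    # allocate the target volume, pre-filled with the padding value
    new_image = np.full(new_shape, append_value, dtype=image.dtype)
    src, dst = [], []
    for old, new in zip(image.shape, new_shape):
        if old <= new:
            # source fits: center it inside the target along this axis
            start = (new - old) // 2
            src.append(slice(0, old))
            dst.append(slice(start, start + old))
        else:
            # source too large: center-crop it along this axis
            start = (old - new) // 2
            src.append(slice(start, start + new))
            dst.append(slice(0, new))
    new_image[tuple(dst)] = image[tuple(src)]
    return new_image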
Example #2
def preprocess_data(root_dir):
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = 3

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)

    # strip the macOS resource-fork prefix ("._") from any affected file names
    nii_files = [f[2:] if f.startswith("._") else f for f in nii_files]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))

        print(f)

        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        image = (image - image.min()) / (image.max() - image.min())

        # pad/crop both volumes to a fixed 64x64x64 shape
        image = reshape(image, append_value=0, new_shape=(64, 64, 64))
        label = reshape(label, append_value=0, new_shape=(64, 64, 64))

        result = np.stack((image, label))

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)
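
A usage sketch for this variant (the task folder and file name below are placeholders): each saved .npy file holds a (2, 64, 64, 64) array with the normalized image in channel 0 and the label map in channel 1.

preprocess_data('/data/Task04_Hippocampus')
sample = np.load('/data/Task04_Hippocampus/preprocessed/hippocampus_001_0000.npy')
image, label = sample[0], sample[1]   # each is (64, 64, 64)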
Example #3
def preprocess_data(root_dir):  # the original signature also took y_shape=64, z_shape=64
    c = get_config()
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = c.num_classes

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)

    # strip the macOS resource-fork prefix ("._") from any affected file names
    nii_files = [f[2:] if f.startswith("._") else f for f in nii_files]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))

        print(f)

        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # normalize images
        image = (image - image.min()) / (image.max() - image.min())

        # reorder axes so the slice axis comes first: (x, y, z) -> (z, y, x)
        image = np.swapaxes(image, 0, 2)
        label = np.swapaxes(label, 0, 2)

        # this repository's reshape variant center-crops the stacked
        # (2, z, y, x) array to the configured patch size
        result = reshape(np.stack([image, label], axis=0),
                         crop_size=c.patch_size)

        # the original version reshaped each volume separately instead:
        # image = reshape(image, append_value=0, new_shape=(image.shape[0], y_shape, z_shape))
        # label = reshape(label, append_value=0, new_shape=(label.shape[0], y_shape, z_shape))

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)
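
Example #3 depends on a project configuration object that is not shown. A hypothetical stand-in defining only the two attributes the function actually reads (both values are assumptions, not the project's real config):

class _Config:
    num_classes = 3          # assumed number of segmentation classes
    patch_size = (64, 64)    # assumed crop size passed to reshape()

def get_config():
    return _Config()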
Example #4
def testdata_preprocess(input_dir, output_dir):

    # make sure the output directory exists before np.save is called
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    nii_files = subfiles(input_dir, suffix=".nii.gz", join=False)

    # strip the macOS resource-fork prefix ("._") from any affected file names
    nii_files = [f[2:] if f.startswith("._") else f for f in nii_files]

    for f in nii_files:
        # load() returns (image_data, image_header); the header is unused here
        image, _ = load(os.path.join(input_dir, f))
        print(f)

        image = (image - image.min()) / (image.max() - image.min())

        image = reshape(image, append_value=0, new_shape=(64, 64, 64))

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), image)
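
Unlike the training variants above, this function saves the image volume alone, with no label channel. A usage sketch with placeholder paths:

testdata_preprocess('/data/imagesTs', '/data/imagesTs_preprocessed')
volume = np.load('/data/imagesTs_preprocessed/case_001_0000.npy')  # shape (64, 64, 64)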
Example #5
def get_test_data(path=None,
                  drop_columns=None,
                  categorical_labels=False,
                  do_reshape=False,
                  reshape_size=(1, 25, 25)):
    if path is None:
        raise ValueError('Undefined input file path')

    log.info("Loading data from '{}'".format(path))
    input_data = pd.read_csv(path)

    # drop columns from training set
    if drop_columns is not None:
        for col in drop_columns:
            input_data.drop(col, axis=1, inplace=True)

    dataset = input_data.values
    num_rows = input_data.shape[0]
    num_cols = input_data.shape[1]
    log.debug('total numbers of test data: {}'.format(num_rows))

    # if a 'label' column is present, split it off and one-hot encode it
    labels = None
    if input_data.get('label') is not None:
        labels = dataset[:, num_cols - 1]
        labels = dat_utils.flat_to_one_hot(labels,
                                           categorical=categorical_labels)
        dataset = np.delete(dataset, -1, axis=1)
        log.debug('Test label distribution: 0 - {}, 1 - {}'.format(
            len(labels) - np.count_nonzero(labels), np.count_nonzero(labels)))

    # reshape each flat feature row into reshape_size (e.g. a 1x25x25 array)
    if do_reshape:
        dataset = dat_utils.reshape(data=dataset, reshape_size=reshape_size)

    return dataset, labels
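
A usage sketch for the loader, assuming a CSV whose last column is named 'label' and whose remaining 625 feature columns reshape into 1x25x25 arrays; the file name and the dropped 'id' column are placeholders:

test_x, test_y = get_test_data(path='test.csv',
                               drop_columns=['id'],
                               categorical_labels=True,
                               do_reshape=True,
                               reshape_size=(1, 25, 25))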
Example #6
def get_train_val_data(path=None,
                       drop_columns=None,
                       categorical_labels=False,
                       train_val_ratio=0.2,
                       do_shuffle=False,
                       do_smote=False,
                       smote_ratio=0.2,
                       do_reshape=False,
                       reshape_size=(1, 25, 25)):

    if path is None:
        raise ValueError('Undefined input file path')

    log.info("Loading data from '{}'".format(path))
    input_data = pd.read_csv(path)

    # drop columns from training set
    if drop_columns is not None:
        for col in drop_columns:
            input_data.drop(col, axis=1, inplace=True)

    dataset = input_data.values
    num_rows = input_data.shape[0]
    num_cols = input_data.shape[1]

    # shuffle
    if do_shuffle:
        log.info('Shuffling data...')
        dat_utils.shuffle(dataset)

    # split off the label column (the last column) and one-hot encode it
    labels = dataset[:, num_cols - 1]
    labels = dat_utils.flat_to_one_hot(labels, categorical=categorical_labels)
    dataset = np.delete(dataset, -1, axis=1)

    train_dataset, validation_dataset = dat_utils.data_split2(
        data=dataset, split_ratio=train_val_ratio)
    train_labels, validation_labels = dat_utils.data_split2(
        data=labels, split_ratio=train_val_ratio)

    log.debug('Training label distribution: 0 - {}, 1 - {}'.format(
        train_labels.shape[0] - np.count_nonzero(train_labels),
        np.count_nonzero(train_labels)))

    log.debug('Validation label distribution: 0 - {}, 1 - {}'.format(
        validation_labels.shape[0] - np.count_nonzero(validation_labels),
        np.count_nonzero(validation_labels)))

    # Synthetic Minority Over-sampling (SMOTE) only on train dataset
    if do_smote:
        train_dataset, train_labels, num_zero, num_one = \
            dat_utils.smote(data=train_dataset, labels=train_labels, ratio=smote_ratio)
        log.debug('SMOTE result: total - {}, 0 - {}, 1 - {}'.format(
            len(train_dataset), num_zero, num_one))

    # reshape each flat feature row into reshape_size (e.g. a 1x25x25 array)
    if do_reshape:
        train_dataset = dat_utils.reshape(data=train_dataset,
                                          reshape_size=reshape_size)
        validation_dataset = dat_utils.reshape(data=validation_dataset,
                                               reshape_size=reshape_size)

    return (train_dataset, train_labels), (validation_dataset,
                                           validation_labels)
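
The train/validation counterpart under the same assumptions, holding out 20% of the rows for validation and applying SMOTE to the training portion only:

(train_x, train_y), (val_x, val_y) = get_train_val_data(
    path='train.csv',              # placeholder file name
    drop_columns=['id'],           # hypothetical column
    categorical_labels=True,
    train_val_ratio=0.2,
    do_shuffle=True,
    do_smote=True,
    smote_ratio=0.2,
    do_reshape=True,
    reshape_size=(1, 25, 25))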