import os
import logging
from collections import defaultdict

import numpy as np
import pandas as pd

# Assumed external helpers (import paths are a guess based on usage):
# - load: medpy.io.load, returns (image_array, image_header)
# - subfiles: batchgenerators' directory listing helper
# - reshape, get_config, dat_utils: project-local utilities
from medpy.io import load
from batchgenerators.utilities.file_and_folder_operations import subfiles

log = logging.getLogger(__name__)


def preprocess_data(root_dir, y_shape=256, z_shape=64):
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = 2

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii", join=False)

    # Strip macOS resource-fork prefixes ("._") from file names.
    for i in range(len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        # Accumulate per-class voxel counts for the class-balance report.
        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # Min-max normalize the image to [0, 1].
        image = (image - image.min()) / (image.max() - image.min())

        # Modified from the original: pad/crop in-plane to 256x256 and keep
        # the full stack of slices.
        image = reshape(image, append_value=0, new_shape=(256, 256, image.shape[2]))
        label = reshape(label, append_value=0, new_shape=(256, 256, image.shape[2]))
        # Original version padded/cropped to (dim0, y_shape, z_shape) instead:
        # image = reshape(image, append_value=0, new_shape=(image.shape[0], y_shape, z_shape))
        # label = reshape(label, append_value=0, new_shape=(label.shape[0], y_shape, z_shape))

        # Stack image and label, then move the slice axis to the front:
        # (2, x, y, z) -> (2, z, x, y).
        result = np.stack((image, label))
        result = np.swapaxes(result, 1, 3)
        result = np.swapaxes(result, 2, 3)

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)

def preprocess_data(root_dir):
    # Variant: fixed 64x64x64 volumes, three classes, .nii.gz inputs.
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = 3

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)

    # Strip macOS resource-fork prefixes ("._") from file names.
    for i in range(len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # Min-max normalize, then pad/crop to a fixed 64x64x64 volume.
        image = (image - image.min()) / (image.max() - image.min())
        image = reshape(image, append_value=0, new_shape=(64, 64, 64))
        label = reshape(label, append_value=0, new_shape=(64, 64, 64))

        result = np.stack((image, label))
        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)

def preprocess_data(root_dir):
    # Variant: patch size and class count come from the experiment config
    # instead of the former y_shape/z_shape parameters.
    c = get_config()
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = c.num_classes

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)

    # Strip macOS resource-fork prefixes ("._") from file names.
    for i in range(len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # Min-max normalize, move the slice axis to the front, then crop the
        # stacked (image, label) pair to the configured patch size.
        image = (image - image.min()) / (image.max() - image.min())
        image = np.swapaxes(image, 0, 2)
        label = np.swapaxes(label, 0, 2)
        result = reshape(np.stack([image, label], axis=0), crop_size=c.patch_size)
        # Legacy version (with y_shape/z_shape parameters):
        # image = reshape(image, append_value=0, new_shape=(image.shape[0], y_shape, z_shape))
        # label = reshape(label, append_value=0, new_shape=(label.shape[0], y_shape, z_shape))

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)

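# The pad/crop `reshape` helper used above is project-local. Below is a
# minimal sketch of its assumed behaviour (pad with `append_value` where
# the target dimension is larger, centre-crop where it is smaller); the
# real implementation and its signature may differ between the variants
# (`new_shape=` vs. `crop_size=`).
def _reshape_sketch(image, append_value=0, new_shape=(256, 256, 64)):
    result = np.full(new_shape, append_value, dtype=image.dtype)
    # Copy the overlapping region, centred in both source and target.
    src_slices, dst_slices = [], []
    for src_dim, dst_dim in zip(image.shape, new_shape):
        copy = min(src_dim, dst_dim)
        src_start = (src_dim - copy) // 2
        dst_start = (dst_dim - copy) // 2
        src_slices.append(slice(src_start, src_start + copy))
        dst_slices.append(slice(dst_start, dst_start + copy))
    result[tuple(dst_slices)] = image[tuple(src_slices)]
    return result
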
def testdata_preprocess(input_dir, output_dir):
    nii_files = subfiles(input_dir, suffix=".nii.gz", join=False)

    # Strip macOS resource-fork prefixes ("._") from file names.
    for i in range(len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        # load() returns (image_array, image_header); the header is unused here.
        image, _ = load(os.path.join(input_dir, f))
        print(f)

        # Min-max normalize, then pad/crop to a fixed 64x64x64 volume.
        image = (image - image.min()) / (image.max() - image.min())
        image = reshape(image, append_value=0, new_shape=(64, 64, 64))
        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), image)

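# Example usage tying the two preprocessing entry points together -- a
# sketch with hypothetical paths, assuming a Medical-Decathlon-style
# directory layout (imagesTr/labelsTr/imagesTs):
if __name__ == '__main__':
    task_root = '/data/Task04_Hippocampus'  # hypothetical dataset root
    preprocess_data(task_root)

    # Note: unlike preprocess_data, testdata_preprocess does not create
    # its output directory, so create it first.
    test_out = os.path.join(task_root, 'preprocessed_test')
    os.makedirs(test_out, exist_ok=True)
    testdata_preprocess(os.path.join(task_root, 'imagesTs'), test_out)
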
def get_test_data(path=None, drop_columns=None, categorical_labels=False,
                  do_reshape=False, reshape_size=[1, 25, 25]):
    if path is None:
        raise ValueError('Undefined input file path')

    log.info("Loading data from '{}'".format(path))
    input_data = pd.read_csv(path)

    # Drop the same columns that were dropped from the training set.
    if drop_columns is not None:
        for col in drop_columns:
            input_data.drop(col, axis=1, inplace=True)

    dataset = input_data.values
    num_rows = input_data.shape[0]
    num_cols = input_data.shape[1]
    log.debug('Total number of test samples: {}'.format(num_rows))

    # One-hot encode the labels if a 'label' column is present; otherwise
    # return labels as None (unlabeled test data).
    labels = None
    if input_data.get('label') is not None:
        labels = dataset[:, num_cols - 1]
        labels = dat_utils.flat_to_one_hot(labels, categorical=categorical_labels)
        dataset = np.delete(dataset, -1, axis=1)
        log.debug('Test label distribution: 0 - {}, 1 - {}'.format(
            len(labels) - np.count_nonzero(labels), np.count_nonzero(labels)))

    # Reshape flat feature vectors to images, e.g. (1, 25, 25).
    if do_reshape:
        dataset = dat_utils.reshape(data=dataset, reshape_size=reshape_size)

    return dataset, labels

def get_train_val_data(path=None, drop_columns=None, categorical_labels=False,
                       train_val_ratio=0.2, do_shuffle=False, do_smote=False,
                       smote_ratio=0.2, do_reshape=False, reshape_size=[1, 25, 25]):
    if path is None:
        raise ValueError('Undefined input file path')

    log.info("Loading data from '{}'".format(path))
    input_data = pd.read_csv(path)

    # Drop columns that should not be used as features.
    if drop_columns is not None:
        for col in drop_columns:
            input_data.drop(col, axis=1, inplace=True)

    dataset = input_data.values
    num_rows = input_data.shape[0]
    num_cols = input_data.shape[1]

    if do_shuffle:
        log.info('Shuffling data...')
        dat_utils.shuffle(dataset)

    # Split off the labels (last column) and one-hot encode them.
    labels = dataset[:, num_cols - 1]
    labels = dat_utils.flat_to_one_hot(labels, categorical=categorical_labels)
    dataset = np.delete(dataset, -1, axis=1)

    # Split into training and validation sets.
    train_dataset, validation_dataset = dat_utils.data_split2(
        data=dataset, split_ratio=train_val_ratio)
    train_labels, validation_labels = dat_utils.data_split2(
        data=labels, split_ratio=train_val_ratio)

    log.debug('Training label distribution: 0 - {}, 1 - {}'.format(
        train_labels.shape[0] - np.count_nonzero(train_labels),
        np.count_nonzero(train_labels)))
    log.debug('Validation label distribution: 0 - {}, 1 - {}'.format(
        validation_labels.shape[0] - np.count_nonzero(validation_labels),
        np.count_nonzero(validation_labels)))

    # Synthetic Minority Over-sampling (SMOTE), applied to the training set only
    # so the validation set keeps its natural class balance.
    if do_smote:
        train_dataset, train_labels, num_zero, num_one = dat_utils.smote(
            data=train_dataset, labels=train_labels, ratio=smote_ratio)
        log.debug('SMOTE result: total - {}, 0 - {}, 1 - {}'.format(
            len(train_dataset), num_zero, num_one))

    # Reshape flat feature vectors to images, e.g. (1, 25, 25).
    if do_reshape:
        train_dataset = dat_utils.reshape(data=train_dataset,
                                          reshape_size=reshape_size)
        validation_dataset = dat_utils.reshape(data=validation_dataset,
                                               reshape_size=reshape_size)

    return (train_dataset, train_labels), (validation_dataset, validation_labels)
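
# Example usage of the two CSV loaders -- a sketch with hypothetical file
# names and a hypothetical 'id' column to drop:
def _example_load():
    (train_x, train_y), (val_x, val_y) = get_train_val_data(
        path='train.csv',        # hypothetical training CSV
        drop_columns=['id'],     # hypothetical non-feature column
        do_shuffle=True, do_smote=True,
        do_reshape=True, reshape_size=[1, 25, 25])
    test_x, test_y = get_test_data(
        path='test.csv', drop_columns=['id'],
        do_reshape=True, reshape_size=[1, 25, 25])
    return (train_x, train_y), (val_x, val_y), (test_x, test_y)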