def overlay_mask(args):
    """Overlay ground truth mask on model output binary mask.

    For every model output named ``NUM_SUFFIX.npy`` in ``args.mask_dir`` the
    output is thresholded, compared voxel-wise with the series' ground truth
    mask and written as a NIfTI volume to
    ``args.save_dir/overlays_threshold_THRESHOLD``. Voxel values in the
    overlay are: 0 = neither mask set, 1 = false positive, 2 = true positive,
    3 = false negative.

    Args:
        args: Object exposing ``mask_dir``, ``save_dir``, ``threshold`` and
            ``pkl_path`` (path to the pickled series list).

    Raises:
        AssertionError: If a mask has no matching series in the pickle file,
            or the number of DICOMs does not match the mask's first dimension.
    """
    # Only names containing an underscore can be split into NUM and SUFFIX.
    mask_files = [m for m in os.listdir(args.mask_dir)
                  if m.endswith('.npy') and '_' in m]

    save_dir = os.path.join(args.save_dir,
                            'overlays_threshold_{}'.format(args.threshold))
    os.makedirs(save_dir, exist_ok=True)
    if not mask_files:
        # Nothing to convert; the original unpacking of zip(*[]) crashed here.
        return

    # NOTE(review): pickle.load can execute arbitrary code; only point
    # pkl_path at files produced by this project.
    with open(args.pkl_path, 'rb') as pkl_file:
        series_list = pickle.load(pkl_file)

    # Index once instead of scanning the whole list for every mask. As with
    # the scan it replaces, the last series with a given dset_path wins.
    series_by_path = {s.dset_path: s for s in series_list}

    for mask_file in tqdm(mask_files):
        # Split on the first underscore only so suffixes may contain more.
        num = mask_file.split('_', 1)[0]
        output = np.load(os.path.join(args.mask_dir, mask_file))

        series = series_by_path.get('/series/' + num)
        assert series is not None, \
            'Could not find series {} in series list pickle file.'.format(num)

        dcm_list = sorted([d for d in os.listdir(series.dcm_dir)
                           if d.endswith('.dcm')])
        assert len(dcm_list) == output.shape[0], \
            '{} dcm files in directory, but mask dimension is {}.'.format(
                len(dcm_list), output.shape)

        dcm_first = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[0]))
        dcm_second = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[1]))

        affine_x = -dcm_first.PixelSpacing[0] * dcm_first.ImageOrientationPatient[0]
        affine_y = -dcm_first.PixelSpacing[1] * dcm_first.ImageOrientationPatient[4]
        affine_z = dcm_second.ImagePositionPatient[2] - dcm_first.ImagePositionPatient[2]

        # Define affine matrix
        affine = [[affine_x, 0., 0., -dcm_first.ImagePositionPatient[0]],
                  [0., affine_y, 0., -dcm_first.ImagePositionPatient[1]],
                  [0., 0., affine_z, dcm_first.ImagePositionPatient[2]],
                  [0., 0., 0., 1.]]
        # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype.
        affine = np.array(affine, dtype=np.float64)

        # Fix dimension of mask to match dicoms and threshold probabilities
        output = np.transpose(output, (2, 1, 0))
        output = (output >= args.threshold).astype(np.float64)

        if series.is_aneurysm:
            try:
                true_mask = np.load(os.path.join(series.dcm_dir, 'aneurysm_mask.npy'))
                true_mask = np.transpose(true_mask, (1, 0, 2))
            except Exception:
                # Best-effort skip of series whose mask cannot be loaded, but
                # no longer swallows KeyboardInterrupt / SystemExit.
                # TODO: Remove after drawing missing masks
                continue
        else:
            true_mask = np.zeros(output.shape)

        if not series.is_bottom_up:
            output = np.flip(output, axis=2)
            true_mask = np.flip(true_mask, axis=2)

        # Distinct weights keep the three categories separable after summing.
        false_positives = np.logical_and(output, np.logical_not(true_mask)).astype(np.float64)
        true_positives = np.logical_and(output, true_mask).astype(np.float64) * 2.
        false_negatives = np.logical_and(np.logical_not(output), true_mask).astype(np.float64) * 3.
        overlay = false_positives + true_positives + false_negatives

        # Create NIfTI file and save
        nifti_out = nib.Nifti1Image(overlay, affine=affine)
        filename = '{}_{}_overlay.nii.gz'.format(num, series.study_name)
        nib.save(nifti_out, os.path.join(save_dir, filename))
def _initialize(self):
    """Populate series attributes from the DICOM files in ``self.dcm_dir``.

    Sets ``slice_names``, ``series_number``, ``is_bottom_up`` and, when the
    corresponding DICOM attributes or mask files are present, ``date``,
    ``dcm_thicknesses``, ``scanner_make``, ``aneurysm_mask_path`` and
    ``brain_mask_path``.

    Raises:
        RuntimeWarning: If the directory holds no DICOMs or only one, if the
            first DICOM carries an AnatomicalOrientationType attribute, or if
            it lacks ImagePositionPatient.
    """
    # Sorting bare names gives the same order as sorting the joined paths,
    # because every path shares the same directory prefix.
    dcm_names = sorted(name for name in os.listdir(self.dcm_dir)
                       if name.endswith('.dcm'))
    dcm_paths = [os.path.join(self.dcm_dir, name) for name in dcm_names]
    if not dcm_paths:
        raise RuntimeWarning('Did not find any DICOMs in {}'.format(
            self.dcm_dir))
    # Drop the 4-character '.dcm' extension.
    self.slice_names = [name[:-4] for name in dcm_names]

    # Read a DICOM as an example
    first_dcm = util.read_dicom(dcm_paths[0])
    self.series_number = int(first_dcm.SeriesNumber)
    if 'SliceThickness' in first_dcm:
        self.dcm_thicknesses.append(first_dcm.SliceThickness)
    if 'ContentDate' in first_dcm:
        self.date = date_parser.parse(first_dcm.ContentDate)

    # Record scan direction
    if len(dcm_paths) == 1:
        raise RuntimeWarning('Only found a single DICOM file in {}'.format(
            self.dcm_dir))
    second_dcm = util.read_dicom(dcm_paths[1])
    if 'AnatomicalOrientationType' in first_dcm:
        raise RuntimeWarning(
            'Series {} has Anatomical Orientation Type {}, unable to fetch scan direction.'
            .format(self.dcm_dir, first_dcm.AnatomicalOrientationType))
    if 'ImagePositionPatient' not in first_dcm:
        raise RuntimeWarning(
            '{}: No ImagePositionPatient attribute, unable to fetch scan direction.'
            .format(self.dcm_dir))
    # The z-axis of ImagePositionPatient is increasing toward the head of the patient
    self.is_bottom_up = (first_dcm.ImagePositionPatient[2]
                         < second_dcm.ImagePositionPatient[2])

    # Record last DICOM slice thickness for possibly multiple slice thicknesses
    last_dcm = util.read_dicom(dcm_paths[-1])
    if 'SliceThickness' in last_dcm:
        self.dcm_thicknesses.append(last_dcm.SliceThickness)
    self.dcm_thicknesses = list(set(self.dcm_thicknesses))

    # Record scanner manufacturer
    if self.scanner_make is None and 'Manufacturer' in first_dcm:
        self.scanner_make = str(first_dcm.Manufacturer).lower()

    # Save mask path if mask exists
    mask_attributes = (
        ('aneurysm_mask_path', 'aneurysm_mask.npy'),
        ('brain_mask_path', 'brain_mask.npy'),
    )
    for attr_name, file_name in mask_attributes:
        candidate = os.path.join(self.dcm_dir, file_name)
        if os.path.exists(candidate):
            setattr(self, attr_name, candidate)

    dcm_scan_num = None  # First number in name IM-####-####.dcm
def restructure_directory(input_dir, output_dir, json_path):
    """Copy DICOMs into ``output_dir/ACC/DESCRIPTION/IM-SSSS-IIII.dcm``.

    The JSON file maps each accession to a dict of ``SERIES_ACQ`` keys, each
    mapping instance numbers (as strings) to lists of file names. Accessions
    with more than one series entry are skipped, as are series with fewer
    than 10 instances. Files are read from the single ``ST*`` sub-folder of
    ``input_dir/ACC``.

    Args:
        input_dir: Root directory holding one folder per accession.
        output_dir: Root directory to copy the renamed DICOMs into.
        json_path: Path to the JSON index described above.

    Raises:
        AssertionError: If an accession folder does not contain exactly one
            sub-folder whose name starts with 'ST'.
    """
    with open(json_path, 'r') as json_file:
        dcm_dict = json.load(json_file)

    for acc, series_dict in tqdm(dcm_dict.items()):
        folder = os.path.join(input_dir, acc)
        st_folders = [s for s in os.listdir(folder) if s.startswith('ST')]
        # The check also fires when there is no ST folder at all, so the
        # message reports the actual count instead of claiming "multiple".
        assert len(st_folders) == 1, \
            "Expected exactly one 'ST' subfolder in {}, found {}.".format(
                acc, len(st_folders))
        source_dir = os.path.join(folder, st_folders[0])

        if len(series_dict) > 1:
            continue

        try:
            for series_acq, inst_num in series_dict.items():
                # The series description of instance 1 names the output folder.
                dcm = util.read_dicom(os.path.join(source_dir, inst_num['1'][0]))
                description = dcm.SeriesDescription.replace('/', ' ')
                series_out_dir = os.path.join(output_dir, acc, description)
                os.makedirs(series_out_dir, exist_ok=True)

                series_num = series_acq.split('_')[0]
                if len(inst_num) < 10:
                    continue

                for i in range(1, len(inst_num) + 1):
                    new_name = 'IM-{}-{}.dcm'.format(series_num.zfill(4),
                                                     str(i).zfill(4))
                    # Copy straight to the final name; no transient file that
                    # then needs renaming.
                    shutil.copy(os.path.join(source_dir, inst_num[str(i)][0]),
                                os.path.join(series_out_dir, new_name))
        except Exception:
            # Best-effort per accession, but Ctrl-C / SystemExit still abort.
            util.print_err('Error occurred while copying {}. Skipping...'.format(acc))
            continue
def save_mask_npy(input_dir, reconvert, flip):
    """Convert mask NIfTI files into Numpy arrays, and save in .npy format.

    Walks ``input_dir``; every directory holding exactly one non-hidden
    ``.nii.gz`` file gets a ``mask.npy`` written next to it, with the first
    two axes of the NIfTI array swapped.

    Args:
        input_dir: Directory containing mask.nii.gz files.
        reconvert: If falsy, directories that already contain a file ending
            in 'mask.npy' are skipped.
        flip: If truthy, reverse the mask along its last axis when the
            second DICOM (sorted by file name) has a smaller z position
            than the first.

    Raises:
        RuntimeError: If a directory has more than one mask file, the header
            check fails, or the mask slice count differs from the number of
            DICOMs in the directory.
    """
    for dir_path, _, filenames in tqdm(list(os.walk(input_dir))):
        mask_list = [
            f for f in filenames
            if f.endswith('.nii.gz') and not f.startswith('.')
        ]
        if not mask_list:
            continue
        if not reconvert and any(f.endswith('mask.npy') for f in filenames):
            continue
        if len(mask_list) > 1:
            raise RuntimeError('{} mask files in {}.'.format(
                len(mask_list), dir_path))

        mask_file = nib.load(os.path.join(dir_path, mask_list[0]))
        mask_header = mask_file.header
        # get_data() was removed in nibabel 5.0; this is the replacement the
        # nibabel deprecation notice gives for the same return value.
        mask_array = np.asanyarray(mask_file.dataobj)

        if mask_header['sform_code'] <= 0 or mask_header['srow_x'][0] > 0:
            raise RuntimeError(
                'Check header and x-y axis for mask file in {}.'.format(
                    dir_path))

        dcm_files = sorted([d for d in filenames if d.endswith('.dcm')])
        num_dcms = len(dcm_files)
        if mask_array.shape[2] != num_dcms:
            raise RuntimeError(
                'Mask file has {} slices, but {} dicoms present in folder {}.'.
                format(mask_array.shape[2], num_dcms, dir_path))

        mask_array = mask_array.transpose(1, 0, 2)

        # Two slices are needed to compare z positions; with fewer, flipping
        # the last axis would change nothing anyway.
        if flip and num_dcms >= 2:
            dcm_first = util.read_dicom(os.path.join(dir_path, dcm_files[0]))
            dcm_second = util.read_dicom(os.path.join(dir_path, dcm_files[1]))
            z_step = (dcm_second.ImagePositionPatient[2]
                      - dcm_first.ImagePositionPatient[2])
            if z_step < 0:
                mask_array = np.flip(mask_array, axis=2)
                print('Flipped mask for series {}.'.format(dir_path))

        mask_path = os.path.join(dir_path, 'mask.npy')
        np.save(mask_path, mask_array)
def generate_nifti(args):
    """Generate binary masks from model output and convert them to NIfTI format.

    For every model output named ``NUM_SUFFIX.npy`` in ``args.mask_dir`` the
    probabilities are thresholded at ``args.threshold`` and written as a
    NIfTI volume to ``args.save_dir/niftis_threshold_THRESHOLD``.

    Args:
        args: Object exposing ``mask_dir``, ``save_dir``, ``threshold`` and
            ``pkl_path`` (path to the pickled series list).

    Raises:
        AssertionError: If a mask has no matching series in the pickle file,
            or the number of DICOMs does not match the mask's first dimension.
    """
    # Only names containing an underscore can be split into NUM and SUFFIX.
    mask_files = [m for m in os.listdir(args.mask_dir)
                  if m.endswith('.npy') and '_' in m]

    save_dir = os.path.join(args.save_dir,
                            'niftis_threshold_{}'.format(args.threshold))
    os.makedirs(save_dir, exist_ok=True)
    if not mask_files:
        # Nothing to convert; the original unpacking of zip(*[]) crashed here.
        return

    # NOTE(review): pickle.load can execute arbitrary code; only point
    # pkl_path at files produced by this project.
    with open(args.pkl_path, 'rb') as pkl_file:
        series_list = pickle.load(pkl_file)

    # Index once instead of scanning the whole list for every mask. As with
    # the scan it replaces, the last series with a given dset_path wins.
    series_by_path = {s.dset_path: s for s in series_list}

    for mask_file in tqdm(mask_files):
        # Split on the first underscore only so suffixes may contain more.
        num = mask_file.split('_', 1)[0]
        output = np.load(os.path.join(args.mask_dir, mask_file))

        series = series_by_path.get('/series/' + num)
        assert series is not None, \
            'Could not find series {} in series list pickle file.'.format(num)

        dcm_list = sorted([d for d in os.listdir(series.dcm_dir)
                           if d.endswith('.dcm')])
        assert len(dcm_list) == output.shape[0], \
            '{} dcm files in directory, but mask dimension is {}.'.format(
                len(dcm_list), output.shape)

        dcm_first = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[0]))
        dcm_second = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[1]))

        affine_x = -dcm_first.PixelSpacing[0] * dcm_first.ImageOrientationPatient[0]
        affine_y = -dcm_first.PixelSpacing[1] * dcm_first.ImageOrientationPatient[4]
        affine_z = dcm_second.ImagePositionPatient[2] - dcm_first.ImagePositionPatient[2]

        # Define affine matrix
        affine = [[affine_x, 0., 0., -dcm_first.ImagePositionPatient[0]],
                  [0., affine_y, 0., -dcm_first.ImagePositionPatient[1]],
                  [0., 0., affine_z, dcm_first.ImagePositionPatient[2]],
                  [0., 0., 0., 1.]]
        # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype.
        affine = np.array(affine, dtype=np.float64)

        # Fix dimension of mask to match dicoms and threshold probabilities
        output = np.transpose(output, (2, 1, 0))
        if not series.is_bottom_up:
            output = np.flip(output, axis=2)
        output = (output >= args.threshold).astype(np.float64)

        # Create NIfTI file and save
        nifti_out = nib.Nifti1Image(output, affine=affine)
        filename = '{}_{}_output.nii.gz'.format(num, series.study_name)
        nib.save(nifti_out, os.path.join(save_dir, filename))
def sort_normals(input_dir, slice_thickness, json_path):
    """Index DICOMs of a given slice thickness and dump the index as JSON.

    Walks ``input_dir`` and groups file names as
    ``index[FOLDER][SERIES_ACQ][INSTANCE] -> [file names]``, where FOLDER is
    the base name of the parent of the directory containing the file and
    SERIES_ACQ is ``SeriesNumber + '_' + AcquisitionNumber``. Files that
    cannot be read as DICOM, or that lack one of the required attributes,
    are skipped.

    Args:
        input_dir: Root directory to walk.
        slice_thickness: Only files whose SliceThickness equals this value
            (compared as floats) are indexed.
        json_path: Destination path of the JSON index.
    """
    dcms = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    for dir_path, _, filenames in os.walk(input_dir):
        for file in tqdm(filenames):
            try:
                dcm = util.read_dicom(os.path.join(dir_path, file))
                thickness = float(dcm.SliceThickness)
                series_num = str(dcm.SeriesNumber)
                acq_num = str(dcm.AcquisitionNumber)
                inst_num = str(dcm.InstanceNumber)
                folder = os.path.basename(Path(dir_path).parents[0])
            except Exception:
                # Skip unreadable / incomplete files, but let Ctrl-C and
                # SystemExit through (a bare except swallowed them).
                continue

            if thickness == slice_thickness:
                dcms[folder][series_num + '_' + acq_num][inst_num].append(file)

    with open(json_path, 'w') as json_file:
        json.dump(dcms, json_file, indent=4, sort_keys=True)
def get_series_numbers(args):
    """Record the series number of each study's contrast series in the CSV.

    For every row of ``args.input_csv`` whose ``Acc`` folder exists under
    ``args.data_dir``, each sub-directory is classified as 'contrast' or
    'non_contrast'. Classifications come from ``dir2type.json`` in
    ``args.output_dir``; unknown sub-directories are asked about
    interactively. For contrast sub-directories the SeriesNumber of one
    DICOM is stored in the row's 'CTA se' column. The updated CSV and the
    updated mapping are written back to ``args.output_dir``.

    Args:
        args: Object exposing ``output_dir``, ``input_csv`` and ``data_dir``.
    """
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'r') as json_fh:
        dir2type = json.load(json_fh)

    df = pd.read_csv(args.input_csv)
    for i, row in df.iterrows():
        series_dir = os.path.join(args.data_dir, str(row['Acc']))
        if not os.path.exists(series_dir):
            continue
        print('Found at {}'.format(series_dir))

        for subdir in os.listdir(series_dir):
            dcm_dir = os.path.join(series_dir, subdir)
            if not os.path.isdir(dcm_dir):
                # Stray files cannot hold a series; listing them would crash
                # and lose every label entered so far.
                continue

            if subdir not in dir2type:
                # Keep asking until the answer is exactly 0 or 1.
                while True:
                    try:
                        input_num = int(
                            input('{} (0=contrast, 1=other)?\n>>> '.format(
                                subdir)))
                    except ValueError:
                        continue
                    if input_num in (0, 1):
                        break
                dir2type[subdir] = 'contrast' if input_num == 0 else 'non_contrast'

            if dir2type[subdir] != 'contrast':
                continue

            print('{} is contrast'.format(subdir))
            dcm_names = [f for f in os.listdir(dcm_dir) if f.endswith('.dcm')]
            if not dcm_names:
                # An IndexError here would abort before the labels are saved.
                util.print_err('No DICOMs found in {}. Skipping...'.format(dcm_dir))
                continue
            dcm = util.read_dicom(os.path.join(dcm_dir, dcm_names[0]))
            df.loc[i, 'CTA se'] = int(dcm.SeriesNumber)

    # Write CSV and dir2type mapping
    util.print_err('Dumping CSV file...')
    df.to_csv(os.path.join(args.output_dir, 'updated_annotations.csv'))
    util.print_err('Dumping JSON file...')
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'w') as json_fh:
        json.dump(dir2type, json_fh, indent=4, sort_keys=True,
                  default=util.json_encoder)
def create_hdf5(series_list, output_dir, resample=False, max_series=1e5):
    """Write series volumes and aneurysm masks into ``output_dir/data.hdf5``.

    Each series is stored as an int16 dataset at ``/series/NNNNN`` (1-based,
    zero-padded to five digits). When ``aneurysm_mask.npy`` exists in the
    series' DICOM directory it is stored as a boolean dataset at
    ``/aneurysm_masks/NNNNN``. Series whose dataset already exists in the
    file are skipped, so the function can be re-run to resume. Afterwards
    the series list, with updated ``dset_path`` / ``aneurysm_mask_path``
    attributes, is dumped to ``series_list.pkl`` and ``series_list.json``.

    Args:
        series_list: Sequence of series objects. The code reads
            ``dcm_dir``, ``slice_names``, ``series_number``, ``study_name``
            and ``is_aneurysm``, and calls ``len()`` and ``dict()`` on them.
        output_dir: Directory for the HDF5, pickle and JSON files.
        resample: If truthy, pass volume and mask through ``util.resample``
            with the target tuple ``(1.5, 1., 1.)``.
        max_series: Stop once this many entries of ``series_list`` have
            been visited.

    Raises:
        AssertionError: If there are 1e5 or more series, or a mask's shape
            differs from its volume's shape.
        RuntimeError: If a mask is empty after resampling.
    """
    # The context manager closes the file even when a series raises; the
    # original left the handle open on any exception.
    with h5py.File(os.path.join(output_dir, 'data.hdf5'), 'a') as hdf5_fh:
        for group_name in ('series', 'aneurysm_masks'):
            if group_name not in hdf5_fh:
                hdf5_fh.create_group('/{}'.format(group_name))

        assert len(series_list) < 1e5, 'Too many series for 5-digit IDs.'
        for i, s in enumerate(series_list):
            if i >= max_series:
                break
            dset_path = '/series/{:05d}'.format(i + 1)
            if dset_path in hdf5_fh:
                continue
            print('Processing series {} from study {}...'.format(
                s.series_number, s.study_name))

            pixel_arrays = []
            is_valid_series = True
            for slice_name in tqdm(s.slice_names, total=len(s), unit=' slices'):
                # Process and write slices
                dcm_path = os.path.join(s.dcm_dir, slice_name + '.dcm')
                dcm = util.read_dicom(dcm_path)
                try:
                    pixel_arrays.append(util.dcm_to_raw(dcm))
                except NotImplementedError:
                    print('Unsupported image format, not converting study: {}'.
                          format(s.study_name))
                    is_valid_series = False
                    break
            if not is_valid_series:
                continue

            volume = np.stack(pixel_arrays)

            aneurysm_mask_path = os.path.join(s.dcm_dir, 'aneurysm_mask.npy')
            if os.path.exists(aneurysm_mask_path):
                s.aneurysm_mask_path = aneurysm_mask_path
                # Move the last axis first so the mask lines up with the
                # stacked volume (checked by the assertion below).
                aneurysm_mask = np.transpose(np.load(s.aneurysm_mask_path),
                                             [2, 0, 1])
            else:
                s.aneurysm_mask_path = None
                aneurysm_mask = None

            assert aneurysm_mask is None or aneurysm_mask.shape == volume.shape, \
                'Mismatched aneurysm mask and volume shapes: {} and {}'.format(
                    aneurysm_mask.shape, volume.shape)

            if len(s) > 0 and resample:
                util.print_err('Resampling volume... Shape before: {}'.format(
                    volume.shape))
                tick = time.time()
                dcm = util.read_dicom(
                    os.path.join(s.dcm_dir, s.slice_names[0] + '.dcm'))
                volume, real_scale = util.resample(volume, dcm.SliceThickness,
                                                   dcm.PixelSpacing,
                                                   (1.5, 1., 1.))
                util.print_err('Shape after: {}. Resample took {} s.'.format(
                    volume.shape, time.time() - tick))

                if aneurysm_mask is not None:
                    util.print_err(
                        'Resampling mask... Shape before: {}, count before: {}.'.
                        format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0)))
                    tick = time.time()
                    aneurysm_mask, mask_scale = util.resample(
                        aneurysm_mask, dcm.SliceThickness, dcm.PixelSpacing,
                        (1.5, 1., 1.))
                    util.print_err(
                        'Mask shape after: {}, count after: {}. Resample took {} s.'
                        .format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0),
                                time.time() - tick))
                    if not aneurysm_mask.any():
                        raise RuntimeError(
                            'Mask has zero volume after resampling.')

                    if s.is_aneurysm:
                        # Recompute slice numbers where the aneurysm lives
                        s.aneurysm_bounds = get_aneurysm_range(aneurysm_mask)
                        s.aneurysm_ranges = [s.aneurysm_bounds]
                        s.absolute_range = [0, aneurysm_mask.shape[0]]

            # Create one dataset for the volume (int16), one for the mask (bool)
            s.dset_path = dset_path
            hdf5_fh.create_dataset(s.dset_path, data=volume, dtype='i2',
                                   chunks=True)
            if aneurysm_mask is not None:
                s.aneurysm_mask_path = '/aneurysm_masks/{:05d}'.format(i + 1)
                hdf5_fh.create_dataset(s.aneurysm_mask_path,
                                       data=aneurysm_mask, dtype='?',
                                       chunks=True)

        # Print summary
        util.print_err('Series: {}'.format(len(hdf5_fh['/series'])))
        util.print_err('Aneurysm Masks: {}'.format(len(
            hdf5_fh['/aneurysm_masks'])))

    # Dump pickle and JSON (updated dset_path and mask_path attributes)
    util.print_err('Dumping pickle file...')
    with open(os.path.join(output_dir, 'series_list.pkl'), 'wb') as pkl_fh:
        pickle.dump(series_list, pkl_fh)
    util.print_err('Dumping JSON file...')
    with open(os.path.join(output_dir, 'series_list.json'), 'w') as json_file:
        json.dump([dict(series) for series in series_list], json_file,
                  indent=4, sort_keys=True, default=util.json_encoder)