Example #1
0
def overlay_mask(args):
    """Overlay ground truth mask on model output binary mask.

    For every ``<num>_<suffix>.npy`` model output in ``args.mask_dir`` the
    probabilities are thresholded, compared voxel-wise with the series'
    ground-truth mask and written to
    ``<args.save_dir>/overlays_threshold_<threshold>/`` as a NIfTI volume.
    Voxel values encode the comparison: 0 = neither mask set,
    1 = false positive, 2 = true positive, 3 = false negative.

    Args:
        args: Namespace providing ``mask_dir`` (directory of ``.npy`` model
            outputs), ``save_dir`` (parent directory for the overlays),
            ``threshold`` (cut-off applied to the model output) and
            ``pkl_path`` (pickled list of series objects).

    Raises:
        AssertionError: If a mask has no matching series in the pickle file,
            or its first dimension differs from the number of DICOM files.
    """
    mask_list = [m for m in os.listdir(args.mask_dir) if m.endswith('.npy')]
    save_dir = os.path.join(args.save_dir, 'overlays_threshold_{}'.format(args.threshold))
    os.makedirs(save_dir, exist_ok=True)
    if not mask_list:
        # Nothing to overlay; avoids loading the pickle for an empty directory.
        return

    # NOTE: pickle can execute arbitrary code on load; only pass trusted files.
    with open(args.pkl_path, 'rb') as pkl_file:
        series_list = pickle.load(pkl_file)
    # Index once instead of scanning the list per mask. Later entries overwrite
    # earlier ones, matching the previous "last match wins" scan.
    series_by_path = {s.dset_path: s for s in series_list}

    for mask_name in tqdm(mask_list):
        # Split on the first underscore only so the suffix may contain underscores.
        num = mask_name.split('_', 1)[0]
        output = np.load(os.path.join(args.mask_dir, mask_name))
        series = series_by_path.get('/series/' + num)
        assert series is not None, 'Could not find series {} in series list pickle file.'.format(num)
        dcm_list = sorted([d for d in os.listdir(series.dcm_dir) if d.endswith('.dcm')])
        assert len(dcm_list) == output.shape[0], \
            '{} dcm files in directory, but mask dimension is {}.'.format(len(dcm_list), output.shape)
        dcm_first = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[0]))
        dcm_second = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[1]))

        affine_x = -dcm_first.PixelSpacing[0] * dcm_first.ImageOrientationPatient[0]
        affine_y = -dcm_first.PixelSpacing[1] * dcm_first.ImageOrientationPatient[4]
        # Signed step between the first two slices along z
        affine_z = dcm_second.ImagePositionPatient[2] - dcm_first.ImagePositionPatient[2]

        # Define affine matrix
        affine = [[affine_x, 0., 0., -dcm_first.ImagePositionPatient[0]],
                  [0., affine_y, 0., -dcm_first.ImagePositionPatient[1]],
                  [0., 0., affine_z, dcm_first.ImagePositionPatient[2]],
                  [0., 0., 0., 1.]]
        # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype.
        affine = np.array(affine, dtype=np.float64)

        # Fix dimension of mask to match dicoms and threshold probabilities
        output = np.transpose(output, (2, 1, 0))
        output = (output >= args.threshold).astype(np.float64)
        if series.is_aneurysm:
            try:
                true_mask = np.load(os.path.join(series.dcm_dir, 'aneurysm_mask.npy'))
                true_mask = np.transpose(true_mask, (1, 0, 2))
            except Exception:
                # TODO: Remove after drawing missing masks
                # Series without a usable mask are skipped; Ctrl-C still propagates.
                continue
        else:
            true_mask = np.zeros(output.shape)
        if not series.is_bottom_up:
            output = np.flip(output, axis=2)
            true_mask = np.flip(true_mask, axis=2)
        false_positives = np.logical_and(output, np.logical_not(true_mask)).astype(np.float64)
        true_positives = np.logical_and(output, true_mask).astype(np.float64) * 2.
        false_negatives = np.logical_and(np.logical_not(output), true_mask).astype(np.float64) * 3.
        # The three categories are mutually exclusive, so the sum keeps the codes 1/2/3.
        overlay = false_positives + true_positives + false_negatives

        # Create NIfTI file and save
        nifti_out = nib.Nifti1Image(overlay, affine=affine)
        filename = '{}_{}_overlay.nii.gz'.format(num, series.study_name)
        nib.save(nifti_out, os.path.join(save_dir, filename))
Example #2
0
    def _initialize(self):
        """Initialize CT series. Collect info about this series from the DICOMs.

        Reads the first, second and last DICOM (in sorted path order) found in
        ``self.dcm_dir`` and records slice names, series number, slice
        thicknesses, content date, scan direction, scanner manufacturer and
        the paths of any ``aneurysm_mask.npy`` / ``brain_mask.npy`` files.

        Raises:
            RuntimeWarning: If no DICOM or only a single DICOM is found in
                ``self.dcm_dir``, if the first DICOM carries an
                ``AnatomicalOrientationType`` attribute, or if it has no
                ``ImagePositionPatient`` attribute. Note that the warning
                class is raised as an exception here, not issued via
                ``warnings.warn``.
            RuntimeError: If there's an unexpected file name.
                NOTE(review): no such raise is visible in this part of the
                method - confirm it still applies.
        """
        dcm_paths = sorted([os.path.join(self.dcm_dir, f) for f in os.listdir(self.dcm_dir) \
                            if f.endswith('.dcm')])
        if len(dcm_paths) == 0:
            raise RuntimeWarning('Did not find any DICOMs in {}'.format(
                self.dcm_dir))
        # Slice names are the DICOM base names with the 4-character '.dcm' suffix removed
        self.slice_names = [os.path.basename(f)[:-4] for f in dcm_paths]

        # Read a DICOM as an example
        dcm = util.read_dicom(dcm_paths[0])
        self.series_number = int(dcm.SeriesNumber)
        if 'SliceThickness' in dcm:
            self.dcm_thicknesses.append(dcm.SliceThickness)
        if 'ContentDate' in dcm:
            self.date = date_parser.parse(dcm.ContentDate)

        # Record scan direction
        # Two slices are needed to compare z positions, so a single-file series is rejected
        if len(dcm_paths) == 1:
            raise RuntimeWarning('Only found a single DICOM file in {}'.format(
                self.dcm_dir))
        dcm_second = util.read_dicom(dcm_paths[1])
        if 'AnatomicalOrientationType' in dcm:
            raise RuntimeWarning('Series {} has Anatomical Orientation Type {}, unable to fetch scan direction.' \
                                 .format(self.dcm_dir, dcm.AnatomicalOrientationType))
        # The z-axis of ImagePositionPatient is increasing toward the head of the patient
        elif 'ImagePositionPatient' not in dcm:
            raise RuntimeWarning('{}: No ImagePositionPatient attribute, unable to fetch scan direction.' \
                                 .format(self.dcm_dir))
        else:
            # Only the first DICOM is checked for the attribute; the second is read unguarded
            ipp1 = dcm.ImagePositionPatient
            ipp2 = dcm_second.ImagePositionPatient
            # Bottom-up means z increases from the first to the second sorted slice
            self.is_bottom_up = ipp1[2] < ipp2[2]

        # Record last DICOM slice thickness for possibly multiple slice thicknesses
        dcm_last = util.read_dicom(dcm_paths[-1])
        if 'SliceThickness' in dcm_last:
            self.dcm_thicknesses.append(dcm_last.SliceThickness)
        # De-duplicate; the resulting order is not guaranteed because it goes through a set
        self.dcm_thicknesses = list(set(self.dcm_thicknesses))

        # Record scanner manufacturer
        # Only filled in when it has not been set already
        if self.scanner_make is None and 'Manufacturer' in dcm:
            self.scanner_make = str(dcm.Manufacturer).lower()

        # Save mask path if mask exists
        aneurysm_mask_path = os.path.join(self.dcm_dir, 'aneurysm_mask.npy')
        if os.path.exists(aneurysm_mask_path):
            self.aneurysm_mask_path = aneurysm_mask_path
        brain_mask_path = os.path.join(self.dcm_dir, 'brain_mask.npy')
        if os.path.exists(brain_mask_path):
            self.brain_mask_path = brain_mask_path

        # NOTE(review): assigned but never used in the visible part of this method;
        # the code that consumed it appears to be missing - confirm.
        dcm_scan_num = None  # First number in name IM-####-####.dcm
def restructure_directory(input_dir, output_dir, json_path):
    """Copy single-series studies into ``output_dir/<acc>/<description>/``.

    ``json_path`` holds a mapping of the form
    ``{acc: {"<series>_<acq>": {"<instance>": [filename, ...]}}}``. For every
    accession with exactly one series of at least 10 instances, the first
    file listed for each instance number is copied from
    ``input_dir/<acc>/<ST* subfolder>/`` and named
    ``IM-<series, 4 digits>-<instance, 4 digits>.dcm``. The destination
    folder is named after the DICOM SeriesDescription with ``/`` replaced by
    a space.

    Args:
        input_dir: Directory containing one folder per accession.
        output_dir: Directory receiving the restructured copies.
        json_path: Path to the JSON index described above.

    Raises:
        AssertionError: If an accession folder does not contain exactly one
            subfolder whose name starts with ``ST``.
    """
    with open(json_path, 'r') as json_file:
        dcm_dict = json.load(json_file)

    for acc, series_dict in tqdm(dcm_dict.items()):
        folder = os.path.join(input_dir, acc)
        subfolders = [s for s in os.listdir(folder) if s.startswith('ST')]
        # The check also fails on zero matches, so the message reports the count.
        assert len(subfolders) == 1, \
            "Expected exactly one 'ST' subfolder in {}, found {}.".format(acc, len(subfolders))
        study_dir = os.path.join(folder, subfolders[0])

        # Only studies with a single matching series are restructured
        if len(series_dict) > 1:
            continue

        try:
            for series_acq, inst_num in series_dict.items():
                # Skip short series before touching the disk so that no empty
                # output directory is left behind for them.
                if len(inst_num) < 10:
                    continue

                dcm = util.read_dicom(os.path.join(study_dir, inst_num['1'][0]))
                description = dcm.SeriesDescription.replace('/', ' ')
                series_out_dir = os.path.join(output_dir, acc, description)
                Path(series_out_dir).mkdir(parents=True, exist_ok=True)

                series_num = series_acq.split('_')[0]
                # Instance numbers are expected to run 1..N without gaps; a gap
                # raises KeyError and the accession is reported below.
                for i in range(1, len(inst_num) + 1):
                    new_name = 'IM-' + series_num.zfill(4) + '-' + str(i).zfill(4) + '.dcm'
                    # Copy straight to the final name instead of copy-then-rename
                    shutil.copy(os.path.join(study_dir, inst_num[str(i)][0]),
                                os.path.join(series_out_dir, new_name))
        except Exception:
            # Best effort: report and move on, but let Ctrl-C / SystemExit through.
            util.print_err('Error occurred while copying {}. Skipping...'.format(acc))
Example #4
0
def save_mask_npy(input_dir, reconvert, flip):
    """Convert mask NIfTI files into Numpy arrays, and save in .npy format.

    Walks ``input_dir`` recursively. In every directory holding exactly one
    ``.nii.gz`` file, the mask is loaded, its first two axes are swapped and
    the result is written to ``mask.npy`` in the same directory.

    Args:
        input_dir: Directory containing mask.nii.gz files.
        reconvert: If false, directories that already contain a file ending
            in ``mask.npy`` are skipped.
        flip: If true, the mask is reversed along its last axis when the z
            component of ImagePositionPatient decreases from the first to the
            second DICOM (in sorted file-name order).

    Raises:
        RuntimeError: If a directory holds more than one ``.nii.gz`` file, if
            the header has a non-positive ``sform_code`` or a positive first
            ``srow_x`` entry, or if the number of mask slices differs from
            the number of DICOM files in the directory.
    """
    for dir_path, _, filenames in tqdm(list(os.walk(input_dir))):
        mask_list = [
            f for f in filenames
            if f.endswith('.nii.gz') and not f.startswith('.')
        ]
        if not mask_list:
            continue
        if not reconvert and any(f.endswith('mask.npy') for f in filenames):
            continue
        if len(mask_list) > 1:
            raise RuntimeError('{} mask files in {}.'.format(
                len(mask_list), dir_path))
        mask_file = nib.load(os.path.join(dir_path, mask_list[0]))
        mask_header = mask_file.header
        # get_data() is deprecated/removed in nibabel; this is its documented
        # equivalent.
        mask_array = np.asanyarray(mask_file.dataobj)
        if mask_header['sform_code'] <= 0 or mask_header['srow_x'][0] > 0:
            raise RuntimeError(
                'Check header and x-y axis for mask file in {}.'.format(
                    dir_path))

        dcm_files = sorted([d for d in filenames if d.endswith('.dcm')])
        num_dcms = len(dcm_files)
        if mask_array.shape[2] != num_dcms:
            raise RuntimeError(
                'Mask file has {} slices, but {} dicoms present in folder {}.'.
                format(mask_array.shape[2], num_dcms, dir_path))
        mask_array = mask_array.transpose(1, 0, 2)

        # Two DICOMs are needed to determine the slice direction; with fewer
        # there is nothing to flip.
        if flip and num_dcms > 1:
            dcm_first = util.read_dicom(os.path.join(dir_path, dcm_files[0]))
            dcm_second = util.read_dicom(os.path.join(dir_path, dcm_files[1]))
            z_step = (dcm_second.ImagePositionPatient[2]
                      - dcm_first.ImagePositionPatient[2])
            if z_step < 0:
                mask_array = np.flip(mask_array, axis=2)
                print('Flipped mask for series {}.'.format(dir_path))

        mask_path = os.path.join(dir_path, 'mask.npy')
        np.save(mask_path, mask_array)
Example #5
0
def generate_nifti(args):
    """Generate binary masks from model output and convert them to NIfTI format.

    For every ``<num>_<suffix>.npy`` model output in ``args.mask_dir`` the
    array is transposed to match the DICOM layout, flipped along the slice
    axis for series that are not bottom-up, thresholded, and written to
    ``<args.save_dir>/niftis_threshold_<threshold>/``.

    Args:
        args: Namespace providing ``mask_dir`` (directory of ``.npy`` model
            outputs), ``save_dir`` (parent directory for the NIfTI files),
            ``threshold`` (cut-off applied to the model output) and
            ``pkl_path`` (pickled list of series objects).

    Raises:
        AssertionError: If a mask has no matching series in the pickle file,
            or its first dimension differs from the number of DICOM files.
    """
    mask_list = [m for m in os.listdir(args.mask_dir) if m.endswith('.npy')]
    save_dir = os.path.join(args.save_dir, 'niftis_threshold_{}'.format(args.threshold))
    os.makedirs(save_dir, exist_ok=True)
    if not mask_list:
        # Nothing to convert; avoids loading the pickle for an empty directory.
        return

    # NOTE: pickle can execute arbitrary code on load; only pass trusted files.
    with open(args.pkl_path, 'rb') as pkl_file:
        series_list = pickle.load(pkl_file)
    # Index once instead of scanning the list per mask. Later entries overwrite
    # earlier ones, matching the previous "last match wins" scan.
    series_by_path = {s.dset_path: s for s in series_list}

    for mask_name in tqdm(mask_list):
        # Split on the first underscore only so the suffix may contain underscores.
        num = mask_name.split('_', 1)[0]
        output = np.load(os.path.join(args.mask_dir, mask_name))
        series = series_by_path.get('/series/' + num)
        assert series is not None, 'Could not find series {} in series list pickle file.'.format(num)
        dcm_list = sorted([d for d in os.listdir(series.dcm_dir) if d.endswith('.dcm')])
        assert len(dcm_list) == output.shape[0], \
            '{} dcm files in directory, but mask dimension is {}.'.format(len(dcm_list), output.shape)
        dcm_first = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[0]))
        dcm_second = util.read_dicom(os.path.join(series.dcm_dir, dcm_list[1]))

        affine_x = -dcm_first.PixelSpacing[0] * dcm_first.ImageOrientationPatient[0]
        affine_y = -dcm_first.PixelSpacing[1] * dcm_first.ImageOrientationPatient[4]
        # Signed step between the first two slices along z
        affine_z = dcm_second.ImagePositionPatient[2] - dcm_first.ImagePositionPatient[2]

        # Define affine matrix
        affine = [[affine_x, 0., 0., -dcm_first.ImagePositionPatient[0]],
                  [0., affine_y, 0., -dcm_first.ImagePositionPatient[1]],
                  [0., 0., affine_z, dcm_first.ImagePositionPatient[2]],
                  [0., 0., 0., 1.]]
        # np.float_ was removed in NumPy 2.0; np.float64 is the same dtype.
        affine = np.array(affine, dtype=np.float64)

        # Fix dimension of mask to match dicoms and threshold probabilities
        output = np.transpose(output, (2, 1, 0))
        if not series.is_bottom_up:
            output = np.flip(output, axis=2)
        output = (output >= args.threshold).astype(np.float64)

        # Create NIfTI file and save
        nifti_out = nib.Nifti1Image(output, affine=affine)
        filename = '{}_{}_output.nii.gz'.format(num, series.study_name)
        nib.save(nifti_out, os.path.join(save_dir, filename))
def sort_normals(input_dir, slice_thickness, json_path):
    """Index DICOM files of a given slice thickness and dump the index as JSON.

    Walks ``input_dir`` recursively and groups matching files as
    ``{folder: {"<series>_<acquisition>": {"<instance>": [filename, ...]}}}``,
    where ``folder`` is the name of the parent of the directory holding the
    file. Files that cannot be read, or that lack any of the required DICOM
    attributes, are skipped.

    Args:
        input_dir: Root directory to search.
        slice_thickness: Thickness a file's SliceThickness must equal exactly
            to be indexed.
        json_path: Path of the JSON file to write.
    """
    dcms = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    for dir_path, _, filenames in os.walk(input_dir):
        for filename in tqdm(filenames):
            try:
                dcm = util.read_dicom(os.path.join(dir_path, filename))
                thickness = float(dcm.SliceThickness)
                series_num = str(dcm.SeriesNumber)
                acq_num = str(dcm.AcquisitionNumber)
                inst_num = str(dcm.InstanceNumber)
                folder = os.path.basename(Path(dir_path).parents[0])
            except Exception:
                # Non-DICOM or incomplete files are expected here; skip them,
                # but do not swallow KeyboardInterrupt / SystemExit.
                continue
            if thickness == slice_thickness:
                dcms[folder][series_num + '_' + acq_num][inst_num].append(filename)

    with open(json_path, 'w') as json_file:
        json.dump(dcms, json_file, indent=4, sort_keys=True)
Example #7
0
def get_series_numbers(args):
    """Record the series number of each study's contrast series in the CSV.

    Loads ``dir2type.json`` from ``args.output_dir`` and the annotations in
    ``args.input_csv``. For each row whose ``Acc`` folder exists under
    ``args.data_dir``, every subdirectory not yet present in the mapping is
    labelled interactively. For subdirectories labelled ``contrast``, the
    SeriesNumber of one DICOM is stored in the row's ``CTA se`` column. The
    updated CSV (``updated_annotations.csv``) and mapping are written back to
    ``args.output_dir``.

    Args:
        args: Namespace providing ``output_dir``, ``input_csv`` and
            ``data_dir``.
    """
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'r') as json_fh:
        dir2type = json.load(json_fh)
    df = pd.read_csv(args.input_csv)

    for i, row in df.iterrows():
        series_dir = os.path.join(args.data_dir, str(row['Acc']))
        if not os.path.exists(series_dir):
            continue
        print('Found at {}'.format(series_dir))
        for subdir in os.listdir(series_dir):
            if subdir not in dir2type:
                # Keep asking until the user enters 0 or 1
                while True:
                    try:
                        input_num = int(
                            input('{} (0=contrast, 1=other)?\n>>> '.format(
                                subdir)))
                    except ValueError:
                        continue
                    if input_num in (0, 1):
                        break
                dir2type[subdir] = 'contrast' if input_num == 0 else 'non_contrast'

            if dir2type[subdir] != 'contrast':
                continue

            print('{} is contrast'.format(subdir))
            dcm_dir = os.path.join(series_dir, subdir)
            dcm_names = [f for f in os.listdir(dcm_dir) if f.endswith('.dcm')]
            if not dcm_names:
                # An empty folder must not abort the run: the interactive
                # labels are only written to disk at the very end.
                print('No DICOMs found in {}, skipping.'.format(dcm_dir))
                continue
            dcm = util.read_dicom(os.path.join(dcm_dir, dcm_names[0]))
            df.loc[i, 'CTA se'] = int(dcm.SeriesNumber)

    # Write CSV and dir2type mapping
    util.print_err('Dumping CSV file...')
    df.to_csv(os.path.join(args.output_dir, 'updated_annotations.csv'))
    util.print_err('Dumping JSON file...')
    with open(os.path.join(args.output_dir, 'dir2type.json'), 'w') as json_fh:
        json.dump(dir2type,
                  json_fh,
                  indent=4,
                  sort_keys=True,
                  default=util.json_encoder)
def create_hdf5(series_list, output_dir, resample=False, max_series=1e5):
    """Write series volumes and aneurysm masks into ``output_dir/data.hdf5``.

    The file is opened in append mode; series whose dataset
    (``/series/<5-digit index>``) already exists are skipped. Each remaining
    series is read slice by slice, optionally resampled, and stored as an
    int16 dataset, with its aneurysm mask (if ``aneurysm_mask.npy`` exists in
    the DICOM directory) stored as a boolean dataset under
    ``/aneurysm_masks``. The ``dset_path`` and ``aneurysm_mask_path``
    attributes of the processed series are updated, and ``series_list`` is
    dumped to ``series_list.pkl`` and ``series_list.json`` in ``output_dir``.

    Args:
        series_list: List of series objects to convert.
        output_dir: Directory receiving the HDF5, pickle and JSON files.
        resample: If true, pass volume and mask through ``util.resample``
            with target ``(1.5, 1., 1.)``.
        max_series: Stop once the zero-based series index reaches this value.

    Raises:
        AssertionError: If ``series_list`` has 1e5 or more entries, or a mask
            and its volume have different shapes.
        RuntimeError: If a mask is empty after resampling.
    """
    # The context manager closes the HDF5 file even when a series raises midway.
    with h5py.File(os.path.join(output_dir, 'data.hdf5'), 'a') as hdf5_fh:
        for group_name in ('series', 'aneurysm_masks'):
            if group_name not in hdf5_fh:
                hdf5_fh.create_group('/{}'.format(group_name))

        assert len(series_list) < 1e5, 'Too many series for 5-digit IDs.'
        for i, s in enumerate(series_list):
            if i >= max_series:
                break
            dset_path = '/series/{:05d}'.format(i + 1)
            if dset_path in hdf5_fh:
                # NOTE(review): s.dset_path is not refreshed for series that
                # are skipped here - confirm that is intended for resumed runs.
                continue
            print('Processing series {} from study {}...'.format(
                s.series_number, s.study_name))
            pixel_arrays = []
            is_valid_series = True
            for slice_name in tqdm(s.slice_names, total=len(s), unit=' slices'):
                # Process and write slices
                dcm_path = os.path.join(s.dcm_dir, slice_name + '.dcm')
                dcm = util.read_dicom(dcm_path)
                try:
                    pixel_arrays.append(util.dcm_to_raw(dcm))
                except NotImplementedError:
                    print('Unsupported image format, not converting study: {}'.
                          format(s.study_name))
                    is_valid_series = False
                    break
            if not is_valid_series:
                continue

            volume = np.stack(pixel_arrays)

            aneurysm_mask_path = os.path.join(s.dcm_dir, 'aneurysm_mask.npy')
            if os.path.exists(aneurysm_mask_path):
                s.aneurysm_mask_path = aneurysm_mask_path
                # Move the last mask axis to the front before comparing with the volume
                aneurysm_mask = np.transpose(np.load(s.aneurysm_mask_path),
                                             [2, 0, 1])
            else:
                s.aneurysm_mask_path = None
                aneurysm_mask = None

            assert aneurysm_mask is None or aneurysm_mask.shape == volume.shape, \
                'Mismatched aneurysm mask and volume shapes: {} and {}'.format(aneurysm_mask.shape, volume.shape)
            if len(s) > 0 and resample:
                util.print_err('Resampling volume... Shape before: {}'.format(
                    volume.shape))
                tick = time.time()
                dcm = util.read_dicom(
                    os.path.join(s.dcm_dir, s.slice_names[0] + '.dcm'))
                volume, _ = util.resample(volume, dcm.SliceThickness,
                                          dcm.PixelSpacing, (1.5, 1., 1.))
                util.print_err('Shape after: {}. Resample took {} s.'.format(
                    volume.shape,
                    time.time() - tick))
                if aneurysm_mask is not None:
                    util.print_err(
                        'Resampling mask... Shape before: {}, count before: {}.'.
                        format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0)))
                    tick = time.time()
                    aneurysm_mask, _ = util.resample(
                        aneurysm_mask, dcm.SliceThickness, dcm.PixelSpacing,
                        (1.5, 1., 1.))
                    util.print_err(
                        'Mask shape after: {}, count after: {}. Resample took {} s.'
                        .format(aneurysm_mask.shape, np.sum(aneurysm_mask > 0),
                                time.time() - tick))
                    if not aneurysm_mask.any():
                        raise RuntimeError(
                            'Mask has zero volume after resampling.')

                    if s.is_aneurysm:
                        # Recompute slice numbers where the aneurysm lives
                        s.aneurysm_bounds = get_aneurysm_range(aneurysm_mask)
                        s.aneurysm_ranges = [s.aneurysm_bounds]
                        s.absolute_range = [0, aneurysm_mask.shape[0]]

            # Create one dataset for the volume (int16), one for the mask (bool)
            s.dset_path = dset_path
            hdf5_fh.create_dataset(s.dset_path,
                                   data=volume,
                                   dtype='i2',
                                   chunks=True)

            if aneurysm_mask is not None:
                # From here on the attribute refers to the HDF5 dataset, not the .npy file
                s.aneurysm_mask_path = '/aneurysm_masks/{:05d}'.format(i + 1)
                hdf5_fh.create_dataset(s.aneurysm_mask_path,
                                       data=aneurysm_mask,
                                       dtype='?',
                                       chunks=True)

        # Print summary
        util.print_err('Series: {}'.format(len(hdf5_fh['/series'])))
        util.print_err('Aneurysm Masks: {}'.format(len(
            hdf5_fh['/aneurysm_masks'])))

        # Dump pickle and JSON (updated dset_path and mask_path attributes)
        util.print_err('Dumping pickle file...')
        with open(os.path.join(output_dir, 'series_list.pkl'), 'wb') as pkl_fh:
            pickle.dump(series_list, pkl_fh)
        util.print_err('Dumping JSON file...')
        with open(os.path.join(output_dir, 'series_list.json'), 'w') as json_file:
            json.dump([dict(series) for series in series_list],
                      json_file,
                      indent=4,
                      sort_keys=True,
                      default=util.json_encoder)