# Example 1
def dcm_dir_2_numpy(input_folder,
                    verbose=False,
                    harden_orientation=False,
                    return_all=False):
    """Stack the DICOM files found under ``input_folder`` into a numpy volume.

    Recursively collects files, groups them by SeriesInstanceUID, sorts each
    series by InstanceNumber, and returns the FIRST series that loads
    successfully (original author flagged this as "bad behavior" but it is
    preserved for compatibility).  TODO: take slice_order into account.

    Parameters
    ----------
    input_folder : str
        Directory searched recursively for DICOM files.
    verbose : bool
        If True, print progress messages.
    harden_orientation : bool
        If True, permute/flip the volume so the affine diagonal is positive
        and axis-dominant (original author: unsure of dynamics, "but they
        work").
    return_all : bool
        If True, return ``(volume, None, affine)``; the middle slot is
        reserved for DICOM tags (TODO: provide without doubling memory).

    Returns
    -------
    numpy.ndarray, or (numpy.ndarray, None, numpy.ndarray), or None
        Implicitly returns None when no series could be read.
    """

    if verbose:
        print('Searching for dicom files...')

    found_files = grab_files_recursive(input_folder)

    if verbose:
        # BUGFIX: these messages were double-wrapped by a 2to3 conversion
        # (print(('Found', ...))) and printed tuple reprs; use real print args.
        print('Found', len(found_files), 'in directory. \n')
        print('Checking DICOM compatability...')

    # Keep only files pydicom can parse; remember each file's series UID.
    # (Renamed loop variable: "file" shadowed the builtin.)
    dicom_files = []
    for candidate in found_files:
        try:
            temp_dicom = pydicom.read_file(candidate)
            dicom_files.append(
                [candidate,
                 temp_dicom.data_element('SeriesInstanceUID').value])
        except Exception:
            # Not a readable DICOM file -- deliberate best-effort skip.
            continue

    if verbose:
        print('Found', len(dicom_files), 'DICOM files in directory. \n')
        print('Counting volumes..')

    # Group filepaths by SeriesInstanceUID: one key per volume.
    unique_dicoms = defaultdict(list)
    for filepath, series_uid in dicom_files:
        unique_dicoms[series_uid].append(filepath)

    if verbose:
        print('Found', len(unique_dicoms), 'unique volumes \n')
        print('Saving out files from these volumes.')

    for UID in list(unique_dicoms.keys()):

        try:
            # Grab DICOMs for a certain Instance.
            current_files = unique_dicoms[UID]
            current_dicoms = [pydicom.read_file(dcm) for dcm in current_files]

            # Sort slices by InstanceNumber.  BUGFIX: the original sorted
            # zipped (instance, Dataset) pairs; on tied instance numbers the
            # tuple sort falls back to comparing Dataset objects, which
            # raises TypeError under Python 3.  Sorting an index list keyed
            # on the instance number alone avoids that.
            dicom_instances = [
                x.data_element('InstanceNumber').value for x in current_dicoms
            ]
            order = sorted(range(len(dicom_instances)),
                           key=lambda idx: dicom_instances[idx])
            current_dicoms = [current_dicoms[idx] for idx in order]
            current_files = [current_files[idx] for idx in order]
            first_dicom, last_dicom = current_dicoms[0], current_dicoms[-1]

            if verbose:
                print('Loading...', input_folder)

        except Exception:
            print('Could not read DICOM volume SeriesDescription. Skipping UID...',
                  str(UID))
            continue

        try:
            output_affine = _dicom_to_nifti_affine(first_dicom, last_dicom,
                                                   len(current_dicoms))

            # Stack per-slice pixel arrays along a new last axis.
            output_numpy = []
            for i in range(len(current_dicoms)):
                try:
                    output_numpy.append(
                        get_dicom_pixel_array(current_dicoms[i],
                                              current_files[i]))
                except Exception as e:
                    # BUGFIX: was "e.msg" -- standard exceptions have no
                    # .msg attribute, so the handler itself crashed.
                    print("{}".format(e))
                    print('Warning, error at slice', i, 'in folder',
                          input_folder)
            output_numpy = np.stack(output_numpy, -1)

            if harden_orientation:
                output_numpy, output_affine = _harden_to_identity(
                    output_numpy, output_affine)

            if return_all:
                # TODO provide DICOM tags without doubling memory
                return output_numpy, None, output_affine
            return output_numpy

        except Exception:
            print('Could not read DICOM at folder...', input_folder)


def _dicom_to_nifti_affine(first_dicom, last_dicom, num_slices):
    """Build the 4x4 NIfTI-space affine from patient-position DICOM tags.

    Reads ImagePositionPatient / ImageOrientationPatient / PixelSpacing from
    the first slice and ImagePositionPatient from the last slice.

    NOTE(review): num_slices == 1 divides by zero here, exactly as the
    original code did -- confirm whether single-slice series can occur.
    """
    output_affine = np.eye(4)
    image_position_patient = np.array(
        first_dicom.data_element('ImagePositionPatient').value).astype(float)
    image_orientation_patient = np.array(
        first_dicom.data_element(
            'ImageOrientationPatient').value).astype(float)
    last_image_position_patient = np.array(
        last_dicom.data_element('ImagePositionPatient').value).astype(float)
    pixel_spacing_patient = np.array(
        first_dicom.data_element('PixelSpacing').value).astype(float)

    # DICOM-space affine: direction cosines scaled by pixel spacing; slice
    # direction from the first-to-last position difference.  (Original
    # author: "don't fully understand, TODO".)
    output_affine[0:3, 0] = (pixel_spacing_patient[0] *
                             image_orientation_patient[0:3])
    output_affine[0:3, 1] = (pixel_spacing_patient[1] *
                             image_orientation_patient[3:6])
    output_affine[0:3, 2] = (image_position_patient -
                             last_image_position_patient) / (1 - num_slices)
    output_affine[0:3, 3] = image_position_patient

    # Transform from DICOM space to NIfTI space: swap the first two axes
    # (cr_flip) and negate them (neg_flip).  (Original author: "don't fully
    # understand".)
    cr_flip = np.eye(4)
    cr_flip[0:2, 0:2] = [[0, 1], [1, 0]]
    neg_flip = np.eye(4)
    neg_flip[0:2, 0:2] = [[-1, 0], [0, -1]]
    return np.matmul(neg_flip, np.matmul(output_affine, cr_flip))


def _harden_to_identity(output_numpy, output_affine):
    """Permute and flip a volume so its affine is closest to identity.

    Returns the (possibly transposed/flipped) volume and the updated affine.
    Logic carried over verbatim from the original; the original author was
    unsure of the dynamics "but they work".
    """
    # For each affine column, the row index with the largest magnitude --
    # i.e. the world axis each data axis is most aligned with.
    cx, cy, cz = np.argmax(np.abs(output_affine[0:3, 0:3]), axis=0)

    output_numpy = np.transpose(output_numpy, (cx, cy, cz))

    # Permutation matrix matching the transpose above.
    harden_matrix = np.eye(4)
    for dim, i in enumerate([cx, cy, cz]):
        harden_matrix[i, i] = 0
        harden_matrix[dim, i] = 1
    output_affine = np.matmul(output_affine, harden_matrix)

    # Flip any axis whose diagonal affine entry is negative.
    flip_matrix = np.eye(4)
    for i in range(3):
        if output_affine[i, i] < 0:
            flip_matrix[i, i] = -1
            output_numpy = np.flip(output_numpy, i)

    output_affine = np.matmul(output_affine, flip_matrix)

    return output_numpy, output_affine
# Example 2
def parse_filepaths(data_collection,
                    data_group_dict,
                    case_list=None,
                    recursive=True,
                    verbose=True,
                    file_identifying_chars=None):
    """Match files across data groups and register complete cases.

    Files from the first data group ("lead" group) drive the search: for
    each lead file, a sibling file is looked up in every other sequence
    directory; only cases with a full set of matches are added to
    ``data_collection``.

    Not yet implemented: ``case_list`` filtering, recursive traversal
    semantics beyond ``grab_files_recursive``, and pulling the lead group
    from multiple directories.
    """

    # The first entry of the first data group defines the root directory
    # against which relative paths are computed.
    lead_group = data_group_dict[next(iter(data_group_dict))]
    first_entry = lead_group[0]
    lead_directory = os.path.abspath(
        first_entry[0] if type(first_entry) is list else first_entry)

    # Collect the lead group's files; a bare directory means "everything
    # inside it".
    candidate_files = []
    for entry in lead_group:
        if os.path.isdir(os.path.normpath(entry)):
            entry = os.path.join(entry, '*')
        pattern = os.path.basename(entry)
        candidate_files.extend(
            grab_files_recursive(os.path.abspath(os.path.dirname(entry)),
                                 regex=pattern,
                                 recursive=recursive))

    for case_path in candidate_files:

        # Path of this file relative to the lead directory, and its
        # extension-stripped stem used as the match pattern.
        relative_dir = os.path.dirname(case_path).split(lead_directory, 1)[1]
        file_stem = nifti_splitext(case_path)[0]

        if file_identifying_chars is not None:
            file_stem = os.path.basename(
                os.path.join(
                    os.path.dirname(file_stem),
                    os.path.basename(file_stem)[:file_identifying_chars]))

        # Look up a matching file per sequence in every data group; a group
        # with any missing/ambiguous match disqualifies the whole case.
        for data_group, sequence_labels in list(
                data_collection.data_group_dict.items()):

            matched_files = []

            for sequence in sequence_labels:

                matches = glob.glob(
                    os.path.join(sequence, relative_dir, file_stem + '*'))

                if len(matches) == 1:
                    matched_files.append(matches[0])
                else:
                    print(('Error loading', sequence, 'from case',
                           case_path))
                    print('No file found.\n'
                          if not matches else 'Multiple files found.\n')

            if len(matched_files) != len(sequence_labels):
                break
            data_collection.data_groups[data_group].add_case(
                case_path, matched_files)
        else:
            # No group broke out: every sequence matched, register the case.
            data_collection.cases.append(case_path)