Example #1
def read_dicomdir(filename="DICOMDIR"):
    """Read a DICOMDIR file and return a DicomDir instance.

    This is a wrapper around dcmread, which gives a default file name.

    Parameters
    ----------
    filename : str, optional
        Full path and name to DICOMDIR file to open

    Returns
    -------
    DicomDir

    Raises
    ------
    InvalidDicomError
        Raised if filename is not a DICOMDIR file.
    """
    # dcmread will return a DicomDir instance if file is one.

    # Read the file as usual.
    ds = dcmread(filename)
    # Here, check that it is in fact DicomDir
    if not isinstance(ds, DicomDir):
        msg = u"File '{0}' is not a Media Storage Directory file".format(
            filename)
        raise InvalidDicomError(msg)
    return ds
    def __init__(self, filename_or_obj, dataset, preamble=None, file_meta=None,
                 is_implicit_VR=True, is_little_endian=True):
        """Initialize a DICOMDIR dataset read from a DICOM file
        Carries forward all the initialization from FileDataset class

        :param filename: full path and filename to the file. Use None if is a BytesIO.
        :param dataset: some form of dictionary, usually a Dataset from read_dataset()
        :param preamble: the 128-byte DICOM preamble
        :param file_meta: the file meta info dataset, as returned by _read_file_meta,
                or an empty dataset if no file meta information is in the file
        :param is_implicit_VR: True if implicit VR transfer syntax used; False if explicit VR. Default is True.
        :param is_little_endian: True if little-endian transfer syntax used; False if big-endian. Default is True.
        """
        # Usually this class is created through filereader.read_partial,
        # and it checks class SOP, but in case of direct creation,
        # check here also
        if file_meta:
            class_uid = file_meta.MediaStorageSOPClassUID
            if not class_uid == "Media Storage Directory Storage":
                msg = "SOP Class is not Media Storage Directory (DICOMDIR)"
                raise InvalidDicomError(msg)
        FileDataset.__init__(self, filename_or_obj, dataset,
                             preamble, file_meta,
                             is_implicit_VR=is_implicit_VR,
                             is_little_endian=is_little_endian)
        self.parse_records()
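
For context, a minimal usage sketch of the wrapper above; the path is a placeholder and the snippet assumes a pydicom version that still ships DicomDir (pre-3.0):

from pydicom.errors import InvalidDicomError

try:
    dicom_dir = read_dicomdir("/path/to/DICOMDIR")   # placeholder path
except InvalidDicomError as exc:
    print(f"Not a DICOMDIR: {exc}")
else:
    print(f"Found {len(dicom_dir.patient_records)} PATIENT record(s)")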
Example #3
def read_dicomdir(filename: PathType = "DICOMDIR") -> DicomDir:
    """Read a DICOMDIR file and return a :class:`~pydicom.dicomdir.DicomDir`.

    This is a wrapper around :func:`dcmread` which gives a default file name.

    .. deprecated:: 2.1

        ``read_dicomdir()`` is deprecated and will be removed in v3.0. Use
        :func:`~pydicom.filereader.dcmread` instead.

    Parameters
    ----------
    filename : str, optional
        Full path and name to DICOMDIR file to open

    Returns
    -------
    DicomDir

    Raises
    ------
    InvalidDicomError
        Raised if filename is not a DICOMDIR file.
    """
    warnings.warn(
        "'read_dicomdir()' is deprecated and will be removed in v3.0, use "
        "'dcmread()' instead", DeprecationWarning)

    str_or_obj = path_from_pathlike(filename)
    ds = dcmread(str_or_obj)
    if not isinstance(ds, DicomDir):
        raise InvalidDicomError(
            f"File '{filename!r}' is not a Media Storage Directory file")

    return ds
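
Since the wrapper is deprecated, the equivalent non-deprecated call goes straight through dcmread(); a minimal sketch with a placeholder path, again assuming pydicom < 3.0:

from pydicom import dcmread
from pydicom.dicomdir import DicomDir
from pydicom.errors import InvalidDicomError

ds = dcmread("/path/to/DICOMDIR")   # placeholder path
if not isinstance(ds, DicomDir):
    raise InvalidDicomError("File is not a Media Storage Directory file")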
Example #4
def read_preamble(fp: BinaryIO, force: bool) -> Optional[bytes]:
    """Return the 128-byte DICOM preamble in `fp` if present.

    `fp` should be positioned at the start of the file-like. If the preamble
    and prefix are found then after reading `fp` will be positioned at the
    first byte after the prefix (byte offset 132). If either the preamble or
    prefix are missing and `force` is ``True`` then after reading `fp` will be
    positioned at the start of the file-like.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate prefix
        ('DICM') is found at byte offset 128. Returns ``None`` if the 'DICM'
        prefix is not found and `force` is ``True``.

    Raises
    ------
    InvalidDicomError
        If `force` is ``False`` and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading File Meta Information preamble...")
    preamble = fp.read(128)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug(f"{fp.tell() - 128:08x}: {sample}")

    logger.debug("Reading File Meta Information prefix...")
    magic = fp.read(4)
    if magic != b"DICM" and force:
        logger.info(
            "File is not conformant with the DICOM File Format: 'DICM' "
            "prefix is missing from the File Meta Information header "
            "or the header itself is missing. Assuming no header and "
            "continuing.")
        fp.seek(0)
        return None

    if magic != b"DICM" and not force:
        raise InvalidDicomError(
            "File is missing DICOM File Meta Information header or the 'DICM' "
            "prefix is missing from the header. Use force=True to force "
            "reading.")
    else:
        logger.debug(f"{fp.tell() - 4:08x}: 'DICM' prefix found")

    return preamble
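
The same preamble/prefix layout can be checked without pydicom internals; a standalone sketch (the 128-byte preamble is followed by the 4-byte b"DICM" prefix at offset 128):

def has_dicm_prefix(path):
    """Return True if the file has the 'DICM' prefix at byte offset 128."""
    with open(path, "rb") as f:
        f.seek(128)           # skip the 128-byte preamble
        return f.read(4) == b"DICM"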
Example #5
def getattr_required(dataset, name):
    '''
    Helper for accessing a required DICOM attribute; raises our standard
    exception if the attribute is missing.
    '''
    try:
        return getattr(dataset, name)
    except AttributeError:
        raise InvalidDicomError(f"Missing required DICOM attribute {name}")
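
A hypothetical call site for the helper above; "PatientID" stands in for whatever attribute the caller treats as mandatory, and the path is a placeholder:

import pydicom

ds = pydicom.dcmread("/path/to/file.dcm")       # placeholder path
patient_id = getattr_required(ds, "PatientID")  # raises InvalidDicomError if absent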
Example #6
    def __init__(self,
                 filename_or_obj,
                 dataset,
                 preamble=None,
                 file_meta=None,
                 is_implicit_VR=True,
                 is_little_endian=True):
        """Initialize a DICOMDIR dataset read from a DICOM file.

        Carries forward all the initialization from
        :class:`~pydicom.dataset.FileDataset`

        Parameters
        ----------
        filename_or_obj : str or None
            Full path and filename to the file, or ``None`` if
            :class:`io.BytesIO`.
        dataset : dataset.Dataset
            Some form of dictionary, usually a
            :class:`~pydicom.dataset.FileDataset` from
            :func:`~pydicom.filereader.dcmread`.
        preamble : bytes
            The 128-byte DICOM preamble.
        file_meta : dataset.Dataset
            The file meta :class:`~pydicom.dataset.Dataset`, such as
            the one returned by
            :func:`~pydicom.filereader.read_file_meta_info`, or an empty
            :class:`~pydicom.dataset.Dataset` if no file meta information is
            in the file.
        is_implicit_VR : bool
            ``True`` if implicit VR transfer syntax used (default); ``False``
            if explicit VR.
        is_little_endian : bool
            ``True`` if little endian transfer syntax used (default); ``False``
            if big endian.
        """
        # Usually this class is created through filereader.read_partial,
        # and it checks class SOP, but in case of direct creation,
        # check here also
        if file_meta:
            class_uid = file_meta.MediaStorageSOPClassUID
            if not class_uid.name == "Media Storage Directory Storage":
                msg = "SOP Class is not Media Storage Directory (DICOMDIR)"
                raise InvalidDicomError(msg)
        FileDataset.__init__(self,
                             filename_or_obj,
                             dataset,
                             preamble,
                             file_meta,
                             is_implicit_VR=is_implicit_VR,
                             is_little_endian=is_little_endian)
        self.parse_records()
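
The name-based SOP Class check in the constructor can also be written against the raw UID value; a small sketch (Media Storage Directory Storage is "1.2.840.10008.1.3.10"):

MEDIA_STORAGE_DIRECTORY_STORAGE = "1.2.840.10008.1.3.10"

def is_dicomdir_meta(file_meta):
    """Return True if the file meta dataset describes a DICOMDIR."""
    uid = getattr(file_meta, "MediaStorageSOPClassUID", None)
    return uid == MEDIA_STORAGE_DIRECTORY_STORAGE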
Example #7
def _is_implicit_vr(fp, implicit_vr_is_assumed, is_little_endian, stop_when):
    """Check if the real VR is explicit or implicit.

    Parameters
    ----------
    fp : an opened file object
    implicit_vr_is_assumed : bool
        True if implicit VR is assumed.
        If this does not match with the real transfer syntax, a user warning
        will be issued.
    is_little_endian : bool
        True if file has little endian transfer syntax.
        Needed to interpret the first tag.
    stop_when : callable, optional
        Optional callback function which can terminate reading.
        Needed to check if the next tag still belongs to the read dataset.

    Returns
    -------
    True if implicit VR is used, False otherwise.
    """
    tag_bytes = fp.read(4)
    vr = fp.read(2)
    if len(vr) < 2:
        return implicit_vr_is_assumed

    # it is sufficient to check if the VR is in valid ASCII range, as it is
    # extremely unlikely that the tag length accidentally has such a
    # representation - this would need the first tag to be longer than 16kB
    # (e.g. it should be > 0x4141 = 16705 bytes)
    vr1 = ord(vr[0]) if in_py2 else vr[0]
    vr2 = ord(vr[1]) if in_py2 else vr[1]
    found_implicit = not (0x40 < vr1 < 0x5B and 0x40 < vr2 < 0x5B)

    if found_implicit != implicit_vr_is_assumed:
        # first check if the tag still belongs to the dataset if stop_when
        # is given - if not, the dataset is empty and we just return
        endian_chr = "<" if is_little_endian else ">"
        tag = TupleTag(unpack(endian_chr + "HH", tag_bytes))
        if stop_when is not None and stop_when(tag, vr, 0):
            return found_implicit

        # got to the real problem - warn or raise depending on config
        found_vr = 'implicit' if found_implicit else 'explicit'
        expected_vr = 'implicit' if not found_implicit else 'explicit'
        message = ('Expected {0} VR, but found {1} VR - using {1} VR for '
                   'reading'.format(expected_vr, found_vr))
        if config.enforce_valid_values:
            raise InvalidDicomError(message)
        warnings.warn(message, UserWarning)
    return found_implicit
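
The ASCII-range heuristic used above can be isolated into a two-byte check: explicit VR puts two uppercase letters (0x41-0x5A) right after the tag, whereas in implicit VR those bytes belong to a 4-byte length and rarely both land in that range. A minimal sketch:

def looks_like_explicit_vr(two_bytes):
    """Return True if both bytes fall in the uppercase ASCII range A-Z."""
    return len(two_bytes) == 2 and all(0x40 < b < 0x5B for b in two_bytes)

assert looks_like_explicit_vr(b"PN")            # a real VR code
assert not looks_like_explicit_vr(b"\x04\x00")  # a small little-endian length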
Example #8
def read_preamble(fp, force):
    """Return the 128-byte DICOM preamble in `fp` if present.

    Parameters
    ----------
    fp : file-like object
        The file-like to read the preamble from.
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : str/bytes or None
        The 128-byte DICOM preamble will be returned if the appropriate prefix
        ('DICM') is found at byte offset 128. Returns None if the 'DICM' prefix
        is not found and `force` is True.

    Raises
    ------
    InvalidDicomError
        If `force` is False and no appropriate header information found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))
    magic = fp.read(4)
    if magic != b"DICM":
        if force:
            logger.info(
                "File is not a conformant DICOM file; 'DICM' prefix is "
                "missing from the file header or the header is "
                "missing. Assuming no header and continuing.")
            preamble = None
            fp.seek(0)
        else:
            raise InvalidDicomError("File is missing DICOM header or 'DICM' "
                                    "prefix is missing from the header. Use "
                                    "force=True to force reading.")
    else:
        logger.debug("{0:08x}: 'DICM' prefix found".format(fp.tell() - 4))
    return preamble
Example #9
    def parse_records(self):
        """Build the hierarchy of given directory records, and structure
        into Patient, Studies, Series, Images hierarchy.

        This is intended for initial read of file only,
        it will not reorganize correctly if records are changed.
        """

        # Define a helper function for organizing the records
        def get_siblings(record, map_offset_to_record):
            """Return a list of all siblings of the given directory record,
            including itself.
            """
            sibling_list = [record]
            current_record = record
            while ('OffsetOfTheNextDirectoryRecord' in current_record
                   and current_record.OffsetOfTheNextDirectoryRecord):
                offset_of_next = current_record.OffsetOfTheNextDirectoryRecord
                sibling = map_offset_to_record[offset_of_next]
                sibling_list.append(sibling)
                current_record = sibling
            return sibling_list

        # Build the mapping from file offsets to records
        records = self.DirectoryRecordSequence
        map_offset_to_record = {}
        for record in records:
            offset = record.seq_item_tell
            map_offset_to_record[offset] = record
        # logging.debug("Record offsets: " + map_offset_to_record.keys())

        # Find the children of each record
        for record in records:
            record.children = []
            if 'OffsetOfReferencedLowerLevelDirectoryEntity' in record:
                child_offset = (
                    record.OffsetOfReferencedLowerLevelDirectoryEntity)
                if child_offset:
                    child = map_offset_to_record[child_offset]
                    record.children = get_siblings(child, map_offset_to_record)

        self.patient_records = [
            record for record in records
            if getattr(record, 'DirectoryRecordType') == 'PATIENT'
        ]
        if not self.patient_records:
            raise InvalidDicomError('Missing PATIENT record(s) in DICOMDIR')
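
A sketch of walking the hierarchy that parse_records() builds; each record gets a `children` list, so a well-formed DICOMDIR can be traversed PATIENT > STUDY > SERIES > IMAGE (the argument is assumed to be a DicomDir returned by dcmread):

def print_referenced_files(dicom_dir):
    """Walk PATIENT > STUDY > SERIES > IMAGE records and print file IDs."""
    for patient in dicom_dir.patient_records:
        for study in patient.children:
            for series in study.children:
                for image in series.children:
                    print(image.ReferencedFileID)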
Example #10
def read_preamble(fp, force):
    """Read and return the DICOM preamble.

    Parameters
    ----------
    fp : file-like object
    force : bool
        Flag to force reading of a file even if no header is found.

    Returns
    -------
    preamble : bytes or None
        The DICOM preamble will be returned if appropriate
        header ('DICM') is found. Returns None if no header
        is found.

    Raises
    ------
    InvalidDicomError
        If force flag is false and no appropriate header information
        found.

    Notes
    -----
    Also reads past the 'DICM' marker. Rewinds file to the beginning if
    no header found.
    """
    logger.debug("Reading preamble...")
    preamble = fp.read(0x80)
    if config.debugging:
        sample = bytes2hex(preamble[:8]) + "..." + bytes2hex(preamble[-8:])
        logger.debug("{0:08x}: {1}".format(fp.tell() - 0x80, sample))
    magic = fp.read(4)
    if magic != b"DICM":
        if force:
            logger.info("File is not a standard DICOM file; 'DICM' header is "
                        "missing. Assuming no header and continuing")
            preamble = None
            fp.seek(0)
        else:
            raise InvalidDicomError("File is missing 'DICM' marker. "
                                    "Use force=True to force reading")
    else:
        logger.debug("{0:08x}: 'DICM' marker found".format(fp.tell() - 4))
    return preamble
Example #11
def read_dicomdir(filename="DICOMDIR"):
    """Read a DICOMDIR file and return a :class:`~pydicom.dicomdir.DicomDir`.

    This is a wrapper around :func:`dcmread` which gives a default file name.

    .. deprecated:: 2.1

        ``read_dicomdir()`` is deprecated and will be removed in v3.0. Use
        :func:`~pydicom.filereader.dcmread` instead.

    Parameters
    ----------
    filename : str, optional
        Full path and name to DICOMDIR file to open

    Returns
    -------
    DicomDir

    Raises
    ------
    InvalidDicomError
        Raised if filename is not a DICOMDIR file.
    """
    # dcmread will return a DicomDir instance if file is one.
    warnings.warn(
        "'read_dicomdir()' is deprecated and will be removed in v3.0, use "
        "'dcmread()' instead",
        DeprecationWarning
    )

    # Read the file as usual.
    ds = dcmread(filename)
    # Here, check that it is in fact DicomDir
    if not isinstance(ds, DicomDir):
        msg = "File '{0}' is not a Media Storage Directory file".format(
            filename)
        raise InvalidDicomError(msg)
    return ds
Example #12
def _test():
    raise InvalidDicomError('test msg')
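
A hedged sketch of how such a test helper is typically exercised with pytest; the match pattern mirrors the message raised above:

import pytest
from pydicom.errors import InvalidDicomError

def test_invalid_dicom_error_is_raised():
    with pytest.raises(InvalidDicomError, match="test msg"):
        _test()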
Example #13
    def __init__(
        self,
        filename_or_obj: Union[str, os.PathLike, BinaryIO],
        dataset: Dataset,
        preamble: Optional[bytes] = None,
        file_meta: Optional[FileMetaDataset] = None,
        is_implicit_VR: bool = True,
        is_little_endian: bool = True,
    ) -> None:
        """Initialize a DICOMDIR dataset read from a DICOM file.

        Carries forward all the initialization from
        :class:`~pydicom.dataset.FileDataset`

        Parameters
        ----------
        filename_or_obj : str or PathLike or file-like or None
            Full path and filename to the file, or ``None`` if
            :class:`io.BytesIO`.
        dataset : dataset.Dataset
            Some form of dictionary, usually a
            :class:`~pydicom.dataset.FileDataset` from
            :func:`~pydicom.filereader.dcmread`.
        preamble : bytes
            The 128-byte DICOM preamble.
        file_meta : dataset.Dataset
            The file meta :class:`~pydicom.dataset.Dataset`, such as
            the one returned by
            :func:`~pydicom.filereader.read_file_meta_info`, or an empty
            :class:`~pydicom.dataset.Dataset` if no file meta information is
            in the file.
        is_implicit_VR : bool
            ``True`` if implicit VR transfer syntax used (default); ``False``
            if explicit VR.
        is_little_endian : bool
            ``True`` if little endian transfer syntax used (default); ``False``
            if big endian.

        Raises
        ------
        InvalidDicomError
            If the file transfer syntax is not Little Endian Explicit and
            :func:`enforce_valid_values<pydicom.config.enforce_valid_values>`
            is ``True``.

        """
        # Usually this class is created through filereader.read_partial,
        # and it checks class SOP, but in case of direct creation,
        # check here also
        if file_meta:
            class_uid = file_meta.MediaStorageSOPClassUID
            if not class_uid.name == "Media Storage Directory Storage":
                msg = "SOP Class is not Media Storage Directory (DICOMDIR)"
                raise InvalidDicomError(msg)
        if is_implicit_VR or not is_little_endian:
            msg = ('Invalid transfer syntax for DICOMDIR - '
                   'Explicit Little Endian expected.')
            if config.enforce_valid_values:
                raise InvalidDicomError(msg)
            warnings.warn(msg, UserWarning)
        FileDataset.__init__(self,
                             filename_or_obj,
                             dataset,
                             preamble,
                             file_meta,
                             is_implicit_VR=is_implicit_VR,
                             is_little_endian=is_little_endian)

        self.patient_records: List[Dataset] = []
        self.parse_records()
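
The transfer-syntax guard above can equally be expressed against the UID itself; a small sketch (a DICOMDIR must use Explicit VR Little Endian, "1.2.840.10008.1.2.1"):

EXPLICIT_VR_LITTLE_ENDIAN = "1.2.840.10008.1.2.1"

def has_valid_dicomdir_syntax(file_meta):
    """Return True if the file meta declares Explicit VR Little Endian."""
    uid = getattr(file_meta, "TransferSyntaxUID", None)
    return uid == EXPLICIT_VR_LITTLE_ENDIAN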
Example #14
def _is_implicit_vr(
    fp: BinaryIO,
    implicit_vr_is_assumed: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]],
    is_sequence: bool
) -> bool:
    """Check if the real VR is explicit or implicit.

    Parameters
    ----------
    fp : an opened file object
    implicit_vr_is_assumed : bool
        True if implicit VR is assumed.
        If this does not match with the real transfer syntax, a user warning
        will be issued.
    is_little_endian : bool
        True if file has little endian transfer syntax.
        Needed to interpret the first tag.
    stop_when : callable, optional
        Optional callback function which can terminate reading.
        Needed to check if the next tag still belongs to the read dataset.
    is_sequence : bool
        True if called for a sequence, False for a top-level dataset.

    Returns
    -------
    True if implicit VR is used, False otherwise.
    """
    # sequences do not switch from implicit to explicit encoding,
    # but they are allowed to use implicit encoding if the dataset
    # is encoded as explicit VR
    if is_sequence and implicit_vr_is_assumed:
        return True

    tag_bytes = fp.read(4)
    raw_vr = fp.read(2)
    if len(raw_vr) < 2:
        return implicit_vr_is_assumed

    # it is sufficient to check if the VR is in valid ASCII range, as it is
    # extremely unlikely that the tag length accidentally has such a
    # representation - this would need the first tag to be longer than 16kB
    # (e.g. it should be > 0x4141 = 16705 bytes)
    found_implicit = not (0x40 < raw_vr[0] < 0x5B and 0x40 < raw_vr[1] < 0x5B)
    if found_implicit != implicit_vr_is_assumed:
        # first check if the tag still belongs to the dataset if stop_when
        # is given - if not, the dataset is empty and we just return
        endian_chr = "<" if is_little_endian else ">"
        tag = _unpack_tag(tag_bytes, endian_chr)
        vr = raw_vr.decode(default_encoding)
        if stop_when is not None and stop_when(tag, vr, 0):
            return found_implicit

        # sequences with undefined length can be encoded in implicit VR,
        # see PS 3.5, section 6.2.2
        if found_implicit and is_sequence:
            return True

        # got to the real problem - warn or raise depending on config
        found_vr = 'implicit' if found_implicit else 'explicit'
        expected_vr = 'implicit' if not found_implicit else 'explicit'
        msg = f"Expected {expected_vr} VR, but found {found_vr} VR"
        if config.enforce_valid_values:
            raise InvalidDicomError(msg)

        warnings.warn(msg + f" - using {found_vr} VR for reading", UserWarning)

    return found_implicit
    def ingest(self, input_file, skip_contours=False):
        '''
        Load an RT Struct DICOM from input_file and output an intermediate format.

        :param input_file: Path to the DICOM RT Struct file
        :param skip_contours: If True, do not extract the contour point data
        :return: list of dictionaries, one per ROI, with metadata and contour points
        '''

        try:
            rt_struct_image = pydicom.read_file(input_file)

            if not hasattr(rt_struct_image, 'StructureSetROISequence'):
                raise InvalidDicomError()

        except (IsADirectoryError, InvalidDicomError):
            raise InvalidFileFormatException(
                'File {} is not an rt-struct dicom'.format(input_file))

        # lets extract the ROI(s) and dcmrtstruct2nii it to an intermediate format

        contours = []  # this var will hold the contours

        # first create a map so that we can easily trace referenced_roi_number back to its metadata
        metadata_mappings = {}
        for contour_metadata in rt_struct_image.StructureSetROISequence:
            metadata_mappings[contour_metadata.ROINumber] = contour_metadata

        for contour_sequence in rt_struct_image.ROIContourSequence:
            contour_data = {}

            metadata = metadata_mappings[
                contour_sequence.ReferencedROINumber]  # retrieve metadata

            # I'm not sure if these attributes are always present in the metadata and contour_sequence
            # so I decided to write this in a defensive way.

            if hasattr(metadata, 'ROIName'):
                contour_data['name'] = metadata.ROIName

            if hasattr(metadata, 'ROINumber'):
                contour_data['roi_number'] = metadata.ROINumber

            if hasattr(metadata, 'ReferencedFrameOfReferenceUID'):
                contour_data[
                    'referenced_frame'] = metadata.ReferencedFrameOfReferenceUID

            if hasattr(contour_sequence, 'ROIDisplayColor') and len(
                    contour_sequence.ROIDisplayColor) > 0:
                contour_data[
                    'display_color'] = contour_sequence.ROIDisplayColor

            if not skip_contours and hasattr(
                    contour_sequence, 'ContourSequence') and len(
                        contour_sequence.ContourSequence) > 0:
                contour_data['sequence'] = []
                for contour in contour_sequence.ContourSequence:
                    contour_data['sequence'].append({
                        'type': (contour.ContourGeometricType if hasattr(
                            contour, 'ContourGeometricType') else 'unknown'),
                        'points': {
                            'x': (
                                [
                                    contour.ContourData[index]
                                    for index in range(
                                        0, len(contour.ContourData), 3)
                                ] if hasattr(contour, 'ContourData') else None
                            ),  # this is just a fancy way to separate x, y, z from the rtstruct point array
                            'y': (
                                [
                                    contour.ContourData[index + 1]
                                    for index in range(
                                        0, len(contour.ContourData), 3)
                                ]
                                if hasattr(contour, 'ContourData') else None
                            ),  # this is just a fancy way to separate x, y, z from the rtstruct point array
                            'z': (
                                [
                                    contour.ContourData[index + 2]
                                    for index in range(
                                        0, len(contour.ContourData), 3)
                                ] if hasattr(contour, 'ContourData') else None
                            )  # this is just a fancy way to separate x, y, z from the rtstruct point array
                        }
                    })

            if contour_data:
                # only add contour if we successfully extracted (some) data
                contours.append(contour_data)

        return contours
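
Hypothetical usage of the ingest() method above; RtStructReader stands in for whichever class actually defines it, and the file path is a placeholder:

reader = RtStructReader()                                 # hypothetical class name
rois = reader.ingest("/path/to/rtstruct.dcm", skip_contours=True)
for roi in rois:
    print(roi.get("roi_number"), roi.get("name"))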