Example #1
0
    def __setitem__(self, key, value):
        """Store `value` under `key` (``dataset[key] = value``).

        `value` must be a DataElement or RawDataElement whose tag equals
        `key`. For a private element whose private creator element is
        already present in the dataset, the creator's value is recorded on
        the element (a raw element is converted first) before storing it.
        """
        # Subclasses are accepted as well, e.g. DeferredDataElement
        if not isinstance(value, (DataElement, RawDataElement)):
            message = ("Dataset contents must be DataElement instances.\n"
                       "To set a data_element value use data_element.value=val")
            raise TypeError(message)

        tag = Tag(value.tag)
        if key != tag:
            raise ValueError("data_element.tag must match the dictionary key")

        # Non-private elements need no extra bookkeeping
        if not tag.is_private:
            dict.__setitem__(self, tag, value)
            return

        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        creator_tag = Tag(tag.group, tag.elem >> 8)
        element = value
        if creator_tag in self and tag != creator_tag:
            if isinstance(element, RawDataElement):
                element = DataElement_from_raw(element, self._character_set)
            element.private_creator = self[creator_tag].value
        dict.__setitem__(self, tag, element)
Example #2
0
    def test_valid_tag(self):
        """RawDataElement: a known tag converts without error."""
        study_date_raw = RawDataElement(
            Tag(0x00080020), 'DA', 8, b'20170101', 0, False, True
        )
        converted = DataElement_from_raw(study_date_raw, default_encoding)
        assert 'Study Date' == converted.name
        assert 'DA' == converted.VR
        assert '20170101' == converted.value

        # VR is passed as None here; the converted element must report 'UL'
        group_length_raw = RawDataElement(
            Tag(0x00080000), None, 4, b'\x02\x00\x00\x00', 0, True, True
        )
        converted = DataElement_from_raw(group_length_raw, default_encoding)
        assert 'UL' == converted.VR
Example #3
0
    def testValidTag(self):
        """RawDataElement: a known tag converts without error."""
        study_date_raw = RawDataElement(
            Tag(0x00080020), 'DA', 8, b'20170101', 0, False, True
        )
        converted = DataElement_from_raw(study_date_raw, default_encoding)
        self.assertEqual(converted.name, 'Study Date')
        self.assertEqual(converted.VR, 'DA')
        self.assertEqual(converted.value, '20170101')

        # VR is passed as None here; the converted element must report 'UL'
        group_length_raw = RawDataElement(
            Tag(0x00080000), None, 4, b'\x02\x00\x00\x00', 0, True, True
        )
        converted = DataElement_from_raw(group_length_raw, default_encoding)
        assert converted.VR == 'UL'
Example #4
0
    def __getitem__(self, key):
        """Return the element stored for `key` (``dataset[key]``).

        An entry that is already a DataElement is returned as is. An entry
        stored as a tuple (presumably a raw element not converted yet) is
        converted with DataElement_from_raw, written back to the dataset,
        and the stored result is returned.
        """
        tag = Tag(key)
        stored = dict.__getitem__(self, tag)

        if isinstance(stored, DataElement):
            return stored

        if isinstance(stored, tuple):
            # A value of None marks a deferred read: fetch the value now
            if stored.value is None:
                from pydicom.filereader import read_deferred_data_element
                stored = read_deferred_data_element(
                    self.fileobj_type, self.filename, self.timestamp, stored
                )

            # (0008, 0005) itself is converted using the default encoding
            if tag != (0x08, 0x05):
                encoding = self._character_set
            else:
                encoding = default_encoding

            # Replace the raw form with the converted element
            self[tag] = DataElement_from_raw(
                stored, encoding, force=self._force
            )

        return dict.__getitem__(self, tag)
Example #5
0
 def testTagWithoutEncodingPython2(self):
     """RawDataElement: no encoding needed in Python 2."""
     comments_raw = RawDataElement(
         Tag(0x00104000), 'LT', 23, b'comment\\comment2\\comment3',
         0, False, True
     )
     # Conversion is called without an explicit encoding argument
     converted = DataElement_from_raw(comments_raw)
     self.assertEqual(converted.name, 'Patient Comments')
Example #6
0
 def test_data_element_without_encoding(self):
     """RawDataElement: no encoding needed."""
     comments_raw = RawDataElement(
         Tag(0x00104000), 'LT', 23, b'comment\\comment2\\comment3',
         0, False, True
     )
     # Conversion is called without an explicit encoding argument
     converted = DataElement_from_raw(comments_raw)
     assert 'Patient Comments' == converted.name
Example #7
0
    def test_invalid_tag_warning(self, allow_invalid_values):
        """RawDataElement: conversion of unknown tag warns..."""
        unknown_raw = RawDataElement(
            Tag(0x88880088), None, 4, b'unknown', 0, True, True
        )
        # The warning text must mention the offending tag
        tag_pattern = r"\(8888, 0088\)"

        with pytest.warns(UserWarning, match=tag_pattern):
            converted = DataElement_from_raw(unknown_raw)
            assert converted.VR == 'UN'
Example #8
0
    def __setitem__(self, key, value):
        """Operator for Dataset[key] = value.

        Validates the element, then stores it; private elements also get
        their private creator recorded when the creator element is present.

        Parameters
        ----------
        key : int
            The tag for the element to be added to the Dataset.
        value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
            The element to add to the Dataset.

        Raises
        ------
        NotImplementedError
            If `key` is a slice.
        TypeError
            If `value` is neither a DataElement nor a RawDataElement.
        ValueError
            If the `key` value doesn't match DataElement.tag.
        """
        if isinstance(key, slice):
            raise NotImplementedError('Slicing is not supported for setting '
                                      'Dataset elements.')

        # Subclasses are accepted as well, e.g. DeferredDataElement
        if not isinstance(value, (DataElement, RawDataElement)):
            raise TypeError("Dataset contents must be DataElement instances.")

        # Reuse the element's tag when it already is a BaseTag
        tag = value.tag if isinstance(value.tag, BaseTag) else Tag(value.tag)
        if key != tag:
            raise ValueError("DataElement.tag must match the dictionary key")

        # Non-private elements need no extra bookkeeping
        if not tag.is_private:
            dict.__setitem__(self, tag, value)
            return

        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        creator_tag = Tag(tag.group, tag.elem >> 8)
        element = value
        if creator_tag in self and tag != creator_tag:
            if isinstance(element, RawDataElement):
                element = DataElement_from_raw(element, self._character_set)
            element.private_creator = self[creator_tag].value
        dict.__setitem__(self, tag, element)
Example #9
0
    def test_key_error(self):
        """RawDataElement: conversion of unknown tag throws KeyError..."""
        # RawDataElement fields, in order:
        #   tag, VR, length, value, value_tell, is_implicit_VR,
        #   is_little_endian
        # The tag is unknown (not in DICOM dict), non-private and
        # non-group 0 for this test
        unknown_raw = RawDataElement(
            Tag(0x88880002), None, 4, 0x1111, 0, True, True
        )
        tag_pattern = r"\(8888, 0002\)"

        with pytest.raises(KeyError, match=tag_pattern):
            DataElement_from_raw(unknown_raw)
Example #10
0
 def test_little_endian_explicit(self):
     """Test reading little endian explicit VR data"""
     # (0010, 0010) PatientName PN 6 ABCDEF
     encoded = b''.join(
         [b'\x10\x00\x10\x00', b'PN', b'\x06\x00', b'ABCDEF']
     )
     # Positional flags: is_implicit_VR=False, is_little_endian=True
     raw_elements = data_element_generator(BytesIO(encoded), False, True)
     expected = DataElement(0x00100010, 'PN', 'ABCDEF')
     assert expected == DataElement_from_raw(
         next(raw_elements), 'ISO_IR 100'
     )
Example #11
0
    def __setitem__(self, key, value):
        """Operator for Dataset[key] = value.

        Validates the element, then stores it; private elements also get
        their private creator recorded when the creator element is present.

        Parameters
        ----------
        key : int
            The tag for the element to be added to the Dataset.
        value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
            The element to add to the Dataset.

        Raises
        ------
        NotImplementedError
            If `key` is a slice.
        TypeError
            If `value` is neither a DataElement nor a RawDataElement.
        ValueError
            If the `key` value doesn't match DataElement.tag.
        """
        if isinstance(key, slice):
            raise NotImplementedError('Slicing is not supported for setting '
                                      'Dataset elements.')

        # Subclasses are accepted as well, e.g. DeferredDataElement
        if not isinstance(value, (DataElement, RawDataElement)):
            raise TypeError("Dataset contents must be DataElement instances.")

        tag = Tag(value.tag)
        if key != tag:
            raise ValueError("DataElement.tag must match the dictionary key")

        # Non-private elements need no extra bookkeeping
        if not tag.is_private:
            dict.__setitem__(self, tag, value)
            return

        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        creator_tag = Tag(tag.group, tag.elem >> 8)
        element = value
        if creator_tag in self and tag != creator_tag:
            if isinstance(element, RawDataElement):
                element = DataElement_from_raw(element, self._character_set)
            element.private_creator = self[creator_tag].value
        dict.__setitem__(self, tag, element)
Example #12
0
    def __setitem__(self, key, value):
        """Store `value` under `key` (``dataset[key] = value``).

        `value` must be a DataElement or RawDataElement whose tag equals
        `key`. For a private element whose private creator element is
        already present in the dataset, the creator's value is recorded on
        the element (a raw element is converted first) before storing it.
        """
        # Subclasses are accepted as well, e.g. DeferredDataElement
        if not isinstance(value, (DataElement, RawDataElement)):
            message = ("Dataset contents must be DataElement instances.\n"
                       "To set a data_element value use data_element.value=val")
            raise TypeError(message)

        tag = Tag(value.tag)
        if key != tag:
            raise ValueError("data_element.tag must match the dictionary key")

        # Non-private elements need no extra bookkeeping
        if not tag.is_private:
            dict.__setitem__(self, tag, value)
            return

        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        creator_tag = Tag(tag.group, tag.elem >> 8)
        element = value
        if creator_tag in self and tag != creator_tag:
            if isinstance(element, RawDataElement):
                element = DataElement_from_raw(element, self._character_set)
            element.private_creator = self[creator_tag].value
        dict.__setitem__(self, tag, element)
Example #13
0
 def test_wrong_bytes_length_convert_to_UN(self, accept_wrong_length):
     """Check warning and behavior for incorrect number of raw bytes."""
     payload = b'1'
     # A single byte is passed for VR 'FD'
     short_raw = RawDataElement(
         Tag(0x00190000), 'FD', 1, payload, 0, False, True
     )
     # Regex for the expected warning, one sentence per entry
     sentences = (
         r"Expected total bytes to be an even multiple of bytes per value.",
         r"Instead received b'1' with length 1 and struct format 'd' which "
         r"corresponds to bytes per value of 8.",
         r"This occurred while trying to parse \(0019, 0000\) according to "
         r"VR 'FD'.",
         r"Setting VR to 'UN'.",
     )
     expected_warning = " ".join(sentences)
     with pytest.warns(UserWarning, match=expected_warning):
         converted = DataElement_from_raw(short_raw)
         assert 'UN' == converted.VR
         assert payload == converted.value
Example #14
0
def correct_ambiguous_vr_element(
    elem: DataElement, ds: Dataset, is_little_endian: bool
) -> DataElement:
    """Attempt to correct the ambiguous VR element `elem`.

    When it's not possible to correct the VR, the element will be returned
    unchanged. Currently the only ambiguous VR elements not corrected for are
    all retired or part of DICONDE.

    If the VR is corrected and is 'US' or 'SS' then the value will be updated
    using the :func:`~pydicom.values.convert_numbers` function.

    Parameters
    ----------
    elem : dataelem.DataElement
        The element with an ambiguous VR.
    ds : dataset.Dataset
        The dataset containing `elem`.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    dataelem.DataElement
        The corrected element

    Raises
    ------
    AttributeError
        If the correction step raises :class:`AttributeError`; the error is
        re-raised with the element's tag in the message and the original
        exception attached as its cause.
    """
    if 'or' in elem.VR:
        # convert raw data elements before handling them, and store the
        # converted element back into the dataset
        if isinstance(elem, RawDataElement):
            elem = DataElement_from_raw(elem, dataset=ds)
            ds.__setitem__(elem.tag, elem)

        try:
            _correct_ambiguous_vr_element(elem, ds, is_little_endian)
        except AttributeError as e:
            # Chain explicitly so the traceback presents the original error
            # as the direct cause of this one
            raise AttributeError(
                f"Failed to resolve ambiguous VR for tag {elem.tag}: " + str(e)
            ) from e

    return elem
Example #15
0
def correct_ambiguous_vr_element(elem, ds, is_little_endian):
    """Try to resolve the ambiguous VR of `elem`.

    Elements whose VR does not contain 'or' are returned untouched. A raw
    element is first converted and stored back into `ds`; the actual
    correction is delegated to ``_correct_ambiguous_vr_element``.

    Parameters
    ----------
    elem : pydicom.dataelem.DataElement
        The element with an ambiguous VR.
    ds : pydicom.dataset.Dataset
        The dataset containing `elem`.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    elem : pydicom.dataelem.DataElement
        The corrected element

    Raises
    ------
    AttributeError
        If the correction step raises AttributeError; the message is
        prefixed with the element's tag.
    """
    # Nothing to do for an unambiguous VR
    if 'or' not in elem.VR:
        return elem

    # Raw data elements are converted before they are handled
    if elem.is_raw:
        elem = DataElement_from_raw(elem)
        ds.__setitem__(elem.tag, elem)

    try:
        _correct_ambiguous_vr_element(elem, ds, is_little_endian)
    except AttributeError as e:
        prefix = 'Failed to resolve ambiguous VR for tag {}: '.format(
            elem.tag)
        raise AttributeError(prefix + str(e))

    return elem
Example #16
0
def read_dataset(fp,
                 is_implicit_VR,
                 is_little_endian,
                 bytelength=None,
                 stop_when=None,
                 defer_size=None,
                 parent_encoding=default_encoding,
                 specific_tags=None,
                 at_top_level=True):
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the next
    dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional call_back function which can terminate reading. See help for
        :func:`data_element_generator` for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See :func:`dcmread` for
        more parameter info.
    parent_encoding :
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list or None
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        If dataset is top level (not within a sequence).
        Used to turn off explicit VR heuristic within sequences

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    Raises
    ------
    EOFError
        Re-raised when reading hits end of file and
        ``config.enforce_valid_values`` is set; otherwise a
        :class:`UserWarning` is issued and what was read is returned.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    if at_top_level:
        is_implicit_VR = _is_implicit_vr(fp, is_implicit_VR, is_little_endian,
                                         stop_when)
    # Return to the start position before the actual read
    fp.seek(fp_start)
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.enforce_valid_values:
            raise
        # `fp.name` is not always a string (e.g. an integer descriptor for
        # a file opened from a file descriptor), so format it instead of
        # concatenating, which would raise TypeError and hide the EOF error.
        msg = "%s in file %s" % (details,
                                 getattr(fp, "name", "<no filename>"))
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    # Prefer the dataset's own (0008,0005) element over the parent encoding
    if 0x00080005 in raw_data_elements:
        char_set = DataElement_from_raw(raw_data_elements[0x00080005])
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
Example #17
0
 def test_default_for_reading_validation_mode(self):
     """Converting this raw element must emit a UserWarning."""
     unknown_raw = RawDataElement(
         Tag(0x88880002), None, 4, b'unknown', 0, True, True
     )
     with pytest.warns(UserWarning):
         DataElement_from_raw(unknown_raw)
Example #18
0
def correct_ambiguous_vr_element(elem, ds, is_little_endian):
    """Try to resolve the ambiguous VR of `elem`.

    Elements whose VR does not contain 'or' are returned untouched, as are
    ambiguous elements matching none of the tags handled below. A raw
    element is first converted and stored back into `ds`.

    When the VR resolves to 'US' or 'SS' the value is re-decoded with
    pydicom.values.convert_numbers().

    Parameters
    ----------
    elem : pydicom.dataelem.DataElement
        The element with an ambiguous VR.
    ds : pydicom.dataset.Dataset
        The dataset containing `elem`.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    elem : pydicom.dataelem.DataElement
        The corrected element
    """
    # Nothing to do for an unambiguous VR
    if 'or' not in elem.VR:
        return elem

    # Raw data elements are converted before they are handled
    if elem.is_raw:
        elem = DataElement_from_raw(elem)
        ds.__setitem__(elem.tag, elem)

    # 'US or SS' elements that depend on PixelRepresentation
    # (0018,9810) Zero Velocity Pixel Value
    # (0022,1452) Mapped Pixel Value
    # (0028,0104)/(0028,0105) Smallest/Largest Valid Pixel Value
    # (0028,0106)/(0028,0107) Smallest/Largest Image Pixel Value
    # (0028,0108)/(0028,0109) Smallest/Largest Pixel Value in Series
    # (0028,0110)/(0028,0111) Smallest/Largest Image Pixel Value in Plane
    # (0028,0120) Pixel Padding Value
    # (0028,0121) Pixel Padding Range Limit
    # (0028,1101-1103) Red/Green/Blue Palette Color Lookup Table Descriptor
    # (0028,3002) LUT Descriptor
    # (0040,9216)/(0040,9211) Real World Value First/Last Value Mapped
    # (0060,3004)/(0060,3006) Histogram First/Last Bin Value
    pixel_representation_tags = (
        0x00189810, 0x00221452, 0x00280104, 0x00280105, 0x00280106,
        0x00280107, 0x00280108, 0x00280109, 0x00280110, 0x00280111,
        0x00280120, 0x00280121, 0x00281101, 0x00281102, 0x00281103,
        0x00283002, 0x00409211, 0x00409216, 0x00603004, 0x00603006,
    )
    # 'OB or OW' elements that depend on WaveformBitsAllocated
    # (5400, 0110) Channel Minimum Value
    # (5400, 0112) Channel Maximum Value
    # (5400, 100A) Waveform Padding Data
    # (5400, 1010) Waveform Data
    waveform_tags = (0x54000110, 0x54000112, 0x5400100A, 0x54001010)

    tag = elem.tag

    if tag == 0x7fe00010:
        # 'OB or OW': 7fe0,0010 PixelData
        if elem.is_undefined_length:
            # Compressed Pixel Data
            # PS3.5 Annex A.4
            #   If encapsulated, VR is OB and length is undefined
            elem.VR = 'OB'
        elif ds.is_implicit_VR:
            # Non-compressed Pixel Data - Implicit Little Endian
            # PS3.5 Annex A1: VR is always OW
            elem.VR = 'OW'
        elif ds.BitsAllocated > 8:
            # Non-compressed Pixel Data - Explicit VR
            # PS3.5 Annex A.2: BitsAllocated > 8 means VR shall be OW
            elem.VR = 'OW'
        else:
            # Otherwise OB or OW are both allowed. Getting here means the
            # data has not been written before or has been converted from
            # Implicit Little Endian, so default to OB for
            # BitsAllocated 1 or 8
            elem.VR = 'OB'

    elif tag in pixel_representation_tags:
        # US if PixelRepresentation value is 0x0000, else SS
        #   For references, see the list at
        #   https://github.com/darcymason/pydicom/pull/298
        if ds.PixelRepresentation == 0:
            resolved_vr, struct_code = 'US', 'H'
        else:
            resolved_vr, struct_code = 'SS', 'h'
        elem.VR = resolved_vr
        elem.value = convert_numbers(elem.value, is_little_endian,
                                     struct_code)

    elif tag in waveform_tags:
        # If WaveformBitsAllocated is > 8 then OW, otherwise may be
        #   OB or OW.
        #   See PS3.3 C.10.9.1.
        if ds.is_implicit_VR or ds.WaveformBitsAllocated > 8:
            elem.VR = 'OW'
        else:
            elem.VR = 'OB'

    elif tag == 0x00283006:
        # 'US or OW': 0028,3006 LUTData
        # First value in LUT Descriptor is how many values in
        #   LUTData, if there's only one value then must be US
        # As per PS3.3 C.11.1.1.1
        if ds.LUTDescriptor[0] == 1:
            elem.VR = 'US'
            elem.value = convert_numbers(elem.value, is_little_endian, 'H')
        else:
            elem.VR = 'OW'

    elif (tag.group in range(0x6000, 0x601F, 2)
          and tag.elem == 0x3000):
        # 'OB or OW': 60xx,3000 OverlayData and dependent on Transfer Syntax
        # Implicit VR must be OW, explicit VR may be OB or OW
        #   as per PS3.5 Section 8.1.2 and Annex A
        elem.VR = 'OW'

    return elem
Example #19
0
 def test_reading_validation_mode_with_enforce_valid_values(
         self, enforce_valid_values):
     """Converting this raw element must raise KeyError."""
     unknown_raw = RawDataElement(
         Tag(0x88880002), None, 4, b'unknown', 0, True, True
     )
     with pytest.raises(KeyError):
         DataElement_from_raw(unknown_raw)
Example #20
0
def read_dataset(fp,
                 is_implicit_VR,
                 is_little_endian,
                 bytelength=None,
                 stop_when=None,
                 defer_size=None,
                 parent_encoding=default_encoding,
                 specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Reading stops at end of data, at an ItemDelimiterTag, or once
    `bytelength` bytes have been consumed. EOFError and NotImplementedError
    raised while reading are logged and whatever was read so far is returned.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDelimiterTag, else
        a fixed number of bytes to read
    stop_when : None, optional
        optional call_back function which can terminate reading.
        See help for data_element_generator for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``dcmread`` for more parameter info.
    parent_encoding :
        optional encoding to use as a default in case
        a Specific Character Set (0008,0005) isn't specified
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of Dicom `DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        # `fp.name` is not always a string (e.g. an integer descriptor for
        # a file opened from a file descriptor); let logging format it
        # rather than concatenating, which would raise TypeError here.
        logger.error("%s in file %s", details,
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    # Prefer the dataset's own (0008,0005) element over the parent encoding
    if 0x00080005 in raw_data_elements:
        char_set = DataElement_from_raw(raw_data_elements[0x00080005])
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
Example #21
0
def read_dataset(fp: BinaryIO,
                 is_implicit_VR: bool,
                 is_little_endian: bool,
                 bytelength: Optional[int] = None,
                 stop_when: Optional[Callable[[BaseTag, Optional[str], int],
                                              bool]] = None,
                 defer_size: Optional[Union[str, int, float]] = None,
                 parent_encoding: Union[
                     str, MutableSequence[str]] = default_encoding,
                 specific_tags: Optional[List[BaseTag]] = None,
                 at_top_level: bool = True) -> Dataset:
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the next
    dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional call_back function which can terminate reading. See help for
        :func:`data_element_generator` for details
    defer_size : int, str or float, optional
        Size to avoid loading large elements in memory. See :func:`dcmread` for
        more parameter info.
    parent_encoding : str or List[str]
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list of BaseTag, optional
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        If dataset is top level (not within a sequence).
        Used to turn off explicit VR heuristic within sequences

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    Raises
    ------
    EOFError
        Re-raised when reading hits end of file and
        ``config.settings.reading_validation_mode`` is ``config.RAISE``;
        otherwise a :class:`UserWarning` is issued and what was read is
        returned.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements: Dict[BaseTag, Union[RawDataElement, DataElement]] = {}
    fp_start = fp.tell()
    is_implicit_VR = _is_implicit_vr(fp,
                                     is_implicit_VR,
                                     is_little_endian,
                                     stop_when,
                                     is_sequence=not at_top_level)
    # Return to the start position before the actual read
    fp.seek(fp_start)
    de_gen = data_element_generator(
        fp,
        is_implicit_VR,
        is_little_endian,
        stop_when,
        defer_size,
        parent_encoding,
        specific_tags,
    )
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.settings.reading_validation_mode == config.RAISE:
            raise
        # `fp.name` is not always a string (e.g. an integer descriptor for
        # a file opened from a file descriptor), so format it instead of
        # concatenating, which would raise TypeError and hide the EOF error.
        msg = f"{details} in file {getattr(fp, 'name', '<no filename>')}"
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)

    # Prefer the dataset's own (0008,0005) element over the parent encoding
    encoding: Union[str, MutableSequence[str]]
    if 0x00080005 in raw_data_elements:
        elem = cast(RawDataElement, raw_data_elements[BaseTag(0x00080005)])
        char_set = cast(Optional[Union[str, MutableSequence[str]]],
                        DataElement_from_raw(elem).value)
        encoding = convert_encodings(char_set)  # -> List[str]
    else:
        encoding = parent_encoding  # -> Union[str, MutableSequence[str]]

    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
Example #22
0
def correct_ambiguous_vr_element(elem, ds, is_little_endian):
    """Attempt to correct the ambiguous VR element `elem`.

    When it's not possible to correct the VR, the element will be returned
    unchanged. Currently the only ambiguous VR elements not corrected for are
    all retired or part of DICONDE.

    If the VR is corrected and is 'US' or 'SS' then the value will be updated
    using the pydicom.values.convert_numbers() method.

    For uncompressed Pixel Data with BitsAllocated <= 8 the VR is inferred
    from the number of bytes per pixel. If the dataset describes zero pixels
    (e.g. Rows or Columns is 0) no inference is possible and the VR is left
    unchanged.

    Parameters
    ----------
    elem : pydicom.dataelem.DataElement
        The element with an ambiguous VR.
    ds : pydicom.dataset.Dataset
        The dataset containing `elem`.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    elem : pydicom.dataelem.DataElement
        The corrected element
    """
    # Nothing to do unless the VR is of the form 'XX or YY'
    if 'or' not in elem.VR:
        return elem

    # convert raw data elements before handling them
    if elem.is_raw:
        elem = DataElement_from_raw(elem)
        ds[elem.tag] = elem

    # 'OB or OW': 7fe0,0010 PixelData
    if elem.tag == 0x7fe00010:
        # A missing attribute on `ds` or `elem` raises AttributeError; in
        #   that case the VR can't be determined and is left unchanged
        try:
            if elem.is_undefined_length:
                # Compressed Pixel Data
                # PS3.5 Annex A.4
                #   If encapsulated, VR is OB and length is undefined
                elem.VR = 'OB'
            elif ds.BitsAllocated > 8:
                # Non-compressed Pixel Data
                # If BitsAllocated is > 8 then OW, else may be OB or OW
                #   as per PS3.5 Annex A.2
                elem.VR = 'OW'
            else:
                # For BitsAllocated <= 8 test the size of each pixel to see
                #   if it's written in OW or OB
                nr_pixels = ds.Rows * ds.Columns
                if 'SamplesPerPixel' in ds:
                    nr_pixels *= ds.SamplesPerPixel
                # With zero pixels the bytes-per-pixel ratio is undefined;
                #   skip the test rather than divide by zero
                if nr_pixels:
                    pixel_size = len(ds.PixelData) / nr_pixels
                    if pixel_size == 2:
                        elem.VR = 'OW'
                    elif pixel_size == 1:
                        elem.VR = 'OB'
        except AttributeError:
            pass

    # 'US or SS' and dependent on PixelRepresentation
    elif elem.tag in (
            0x00189810, 0x00221452, 0x00280104, 0x00280105, 0x00280106,
            0x00280107, 0x00280108, 0x00280109, 0x00280110, 0x00280111,
            0x00280120, 0x00280121, 0x00281101, 0x00281102, 0x00281103,
            0x00283002, 0x00409211, 0x00409216, 0x00603004, 0x00603006
    ):
        # US if PixelRepresentation value is 0x0000, else SS
        #   For references, see the list at
        #   https://github.com/darcymason/pydicom/pull/298
        if 'PixelRepresentation' in ds:
            if ds.PixelRepresentation == 0:
                elem.VR = 'US'
                byte_type = 'H'
            else:
                elem.VR = 'SS'
                byte_type = 'h'
            elem.value = convert_numbers(elem.value, is_little_endian,
                                         byte_type)

    # 'OB or OW' and dependent on WaveformBitsAllocated
    elif elem.tag in (0x54000100, 0x54000112, 0x5400100A, 0x54001010):
        # If WaveformBitsAllocated is > 8 then OW, otherwise may be
        #   OB or OW, however not sure how to handle this.
        #   See PS3.3 C.10.9.1.
        if 'WaveformBitsAllocated' in ds and ds.WaveformBitsAllocated > 8:
            elem.VR = 'OW'

    # 'US or OW': 0028,3006 LUTData
    elif elem.tag == 0x00283006:
        if 'LUTDescriptor' in ds:
            # First value in LUT Descriptor is how many values in
            #   LUTData, if there's only one value then must be US
            # As per PS3.3 C.11.1.1.1
            if ds.LUTDescriptor[0] == 1:
                elem.VR = 'US'
                elem.value = convert_numbers(elem.value, is_little_endian,
                                             'H')
            else:
                elem.VR = 'OW'

    # 'OB or OW': 60xx,3000 OverlayData and dependent on Transfer Syntax
    elif (elem.tag.group in range(0x6000, 0x601F, 2)
          and elem.tag.elem == 0x3000):
        # Implicit VR must be OW, explicit VR may be OB or OW
        #   as per PS3.5 Section 8.1.2 and Annex A
        if ds.is_implicit_VR:
            elem.VR = 'OW'

    return elem
Example #23
0
    def __getitem__(self, key):
        """Return the element (or sub-dataset) selected by ``Dataset[key]``.

        Elements still held in raw form are converted to DataElement on
        first access (reading deferred values if necessary), and an attempt
        is made to resolve any ambiguous VR.

        Examples
        --------
        Indexing using DataElement tag
        >>> ds = Dataset()
        >>> ds.SOPInstanceUID = '1.2.3'
        >>> ds.PatientName = 'CITIZEN^Jan'
        >>> ds.PatientID = '12345'
        >>> ds[0x00100010]
        (0010, 0010) Patient's Name                      PN: 'CITIZEN^Jan'

        Slicing using DataElement tag
        All group 0x0010 elements in the dataset
        >>> ds[0x00100000:0x00110000]
        (0010, 0010) Patient's Name                      PN: 'CITIZEN^Jan'
        (0010, 0020) Patient ID                          LO: '12345'

        All group 0x0002 elements in the dataset
        >>> ds[(0x0002, 0x0000):(0x0003, 0x0000)]

        Parameters
        ----------
        key
            The DICOM (group, element) tag in any form accepted by
            pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
            etc. May also be a slice made up of DICOM tags.

        Returns
        -------
        pydicom.dataelem.DataElement or pydicom.dataset.Dataset
            A single tag yields the matching DataElement; a slice yields a
            new Dataset holding every DataElement selected by the slice.
        """
        # Slice request: gather the selected elements into a fresh Dataset
        if isinstance(key, slice):
            subset = Dataset()
            for member_tag in self._slice_dataset(key.start, key.stop,
                                                  key.step):
                subset.add(self[member_tag])
            return subset

        tag = Tag(key)
        stored = dict.__getitem__(self, tag)

        # Already a fully converted element: hand it straight back
        if isinstance(stored, DataElement):
            return stored

        if isinstance(stored, tuple):
            raw = stored

            # Value was deferred when the file was read, so fetch it now
            if raw.value is None:
                from pydicom.filereader import read_deferred_data_element
                raw = read_deferred_data_element(self.fileobj_type,
                                                 self.filename,
                                                 self.timestamp,
                                                 raw)

            # Tag (0008, 0005) itself is decoded with the default encoding;
            #   every other element uses the dataset's character set
            character_set = (self._character_set if tag != (0x08, 0x05)
                             else default_encoding)

            # Replace the stored raw tuple with its converted DataElement
            self[tag] = DataElement_from_raw(raw, character_set)

            # If the Element has an ambiguous VR, try to correct it
            if 'or' in self[tag].VR:
                from pydicom.filewriter import correct_ambiguous_vr_element
                # Index 6 of the raw tuple is handed over as the
                #   `is_little_endian` argument
                self[tag] = correct_ambiguous_vr_element(
                    self[tag], self, raw[6])

        return dict.__getitem__(self, tag)
Example #24
0
 def test_wrong_bytes_length_exception(self, accept_wrong_length):
     """Converting a raw element with a wrong number of bytes must raise."""
     # A single byte of data declared with VR 'FD'
     fields = (Tag(0x00190000), 'FD', 1, b'1', 0, False, True)
     bad_length_raw = RawDataElement(*fields)
     with pytest.raises(BytesLengthException):
         DataElement_from_raw(bad_length_raw)
Example #25
0
 def test_unknown_vr(self):
     """A raw element carrying the unknown VR 'AA' can't be converted."""
     fields = (Tag(0x00080000), 'AA', 8, b'20170101', 0, False, True)
     unknown_vr_raw = RawDataElement(*fields)
     # Conversion is expected to fail with NotImplementedError
     with pytest.raises(NotImplementedError):
         DataElement_from_raw(unknown_vr_raw, default_encoding)
Example #26
0
def correct_ambiguous_vr_element(elem, ds, is_little_endian):
    """Attempt to correct the ambiguous VR element `elem`.

    When it's not possible to correct the VR, the element will be returned
    unchanged. Currently the only ambiguous VR elements not corrected for are
    all retired or part of DICONDE.

    If the VR is corrected and is 'US' or 'SS' then the value will be updated
    using the pydicom.values.convert_numbers() method.

    For uncompressed Pixel Data with BitsAllocated <= 8 the VR is inferred
    from the number of bytes per pixel. If the dataset describes zero pixels
    (e.g. Rows or Columns is 0) no inference is possible and the VR is left
    unchanged.

    Parameters
    ----------
    elem : pydicom.dataelem.DataElement
        The element with an ambiguous VR.
    ds : pydicom.dataset.Dataset
        The dataset containing `elem`.
    is_little_endian : bool
        The byte ordering of the values in the dataset.

    Returns
    -------
    elem : pydicom.dataelem.DataElement
        The corrected element
    """
    # Nothing to do unless the VR is of the form 'XX or YY'
    if 'or' not in elem.VR:
        return elem

    # convert raw data elements before handling them
    if elem.is_raw:
        elem = DataElement_from_raw(elem)
        ds[elem.tag] = elem

    # 'OB or OW': 7fe0,0010 PixelData
    if elem.tag == 0x7fe00010:
        # A missing attribute on `ds` or `elem` raises AttributeError; in
        #   that case the VR can't be determined and is left unchanged
        try:
            if elem.is_undefined_length:
                # Compressed Pixel Data
                # PS3.5 Annex A.4
                #   If encapsulated, VR is OB and length is undefined
                elem.VR = 'OB'
            elif ds.BitsAllocated > 8:
                # Non-compressed Pixel Data
                # If BitsAllocated is > 8 then OW, else may be OB or OW
                #   as per PS3.5 Annex A.2
                elem.VR = 'OW'
            else:
                # For BitsAllocated <= 8 test the size of each pixel to see
                #   if it's written in OW or OB
                nr_pixels = ds.Rows * ds.Columns
                if 'SamplesPerPixel' in ds:
                    nr_pixels *= ds.SamplesPerPixel
                # With zero pixels the bytes-per-pixel ratio is undefined;
                #   skip the test rather than divide by zero
                if nr_pixels:
                    pixel_size = len(ds.PixelData) / nr_pixels
                    if pixel_size == 2:
                        elem.VR = 'OW'
                    elif pixel_size == 1:
                        elem.VR = 'OB'
        except AttributeError:
            pass

    # 'US or SS' and dependent on PixelRepresentation
    elif elem.tag in (
            0x00189810, 0x00221452, 0x00280104, 0x00280105, 0x00280106,
            0x00280107, 0x00280108, 0x00280109, 0x00280110, 0x00280111,
            0x00280120, 0x00280121, 0x00281101, 0x00281102, 0x00281103,
            0x00283002, 0x00409211, 0x00409216, 0x00603004, 0x00603006
    ):
        # US if PixelRepresentation value is 0x0000, else SS
        #   For references, see the list at
        #   https://github.com/darcymason/pydicom/pull/298
        if 'PixelRepresentation' in ds:
            if ds.PixelRepresentation == 0:
                elem.VR = 'US'
                byte_type = 'H'
            else:
                elem.VR = 'SS'
                byte_type = 'h'
            elem.value = convert_numbers(elem.value, is_little_endian,
                                         byte_type)

    # 'OB or OW' and dependent on WaveformBitsAllocated
    elif elem.tag in (0x54000100, 0x54000112, 0x5400100A, 0x54001010):
        # If WaveformBitsAllocated is > 8 then OW, otherwise may be
        #   OB or OW, however not sure how to handle this.
        #   See PS3.3 C.10.9.1.
        if 'WaveformBitsAllocated' in ds and ds.WaveformBitsAllocated > 8:
            elem.VR = 'OW'

    # 'US or OW': 0028,3006 LUTData
    elif elem.tag == 0x00283006:
        if 'LUTDescriptor' in ds:
            # First value in LUT Descriptor is how many values in
            #   LUTData, if there's only one value then must be US
            # As per PS3.3 C.11.1.1.1
            if ds.LUTDescriptor[0] == 1:
                elem.VR = 'US'
                elem.value = convert_numbers(elem.value, is_little_endian,
                                             'H')
            else:
                elem.VR = 'OW'

    # 'OB or OW': 60xx,3000 OverlayData and dependent on Transfer Syntax
    elif (elem.tag.group in range(0x6000, 0x601F, 2)
          and elem.tag.elem == 0x3000):
        # Implicit VR must be OW, explicit VR may be OB or OW
        #   as per PS3.5 Section 8.1.2 and Annex A
        if ds.is_implicit_VR:
            elem.VR = 'OW'

    return elem