Ejemplo n.º 1
0
    def get_element_values(self):
        """Decode this element's JSON payload into its Python value.

        The payload kind is selected by ``self.value_key``: "Value" holds a
        list of regular values, while "InlineBinary" and "BulkDataURI" hold
        a single entry (optionally wrapped in a list).

        Returns
        -------
        str or bytes or int or float or dataset_class
        or PersonName or list of any of these types
            The value or value list of the newly created data element.

        Raises
        ------
        TypeError
            If the payload does not have the type required by its key.
        """
        from pydicom.dataelem import empty_value_for_VR

        key = self.value_key

        if key == 'Value':
            entries = self.value
            if not isinstance(entries, list):
                raise TypeError(
                    '"{}" of data element "{}" must be a list.'.format(
                        key, self.tag))
            if not entries:
                return empty_value_for_VR(self.vr)
            decoded = list(map(self.get_regular_element_value, entries))
            # A lone value is unwrapped, except for sequences which always
            # stay a list of items.
            if self.vr != 'SQ' and len(decoded) == 1:
                decoded = decoded[0]
            return convert_to_python_number(decoded, self.vr)

        # PS3.18, Table F.3.1-1 describes "InlineBinary" as a plain base64
        # encoded string, whereas the example in PS3.18, Annex F.4 wraps the
        # string in a list. The standard is ambiguous, so both forms are
        # accepted here, and "BulkDataURI" is treated the same way.
        payload = self.value
        if isinstance(payload, list):
            payload = payload[0]

        if key == 'InlineBinary':
            if isinstance(payload, (str, bytes)):
                return base64.b64decode(payload)
            raise TypeError(
                '"{}" of data element "{}" must be a bytes-like object.'
                .format(key, self.tag))

        if key == 'BulkDataURI':
            if not isinstance(payload, str):
                raise TypeError(
                    '"{}" of data element "{}" must be a string.'.format(
                        key, self.tag))
            handler = self.bulk_data_uri_handler
            if handler is not None:
                return handler(payload)
            # Without a handler the bulk data cannot be fetched: warn and
            # fall back to the raw empty value for this VR.
            warnings.warn(
                'no bulk data URI handler provided for retrieval '
                'of value of data element "{}"'.format(self.tag))
            return empty_value_for_VR(self.vr, raw=True)

        # Any other value key yields the empty value for this VR.
        return empty_value_for_VR(self.vr)
Ejemplo n.º 2
0
def convert_to_python_number(value: Any, vr: str) -> Any:
    """When possible convert numeric-like values to either ints or floats
    based on their value representation.

    .. versionadded:: 1.4

    Parameters
    ----------
    value : Any
        Value of the data element.
    vr : str
        Value representation of the data element.

    Returns
    -------
    Any

        * If `value` is ``None`` or an empty string then returns the `value`
          unchanged.
        * If `vr` is an integer-like VR type then returns ``int`` or
          ``List[int]``
        * If `vr` is a float-like VR type then returns ``float`` or
          ``List[float]``
        * Otherwise returns `value` unchanged

    Raises
    ------
    ValueError
        If a non-empty `value` (or a non-``None`` list item) cannot be
        converted by ``int()`` or ``float()``.
    """
    # An empty value has nothing to convert. The comparison must name
    # `value` explicitly: a bare `or ""` is always falsy and would let an
    # empty string fall through to int("") / float("") below.
    if value is None or value == "":
        return value

    number_type: Optional[Union[Type[int], Type[float]]] = None
    # AT is excluded from the integer-like VRs; US_SS is added to them.
    if vr in (INT_VR - {VR.AT}) | {VR.US_SS}:
        number_type = int
    if vr in FLOAT_VR:
        number_type = float

    if number_type is None:
        return value

    if isinstance(value, (list, tuple)):
        # Imported here rather than at module level (presumably to avoid a
        # circular import) and only on the path that actually needs it.
        from pydicom.dataelem import empty_value_for_VR

        # `None` items stand for missing values and are replaced by the
        # empty value for this VR instead of being converted.
        return [
            number_type(v) if v is not None else empty_value_for_VR(vr)
            for v in value
        ]

    return number_type(value)
Ejemplo n.º 3
0
    def get_sequence_item(self, value: SQValueType) -> "Dataset":
        """Build one sequence item (a dataset) from its JSON representation.

        Parameters
        ----------
        value : dict or None
            The sequence item from the JSON entry, mapping each tag (as a
            hexadecimal string) to the JSON dict of that element. ``None``
            produces an empty item.

        Returns
        -------
        dataset_class
            The decoded dataset item.

        Raises
        ------
        KeyError
            If the "vr" key is missing for a contained element
        """
        from pydicom import DataElement
        from pydicom.dataelem import empty_value_for_VR

        item = self.dataset_class()

        if value is not None:
            for tag_str, entry in value.items():
                if 'vr' not in entry:
                    raise KeyError(
                        f"Data element '{self.tag}' must have key 'vr'"
                    )

                vr = entry['vr']
                # The value keys ("Value", "InlineBinary", ...) that are
                # actually present in this element's JSON dict.
                present_keys = tuple(
                    set(entry.keys()) & set(JSON_VALUE_KEYS)
                )

                if present_keys:
                    value_key = present_keys[0]
                    elem = DataElement.from_json(
                        self.dataset_class, tag_str, vr,
                        entry[value_key], value_key,
                        self.bulk_data_element_handler
                    )
                else:
                    # No value key at all: element with an empty value.
                    elem = DataElement(
                        tag=int(tag_str, 16),
                        value=empty_value_for_VR(vr),
                        VR=vr
                    )
                item.add(elem)

        return item
Ejemplo n.º 4
0
    def get_sequence_item(self, value):
        """Build one sequence item (a dataset) from its JSON representation.

        Parameters
        ----------
        value : dict or None
            The sequence item from the JSON entry, mapping each tag (as a
            hexadecimal string) to the JSON dict of that element. ``None``
            or an empty dict produces an empty item.

        Returns
        -------
        dataset_class
            The decoded dataset item.

        Raises
        ------
        KeyError
            If the "vr" key is missing for a contained element
        """
        item = self.dataset_class()
        if not value:
            return item

        # Imported at call time rather than at module level.
        from pydicom import DataElement
        from pydicom.dataelem import empty_value_for_VR

        for tag_str, entry in value.items():
            if 'vr' not in entry:
                raise KeyError(
                    'Data element "{}" must have key "vr".'.format(self.tag))
            vr = entry['vr']
            # The value keys that are actually present in this element.
            present_keys = tuple(set(entry.keys()) & set(JSON_VALUE_KEYS))
            if present_keys:
                value_key = present_keys[0]
                elem = DataElement.from_json(
                    self.dataset_class, tag_str, vr,
                    entry[value_key], value_key)
            else:
                # No value key at all: element with an empty value.
                elem = DataElement(
                    tag=int(tag_str, 16),
                    value=empty_value_for_VR(vr),
                    VR=vr)
            item.add(elem)
        return item
Ejemplo n.º 5
0
    def get_regular_element_value(self, value: ValueType) -> Any:
        """Convert one entry of a JSON "Value" list to an element value.

        Parameters
        ----------
        value : None, str, int, float or dict
            The data element's value from the json entry.

        Returns
        -------
        None, str, int, float or Dataset
            A single value of the corresponding :class:`DataElement`.
        """
        from pydicom.dataelem import empty_value_for_VR

        vr = self.vr

        # JSON type mappings are given in Table F.2.3-1.
        if vr == VR.SQ:
            # Sequence item -> Dataset; the dict may be empty.
            return self.get_sequence_item(cast(Dict[str, Any], value))

        if value is None:
            return empty_value_for_VR(vr)

        if vr == VR.PN:
            # Person name given as a dict of str components.
            return self.get_pn_element_value(cast(Dict[str, str], value))

        if vr == VR.AT:
            # Attribute tag given as a hexadecimal string, possibly empty.
            hex_tag = cast(str, value)
            try:
                return int(hex_tag, 16)
            except ValueError:
                warnings.warn(
                    f"Invalid value '{hex_tag}' for AT element - ignoring it")
            # An unparsable tag is dropped after the warning.
            return None

        # Every other VR keeps the JSON value as is.
        return value
Ejemplo n.º 6
0
def convert_value(VR, raw_data_element, encodings=None):
    """Decode the value of a raw data element using the converter for `VR`.

    Parameters
    ----------
    VR : str
        The value representation used to select the converter.
    raw_data_element : RawDataElement
        The raw element whose encoded ``value`` is decoded.
    encodings : str or list of str, optional
        The character encoding scheme(s) used to encode any text
        elements. Defaults to the default encoding.

    Returns
    -------
    type or list of type
        The element value decoded using the appropriate decoder. If every
        conversion attempt fails, the raw value is returned unchanged.

    Raises
    ------
    NotImplementedError
        If `VR` has no registered converter.
    ValueError
        If the conversion fails and ``config.enforce_valid_values`` is set.
    """
    if VR not in converters:
        # `VR` characters are in the ascii alphabet ranges 65 - 90, 97 - 122
        letters = set(range(65, 91)) | set(range(97, 123))
        # If the VR characters are outside that range then print hex values
        if not (ord(VR[0]) in letters and ord(VR[1]) in letters):
            VR = ' '.join('0x{:02x}'.format(ord(ch)) for ch in VR)
        raise NotImplementedError(
            "Unknown Value Representation '{}'".format(VR))

    if raw_data_element.length == 0:
        return empty_value_for_VR(VR)

    # A registry entry is either a plain converter or a
    # (converter, number format) pair for the numeric VRs.
    entry = converters[VR]
    if isinstance(entry, tuple):
        converter, num_format = entry
    else:
        converter, num_format = entry, None

    # Normalize `encodings` to a list
    if not encodings:
        encodings = [default_encoding]
    elif isinstance(encodings, compat.string_types):
        encodings = [encodings]

    raw_bytes = raw_data_element.value
    little_endian = raw_data_element.is_little_endian
    implicit_vr = raw_data_element.is_implicit_VR

    try:
        if VR in text_VRs or VR == 'PN':
            # Text converters receive all the encodings.
            value = converter(raw_bytes, encodings=encodings)
        elif VR == "SQ":
            # A raw sequence needs extra information to be parsed.
            value = convert_SQ(raw_bytes, implicit_vr, little_endian,
                               encodings, raw_data_element.value_tell)
        else:
            value = converter(raw_bytes, little_endian, num_format)
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise
        logger.debug('unable to translate tag %s with VR %s' %
                     (raw_data_element.tag, VR))

        # Best effort: try the other VRs of the retry list in order and
        # keep the first one that converts without any error.
        for candidate in convert_retry_VR_order:
            if candidate == VR:
                continue
            try:
                value = convert_value(candidate, raw_data_element, encodings)
                logger.debug('converted value for tag %s with VR %s' %
                             (raw_data_element.tag, candidate))
            except Exception:
                continue
            break
        else:
            logger.debug('Could not convert value for tag %s with any VR '
                         'in the convert_retry_VR_order list' %
                         raw_data_element.tag)
            value = raw_data_element.value
    return value
Ejemplo n.º 7
0
def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding,
                           specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from. It must support ``read``, ``tell`` and
        ``seek``.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, `fp` is rewound to the start of the current data
        element and the generator stops. For implicit VR the callable
        receives ``None`` as VR.
    defer_size : int, str, None, optional
        See :func:`dcmread` for parameter info. Values with a defined length
        larger than this size are skipped and yielded with a value of
        ``None``.
    encoding :
        Encoding scheme passed on when reading undefined length sequences.
        It is replaced as soon as a *Specific Character Set* (0008,0005)
        element has been read.
    specific_tags : list or None
        See :func:`dcmread` for parameter info. Entries may be keywords
        (str) or tags; other entry types are ignored.

    Yields
    ------
    RawDataElement or DataElement
        A ``DataElement`` for a sequence of undefined length (which is
        parsed immediately), a ``RawDataElement`` built from the tag, VR,
        length, raw value (``None`` if deferred), value offset,
        `is_implicit_VR` and `is_little_endian` otherwise. The VR is
        ``None`` for implicit VR elements of defined length, and the length
        may be the "undefined length" marker ``0xFFFFFFFF``.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    # Byte order prefix for the struct format strings
    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        # group, element, 4-byte length
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # Build the set of tags to keep. Keywords are converted to tags; entries
    # that are neither str nor BaseTag are silently dropped.
    tag_set = set()
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, str):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        # Specific Character Set (0008,0005) is always kept when filtering
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = len(tag_set) > 0

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                # For these VRs the real length is in the next 4 bytes
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # Header is 8 bytes, plus 4 if the extra length was read
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if (defer_size is not None and length > defer_size
                    and tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                # NOTE: this message is logged even when `debugging` is off
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # A zero length element gets the raw empty value for its VR
                value = (fp_read(length)
                         if length > 0 else empty_value_for_VR(VR, raw=True))
                if debugging:
                    # Only the first 12 bytes of the value are displayed
                    dotdot = "..." if length > 12 else "   "
                    displayed_value = value[:12] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                from pydicom.values import convert_string
                # `value` may be empty/None here, hence the b'' fallback
                encoding = convert_string(value or b'', is_little_endian)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                # The sequence is always parsed, even for a filtered out tag,
                # so that the file position ends up after the sequence
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag,
                                  VR,
                                  seq,
                                  value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
Ejemplo n.º 8
0
def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from. It must support ``read``, ``tell`` and
        ``seek``.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, `fp` is rewound to the start of the current data
        element and the generator stops. The VR passed to the callable may
        be ``None``.
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info. Values with a defined length
        larger than this size are skipped and yielded with a value of
        ``None``.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme passed on when reading undefined length sequences.
        It is replaced as soon as a *Specific Character Set* (0008,0005)
        element has been read.
    specific_tags : list or None
        See :func:`dcmread` for parameter info.

    Yields
    ------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    # Byte order prefix for the struct format strings
    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"

    # assign implicit VR struct to variable as use later if VR assumed missing
    implicit_VR_struct = Struct(endian_chr + "HHL")
    if is_implicit_VR:
        element_struct = implicit_VR_struct
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    # Tags to keep; an empty set means that no filtering takes place
    tag_set = {Tag(tag) for tag in specific_tags} if specific_tags else set()
    has_tag_set = bool(tag_set)
    if has_tag_set:
        tag_set.add(Tag(0x00080005))  # Specific Character Set

    while True:
        # VR: Optional[str]

        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(bytes_read)}"

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            # defend against switching to implicit VR, some writer do in SQ's
            # issue 1067, issue 1035

            # NOTE(review): the bytes comparison is lexicographic, so it is
            # only an approximate "two capital letters" test (for instance
            # b'A[' also lies between b'AA' and b'ZZ').
            if not (b'AA' <= VR <= b'ZZ') and config.assume_implicit_vr_switch:
                # invalid VR, must be 2 cap chrs, assume implicit and continue
                VR = None
                # Re-interpret the same 8 bytes as an implicit VR header
                group, elem, length = implicit_VR_struct.unpack(bytes_read)
            else:
                VR = VR.decode(default_encoding)
                if VR in extra_length_VRs:
                    # For these VRs the real length is in the next 4 bytes
                    bytes_read = fp_read(4)
                    length = extra_length_unpack(bytes_read)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(bytes_read)

        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                # VR is None here if an implicit VR switch was assumed above
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                # Header is 8 bytes, plus 4 if the extra length was read
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                # NOTE: this message is logged even when `debugging` is off
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                # A zero length element gets the raw empty value for its VR
                value = (
                    fp_read(length) if length > 0
                    else cast(
                        Optional[bytes], empty_value_for_VR(VR, raw=True)
                    )
                )
                if debugging:
                    # Only the first 20 bytes of the value are displayed
                    dotdot = "..." if length > 20 else "   "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian
                )
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # VR UN with undefined length shall be handled as SQ
            # see PS 3.5, section 6.2.2
            if VR == 'UN':
                VR = 'SQ'
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            # NOTE(review): `and` binds tighter than `or`, so this reads as
            # `VR is None or (VR == 'UN' and ...)`. Since 'UN' was replaced
            # by 'SQ' just above, the second operand can no longer be true
            # here - confirm whether that is intended.
            if VR is None or VR == 'UN' and config.replace_un_with_known_vr:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = _unpack_tag(fp_read(4), endian_chr)
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    logger_debug(
                        f"{fp_tell():08X}: Reading/parsing undefined length "
                        "sequence"
                    )

                # The sequence is always parsed, even for a filtered out tag,
                # so that the file position ends up after the sequence
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue

                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(
                    fp, is_little_endian, delimiter, defer_size
                )

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
Ejemplo n.º 9
0
def convert_value(
    VR: str,
    raw_data_element: RawDataElement,
    encodings: Optional[Union[str, MutableSequence[str]]] = None
) -> Union[Any, MutableSequence[Any]]:
    """Return the element value decoded using the appropriate decoder.

    Parameters
    ----------
    VR : str
        The element's VR.
    raw_data_element : pydicom.dataelem.RawDataElement
        The encoded element value.
    encodings : str or list of str, optional
        The character encoding scheme(s) used to encode any text
        elements. A single str is wrapped in a list, and a missing or
        empty value is replaced by ``[default_encoding]``.

    Returns
    -------
    type or MultiValue of type
        The element value decoded using the appropriate decoder. If the
        converter for `VR` raises :class:`ValueError` and
        ``config.enforce_valid_values`` is not set, the result for the
        first VR in ``convert_retry_VR_order`` that converts without
        raising is returned instead, or the raw, unconverted value if
        none of them does.

    Raises
    ------
    NotImplementedError
        If there is no converter for `VR`.
    ValueError
        If the conversion fails and ``config.enforce_valid_values`` is set.
    """

    if VR not in converters:
        # `VR` characters are in the ascii alphabet ranges 65 - 90, 97 - 122
        char_range = list(range(65, 91)) + list(range(97, 123))
        # If the VR characters are outside that range then print hex values.
        # Only the first two characters are inspected; slicing instead of
        # indexing keeps an empty or one-character VR from raising
        # IndexError before the NotImplementedError below can be raised.
        if any(ord(ch) not in char_range for ch in VR[:2]):
            VR = ' '.join([f'0x{ord(ch):02x}' for ch in VR])
        raise NotImplementedError(f"Unknown Value Representation '{VR}'")

    if raw_data_element.length == 0:
        return empty_value_for_VR(VR)

    # Look up the function to convert that VR
    # Dispatch two cases: a plain converter,
    # or a number one which needs a format string
    if isinstance(converters[VR], tuple):
        converter, num_format = cast(tuple, converters[VR])
    else:
        converter = converters[VR]
        num_format = None

    # Ensure that encodings is a list
    encodings = encodings or [default_encoding]
    if isinstance(encodings, str):
        encodings = [encodings]

    byte_string = raw_data_element.value
    is_little_endian = raw_data_element.is_little_endian
    is_implicit_VR = raw_data_element.is_implicit_VR

    # The converter call signature depends on the kind of VR:
    #   text and PN converters take the encodings, sequences need the
    #   transfer syntax details plus the value offset, and everything
    #   else takes the endianness and the (possibly None) number format
    try:
        if VR in text_VRs or VR == 'PN':
            # SH, LO, ST, LT, UC, UT
            return converter(byte_string, encodings)

        if VR != "SQ":
            return converter(byte_string, is_little_endian, num_format)

        # SQ
        return converter(
            byte_string,
            is_implicit_VR,
            is_little_endian,
            encodings,
            raw_data_element.value_tell
        )
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise

    logger.debug(
        f"Unable to convert tag {raw_data_element.tag} with VR {VR} using "
        "the standard value converter"
    )
    # Best effort: retry with every other candidate VR, in order, and
    #   return the first result obtained; any failure just moves on to
    #   the next candidate
    for vr in [val for val in convert_retry_VR_order if val != VR]:
        try:
            return convert_value(vr, raw_data_element, encodings)
        except Exception:
            pass

    logger.debug(
        f"Could not convert value for tag {raw_data_element.tag} with "
        "any VR in the 'convert_retry_VR_order' list"
    )
    # Nothing worked, hand back the unconverted value
    return raw_data_element.value
Ejemplo n.º 10
0
    def get_element_values(self) -> Any:
        """Return the data element value or list of values.

        The result depends on the attribute's value key: ``None`` gives the
        empty value for the VR, ``'Value'`` gives the converted value(s),
        ``'InlineBinary'`` gives the base64 decoded content and
        ``'BulkDataURI'`` gives whatever the bulk data handler returns (or
        the empty value for the VR, with a warning, if there is no handler).

        Returns
        -------
        None, str, float, int, bytes, dataset_class or a list of these
            The value or value list of the newly created data element.

        Raises
        ------
        TypeError
            If the content of 'Value' is not a list, or the content of
            'InlineBinary' or 'BulkDataURI' is not a str.
        ValueError
            If the value key is not one of None, 'Value', 'InlineBinary'
            or 'BulkDataURI'.
        """
        from pydicom.dataelem import empty_value_for_VR

        key = self.value_key

        # Empty attributes carry none of "Value", "BulkDataURI" or
        #   "InlineBinary"
        if key is None:
            return empty_value_for_VR(self.vr)

        if key == 'Value':
            if not isinstance(self.value, list):
                raise TypeError(
                    f"'{self.value_key}' of data element '{self.tag}' must "
                    "be a list")

            if not self.value:
                return empty_value_for_VR(self.vr)

            items = cast(List[ValueType], self.value)
            converted = list(map(self.get_regular_element_value, items))
            # A lone value is unwrapped from its list, except for sequences
            is_single = len(converted) == 1 and self.vr != VR.SQ
            return convert_to_python_number(
                converted[0] if is_single else converted, self.vr
            )

        # PS3.18, Table F.3.1-1 says "InlineBinary" shall be a base64
        # encoded string, while the example in PS3.18, Annex F.4 wraps that
        # string in a list. The standard being ambiguous, both forms are
        # accepted here, and "BulkDataURI" is treated the same way.
        payload = cast(Union[str, List[str]], self.value)
        if isinstance(payload, list):
            payload = payload[0]

        if key == 'InlineBinary':
            # The payload should be a base64 encoded str
            if isinstance(payload, str):
                return base64.b64decode(payload)  # bytes

            raise TypeError(
                f"Invalid attribute value for data element '{self.tag}' - "
                "the value for 'InlineBinary' must be str, not "
                f"{type(payload).__name__}")

        if key == 'BulkDataURI':
            # The payload should be a URI as a str
            if not isinstance(payload, str):
                raise TypeError(
                    f"Invalid attribute value for data element '{self.tag}' - "
                    "the value for 'BulkDataURI' must be str, not "
                    f"{type(payload).__name__}")

            handler = self.bulk_data_element_handler
            if handler is not None:
                return handler(self.tag, self.vr, payload)

            # Without a handler the URI cannot be resolved
            warnings.warn(
                'No bulk data URI handler provided for retrieval '
                f'of value of data element "{self.tag}"')
            return empty_value_for_VR(self.vr)

        raise ValueError(
            f"Unknown attribute name '{self.value_key}' for tag {self.tag}")