コード例 #1
0
ファイル: util.py プロジェクト: dcmq/pydcmq
def dataElementToValue(dataElement, datasetencoding, subvalue=None):
    if subvalue != None:
        value = subvalue
    else:
        value = dataElement.value
    if value == '' or value == None:
        return None
    if type(value) in [str, int, float, list]:
        return value
    if type(value) == bytes:
        pythonencoding = convert_encodings([datasetencoding])
        return value.decode(pythonencoding[0])
    if type(value) == MultiValue:
        return [
            dataElementToValue(dataElement, datasetencoding, subvalue)
            for subvalue in value
        ]
    if dataElement.VR == 'PN':
        if len(str(value)) > 0:
            return {'Alphabetic': str(value)}
        return None
    if dataElement.VR == 'DA':
        return [value.isoformat()]
    if dataElement.VR == 'IS':
        return int(value)
    if dataElement.VR == 'UI' or dataElement.VR == 'AT':
        return str(value)
    if dataElement.VR == 'SQ':
        return [datasetToJSON(subvalue, datasetencoding) for subvalue in value]
    return value.original_string
コード例 #2
0
ファイル: filewriter.py プロジェクト: pohlt/pydicom
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(data_element.tag)

    # If explicit VR, write the VR
    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = "Cannot write ambiguous VR of '%s' for data element with tag %r." % (VR, data_element.tag)
            msg += "\nSet the correct VR before writing, or use an implicit VR transfer syntax"
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)   # reserved 2 bytes
    if VR not in writers:
        raise NotImplementedError("write_data_element: unknown Value Representation '{0}'".format(VR))

    length_location = fp.tell()  # save location for later.
    if not fp.is_implicit_VR and VR not in extra_length_VRs:
        fp.write_US(0)  # Explicit VR length field is only 2 bytes
    else:
        fp.write_UL(0xFFFFFFFF)   # will fill in real length value later if not undefined length item

    encoding = convert_encodings(encoding)

    writer_function, writer_param = writers[VR]
    if VR in text_VRs:
        writer_function(fp, data_element, encoding=encoding[1])
    elif VR in ('PN', 'SQ'):
        writer_function(fp, data_element, encoding=encoding)
    else:
        # Many numeric types use the same writer but with numeric format parameter
        if writer_param is not None:
            writer_function(fp, data_element, writer_param)
        else:
            writer_function(fp, data_element)

    #  print DataElement(tag, VR, value)

    is_undefined_length = False
    if hasattr(data_element, "is_undefined_length") and data_element.is_undefined_length:
        is_undefined_length = True
    location = fp.tell()
    fp.seek(length_location)
    if not fp.is_implicit_VR and VR not in extra_length_VRs:
        fp.write_US(location - length_location - 2)  # 2 is length of US
    else:
        # write the proper length of the data_element back in the length slot, unless is SQ with undefined length.
        if not is_undefined_length:
            fp.write_UL(location - length_location - 4)  # 4 is length of UL
    fp.seek(location)  # ready for next data_element
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #3
0
ファイル: filewriter.py プロジェクト: AlfiyaZi/pydicom
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage rules.
    """
    fp.write_tag(data_element.tag)

    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = "Cannot write ambiguous VR of '%s' for data element with tag %r." % (VR, data_element.tag)
            msg += "\nSet the correct VR before writing, or use an implicit VR transfer syntax"
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)   # reserved 2 bytes
    if VR not in writers:
        raise NotImplementedError("write_data_element: unknown Value Representation '{0}'".format(VR))

    length_location = fp.tell()  # save location for later.
    if not fp.is_implicit_VR and VR not in ['OB', 'OW', 'OF', 'SQ', 'UT', 'UN']:
        fp.write_US(0)  # Explicit VR length field is only 2 bytes
    else:
        fp.write_UL(0xFFFFFFFF)   # will fill in real length value later if not undefined length item

    encoding = convert_encodings(encoding)

    writer_function, writer_param = writers[VR]
    if VR in text_VRs:
        writer_function(fp, data_element, encoding=encoding[1])
    elif VR in ('PN', 'SQ'):
        writer_function(fp, data_element, encoding=encoding)
    else:
        # Many numeric types use the same writer but with numeric format parameter
        if writer_param is not None:
            writer_function(fp, data_element, writer_param)
        else:
            writer_function(fp, data_element)

    #  print DataElement(tag, VR, value)

    is_undefined_length = False
    if hasattr(data_element, "is_undefined_length") and data_element.is_undefined_length:
        is_undefined_length = True
    location = fp.tell()
    fp.seek(length_location)
    if not fp.is_implicit_VR and VR not in ['OB', 'OW', 'OF', 'SQ', 'UT', 'UN']:
        fp.write_US(location - length_location - 2)  # 2 is length of US
    else:
        # write the proper length of the data_element back in the length slot, unless is SQ with undefined length.
        if not is_undefined_length:
            fp.write_UL(location - length_location - 4)  # 4 is length of UL
    fp.seek(location)  # ready for next data_element
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #4
0
ファイル: dataset.py プロジェクト: parneshr/pydicom
    def _character_set(self):
        char_set = self.get('SpecificCharacterSet', None)

        if not char_set:
            char_set = self._parent_encoding
        else:
            char_set = convert_encodings(char_set)

        return char_set
コード例 #5
0
ファイル: dataset.py プロジェクト: BulatSuleymanov/pydicom
    def _character_set(self):
        char_set = self.get("SpecificCharacterSet", None)

        if not char_set:
            char_set = self._parent_encoding
        else:
            char_set = convert_encodings(char_set)

        return char_set
コード例 #6
0
ファイル: dataset.py プロジェクト: linenlkystr/pydicom
    def _character_set(self):
        """The Dataset's SpecificCharacterSet value (if present)."""
        char_set = self.get('SpecificCharacterSet', None)

        if not char_set:
            char_set = self._parent_encoding
        else:
            char_set = convert_encodings(char_set)

        return char_set
コード例 #7
0
ファイル: dataset.py プロジェクト: jrkerns/pydicom
    def _character_set(self):
        """The Dataset's SpecificCharacterSet value (if present)."""
        char_set = self.get('SpecificCharacterSet', None)

        if not char_set:
            char_set = self._parent_encoding
        else:
            char_set = convert_encodings(char_set)

        return char_set
コード例 #8
0
def read_dataset(fp,
                 is_implicit_VR,
                 is_little_endian,
                 bytelength=None,
                 stop_when=None,
                 defer_size=None,
                 parent_encoding=default_encoding,
                 specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDeliterTag, else
        a fixed number of bytes to read
    stop_when : None, optional
        optional call_back function which can terminate reading.
        See help for data_element_generator for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``dcmread`` for more parameter info.
    parent_encoding :
        optional encoding to use as a default in case
        a Specific Character Set (0008,0005) isn't specified
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of Dicom `DataElement` instances.
    """
    raw_data_elements = dict()
    fpStart = fp.tell()
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fpStart < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(
            str(details) + " in file " + getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    if 0x00080005 in raw_data_elements:
        char_set = DataElement_from_raw(raw_data_elements[0x00080005])
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
コード例 #9
0
def data_element_generator(fp,
                           is_implicit_VR,
                           is_little_endian,
                           stop_when=None,
                           defer_size=None,
                           encoding=default_encoding,
                           specific_tags=None):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True,
        read_data_element will just return.
    defer_size : int, str, None, optional
        See ``dcmread`` for parameter info.
    encoding :
        Encoding scheme
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    VR : None if implicit VR, otherwise the VR read from the file
    length :
        the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL)
    value_bytes :
        the raw bytes from the DICOM file
        (not parsed into python types)
    is_little_endian : boolean
        True if transfer syntax is little endian; else False.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    tag_set = set()
    has_specific_char_set = True
    if specific_tags is not None:
        for tag in specific_tags:
            if isinstance(tag, (str, compat.text_type)):
                tag = Tag(tag_for_keyword(tag))
            if isinstance(tag, BaseTag):
                tag_set.add(tag)
        has_specific_char_set = Tag(0x08, 0x05) in tag_set
        tag_set.add(Tag(0x08, 0x05))
    has_tag_set = len(tag_set) > 0

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size
                    and tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(
                                     value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                from pydicom.values import convert_string
                encoding = convert_string(value,
                                          is_little_endian,
                                          encoding=default_encoding)
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)
                if not has_specific_char_set:
                    continue

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                                    length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue
                yield DataElement(tag,
                                  VR,
                                  seq,
                                  value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set,
                # then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value,
                                              is_little_endian,
                                              encoding=default_encoding)
                    # Store the encoding value in the generator for use
                    # with future elements (SQs)
                    encoding = convert_encodings(encoding)
                    if not has_specific_char_set:
                        continue

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue
                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
コード例 #10
0
def write_data_element(fp, data_element, encodings=None):
    """Write the data_element to file fp according to
    dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(data_element.tag)

    # If explicit VR, write the VR
    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = ("Cannot write ambiguous VR of '{}' for data element with "
                   "tag {}.\nSet the correct VR before writing, or use an "
                   "implicit VR transfer syntax".format(
                       VR, repr(data_element.tag)))
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes

    # write into a buffer to avoid seeking back which can be expansive
    buffer = DicomBytesIO()
    buffer.is_little_endian = fp.is_little_endian
    buffer.is_implicit_VR = fp.is_implicit_VR

    if data_element.is_raw:
        # raw data element values can be written as they are
        buffer.write(data_element.value)
        is_undefined_length = data_element.length == 0xFFFFFFFF
    else:
        if VR not in writers:
            raise NotImplementedError(
                "write_data_element: unknown Value Representation "
                "'{0}'".format(VR))

        encodings = encodings or [default_encoding]
        encodings = convert_encodings(encodings)
        writer_function, writer_param = writers[VR]
        is_undefined_length = data_element.is_undefined_length
        if VR in text_VRs or VR in ('PN', 'SQ'):
            writer_function(buffer, data_element, encodings=encodings)
        else:
            # Many numeric types use the same writer but with numeric format
            # parameter
            if writer_param is not None:
                writer_function(buffer, data_element, writer_param)
            else:
                writer_function(buffer, data_element)

    # valid pixel data with undefined length shall contain encapsulated
    # data, e.g. sequence items - raise ValueError otherwise (see #238)
    if is_undefined_length and data_element.tag == 0x7fe00010:
        val = data_element.value
        if (fp.is_little_endian and not val.startswith(b'\xfe\xff\x00\xe0')
                or not fp.is_little_endian
                and not val.startswith(b'\xff\xfe\xe0\x00')):
            raise ValueError('Pixel Data with undefined length must '
                             'start with an item tag')

    value_length = buffer.tell()
    if (not fp.is_implicit_VR and VR not in extra_length_VRs
            and not is_undefined_length):
        fp.write_US(value_length)  # Explicit VR length field is only 2 bytes
    else:
        # write the proper length of the data_element in the length slot,
        # unless is SQ with undefined length.
        fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)

    fp.write(buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #11
0
def read_dataset(fp,
                 is_implicit_VR,
                 is_little_endian,
                 bytelength=None,
                 stop_when=None,
                 defer_size=None,
                 parent_encoding=default_encoding,
                 specific_tags=None,
                 at_top_level=True):
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the next
    dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDeliterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional call_back function which can terminate reading. See help for
        :func:`data_element_generator` for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See :func:`dcmread` for
        more parameter info.
    parent_encoding :
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list or None
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        If dataset is top level (not within a sequence).
        Used to turn off explicit VR heuristic within sequences

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    if at_top_level:
        is_implicit_VR = _is_implicit_vr(fp, is_implicit_VR, is_little_endian,
                                         stop_when)
    fp.seek(fp_start)
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.enforce_valid_values:
            raise
        msg = str(details) + " in file " + getattr(fp, "name", "<no filename>")
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    if 0x00080005 in raw_data_elements:
        char_set = DataElement_from_raw(raw_data_elements[0x00080005])
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
コード例 #12
0
def write_data_element(
        fp: DicomIO,
        elem: Union[DataElement, RawDataElement],
        encodings: Optional[Union[str, List[str]]] = None) -> None:
    """Write the data_element to file fp according to
    dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(elem.tag)

    # write into a buffer to avoid seeking back which can be expansive
    buffer = DicomBytesIO()
    buffer.is_little_endian = fp.is_little_endian
    buffer.is_implicit_VR = fp.is_implicit_VR

    VR: Optional[str] = elem.VR
    if not fp.is_implicit_VR and VR and len(VR) != 2:
        msg = (f"Cannot write ambiguous VR of '{VR}' for data element with "
               f"tag {repr(elem.tag)}.\nSet the correct VR before "
               f"writing, or use an implicit VR transfer syntax")
        raise ValueError(msg)

    if elem.is_raw:
        elem = cast(RawDataElement, elem)
        # raw data element values can be written as they are
        buffer.write(cast(bytes, elem.value))
        is_undefined_length = elem.length == 0xFFFFFFFF
    else:
        elem = cast(DataElement, elem)
        if VR not in writers:
            raise NotImplementedError(
                f"write_data_element: unknown Value Representation '{VR}'")

        encodings = encodings or [default_encoding]
        encodings = convert_encodings(encodings)
        fn, param = writers[VR]
        is_undefined_length = elem.is_undefined_length
        if not elem.is_empty:
            if VR in text_VRs or VR in ('PN', 'SQ'):
                fn(buffer, elem, encodings=encodings)  # type: ignore[operator]
            else:
                # Many numeric types use the same writer but with
                # numeric format parameter
                if param is not None:
                    fn(buffer, elem, param)  # type: ignore[operator]
                else:
                    fn(buffer, elem)  # type: ignore[operator]

    # valid pixel data with undefined length shall contain encapsulated
    # data, e.g. sequence items - raise ValueError otherwise (see #238)
    if is_undefined_length and elem.tag == 0x7fe00010:
        encap_item = b'\xfe\xff\x00\xe0'
        if not fp.is_little_endian:
            # Non-conformant endianness
            encap_item = b'\xff\xfe\xe0\x00'
        if not cast(bytes, elem.value).startswith(encap_item):
            raise ValueError(
                "(7FE0,0010) Pixel Data has an undefined length indicating "
                "that it's compressed, but the data isn't encapsulated as "
                "required. See pydicom.encaps.encapsulate() for more "
                "information")

    value_length = buffer.tell()
    if (not fp.is_implicit_VR and VR not in extra_length_VRs
            and not is_undefined_length and value_length > 0xffff):
        # see PS 3.5, section 6.2.2 for handling of this case
        msg = (
            f"The value for the data element {elem.tag} exceeds the "
            f"size of 64 kByte and cannot be written in an explicit transfer "
            f"syntax. The data element VR is changed from '{VR}' to 'UN' "
            f"to allow saving the data.")
        warnings.warn(msg)
        VR = 'UN'

    # write the VR for explicit transfer syntax
    if not fp.is_implicit_VR:
        VR = cast(str, VR)
        fp.write(bytes(VR, default_encoding))

        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes

    if (not fp.is_implicit_VR and VR not in extra_length_VRs
            and not is_undefined_length):
        fp.write_US(value_length)  # Explicit VR length field is 2 bytes
    else:
        # write the proper length of the data_element in the length slot,
        # unless is SQ with undefined length.
        fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)

    fp.write(buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #13
0
ファイル: filewriter.py プロジェクト: jrkerns/pydicom
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(data_element.tag)

    # If explicit VR, write the VR
    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = ("Cannot write ambiguous VR of '{}' for data element with "
                   "tag {}.\nSet the correct VR before writing, or use an "
                   "implicit VR transfer syntax".format(
                       VR, repr(data_element.tag)))
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes
    if VR not in writers:
        raise NotImplementedError(
            "write_data_element: unknown Value Representation '{0}'".format(
                VR))

    length_location = fp.tell()  # save location for later.
    if not fp.is_implicit_VR and VR not in extra_length_VRs:
        fp.write_US(0)  # Explicit VR length field is only 2 bytes
    else:
        fp.write_UL(
            0xFFFFFFFF
        )  # will fill in real length value later if not undefined length item

    encoding = convert_encodings(encoding)

    writer_function, writer_param = writers[VR]
    if VR in text_VRs:
        writer_function(fp, data_element, encoding=encoding[1])
    elif VR in ('PN', 'SQ'):
        writer_function(fp, data_element, encoding=encoding)
    else:
        # Many numeric types use the same writer but with numeric format
        # parameter
        if writer_param is not None:
            writer_function(fp, data_element, writer_param)
        else:
            writer_function(fp, data_element)

    #  print DataElement(tag, VR, value)

    is_undefined_length = False
    if (hasattr(data_element, "is_undefined_length")
            and data_element.is_undefined_length):
        is_undefined_length = True
        # valid pixel data with undefined length shall contain encapsulated
        # data, e.g. sequence items - raise ValueError otherwise (see #238)
        if data_element.tag == 0x7fe00010:  # pixel data
            val = data_element.value
            if (fp.is_little_endian and not
                    val.startswith(b'\xfe\xff\x00\xe0') or
                    not fp.is_little_endian and
                    not val.startswith(b'\xff\xfe\xe0\x00')):
                raise ValueError('Pixel Data with undefined length must '
                                 'start with an item tag')
    location = fp.tell()
    fp.seek(length_location)
    if not fp.is_implicit_VR and VR not in extra_length_VRs:
        fp.write_US(location - length_location - 2)  # 2 is length of US
    else:
        # write the proper length of the data_element back in the length slot,
        # unless is SQ with undefined length.
        if not is_undefined_length:
            fp.write_UL(location - length_location - 4)  # 4 is length of UL
    fp.seek(location)  # ready for next data_element
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #14
0
ファイル: filewriter.py プロジェクト: pieper/pydicom
def write_data_element(fp, data_element, encodings=None):
    """Write the data_element to file fp according to
    dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(data_element.tag)

    # If explicit VR, write the VR
    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = ("Cannot write ambiguous VR of '{}' for data element with "
                   "tag {}.\nSet the correct VR before writing, or use an "
                   "implicit VR transfer syntax".format(
                       VR, repr(data_element.tag)))
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes

    # write into a buffer to avoid seeking back which can be expansive
    buffer = DicomBytesIO()
    buffer.is_little_endian = fp.is_little_endian
    buffer.is_implicit_VR = fp.is_implicit_VR

    if data_element.is_raw:
        # raw data element values can be written as they are
        buffer.write(data_element.value)
        is_undefined_length = data_element.length == 0xFFFFFFFF
    else:
        if VR not in writers:
            raise NotImplementedError(
                "write_data_element: unknown Value Representation "
                "'{0}'".format(VR))

        encodings = encodings or [default_encoding]
        encodings = convert_encodings(encodings)
        writer_function, writer_param = writers[VR]
        is_undefined_length = data_element.is_undefined_length
        if VR in text_VRs or VR in ('PN', 'SQ'):
            writer_function(buffer, data_element, encodings=encodings)
        else:
            # Many numeric types use the same writer but with numeric format
            # parameter
            if writer_param is not None:
                writer_function(buffer, data_element, writer_param)
            else:
                writer_function(buffer, data_element)

    # valid pixel data with undefined length shall contain encapsulated
    # data, e.g. sequence items - raise ValueError otherwise (see #238)
    if is_undefined_length and data_element.tag == 0x7fe00010:
        val = data_element.value
        if (fp.is_little_endian and not
                val.startswith(b'\xfe\xff\x00\xe0') or
                not fp.is_little_endian and
                not val.startswith(b'\xff\xfe\xe0\x00')):
            raise ValueError('Pixel Data with undefined length must '
                             'start with an item tag')

    value_length = buffer.tell()
    if (not fp.is_implicit_VR and VR not in extra_length_VRs and
            not is_undefined_length):
        try:
            fp.write_US(value_length)  # Explicit VR length field is 2 bytes
        except struct.error:
            msg = ('The value for the data element {} exceeds the size '
                   'of 64 kByte and cannot be written in an explicit transfer '
                   'syntax. You can save it using Implicit Little Endian '
                   'transfer syntax, or you have to truncate the value to not '
                   'exceed the maximum size of 64 kByte.'
                   .format(data_element.tag))
            raise ValueError(msg)
    else:
        # write the proper length of the data_element in the length slot,
        # unless is SQ with undefined length.
        fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)

    fp.write(buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #15
0
ファイル: filereader.py プロジェクト: wroldwiedbwe/pydicom
def data_element_generator(
    fp: BinaryIO,
    is_implicit_VR: bool,
    is_little_endian: bool,
    stop_when: Optional[Callable[[BaseTag, Optional[str], int], bool]] = None,
    defer_size: Optional[Union[int, str, float]] = None,
    encoding: Union[str, MutableSequence[str]] = default_encoding,
    specific_tags: Optional[List[BaseTag]] = None
) -> Iterator[Union[RawDataElement, DataElement]]:
    """Create a generator to efficiently return the raw data elements.

    .. note::

        This function is used internally - usually there is no need to call it
        from user code. To read data from a DICOM file, :func:`dcmread`
        shall be used instead.

    Parameters
    ----------
    fp : file-like
        The file-like to read from.
    is_implicit_VR : bool
        ``True`` if the data is encoded as implicit VR, ``False`` otherwise.
    is_little_endian : bool
        ``True`` if the data is encoded as little endian, ``False`` otherwise.
    stop_when : None, callable, optional
        If ``None`` (default), then the whole file is read. A callable which
        takes tag, VR, length, and returns ``True`` or ``False``. If it
        returns ``True``, ``read_data_element`` will just return.
    defer_size : int, str or float, optional
        See :func:`dcmread` for parameter info.
    encoding : Union[str, MutableSequence[str]]
        Encoding scheme
    specific_tags : list or None
        See :func:`dcmread` for parameter info.

    Yields
    -------
    RawDataElement or DataElement
        Yields DataElement for undefined length UN or SQ, RawDataElement
        otherwise.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #        The 4-byte length can be FFFFFFFF (undefined length)*
    #
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element
    from pydicom.values import convert_string

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"

    # assign implicit VR struct to variable as use later if VR assumed missing
    implicit_VR_struct = Struct(endian_chr + "HHL")
    if is_implicit_VR:
        element_struct = implicit_VR_struct
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack
    defer_size = size_in_bytes(defer_size)

    tag_set = {Tag(tag) for tag in specific_tags} if specific_tags else set()
    has_tag_set = bool(tag_set)
    if has_tag_set:
        tag_set.add(Tag(0x00080005))  # Specific Character Set

    while True:
        # VR: Optional[str]

        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            return  # at end of file

        if debugging:
            debug_msg = f"{fp.tell() - 8:08x}: {bytes2hex(bytes_read)}"

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            # defend against switching to implicit VR, some writer do in SQ's
            # issue 1067, issue 1035

            if not (b'AA' <= VR <= b'ZZ') and config.assume_implicit_vr_switch:
                # invalid VR, must be 2 cap chrs, assume implicit and continue
                VR = None
                group, elem, length = implicit_VR_struct.unpack(bytes_read)
            else:
                VR = VR.decode(default_encoding)
                if VR in extra_length_VRs:
                    bytes_read = fp_read(4)
                    length = extra_length_unpack(bytes_read)[0]
                    if debugging:
                        debug_msg += " " + bytes2hex(bytes_read)

        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                return

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            # don't defer loading of Specific Character Set value as it is
            # needed immediately to get the character encoding for other tags
            if has_tag_set and tag not in tag_set:
                # skip the tag if not in specific tags
                fp.seek(fp_tell() + length)
                continue

            if (defer_size is not None and length > defer_size and
                    tag != BaseTag(0x00080005)):
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = (
                    fp_read(length) if length > 0
                    else cast(
                        Optional[bytes], empty_value_for_VR(VR, raw=True)
                    )
                )
                if debugging:
                    dotdot = "..." if length > 20 else "   "
                    displayed_value = value[:20] if value else b''
                    logger_debug("%08x: %-34s %s %r %s" %
                                 (value_tell, bytes2hex(displayed_value),
                                  dotdot, displayed_value, dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == BaseTag(0x00080005):
                # *Specific Character String* is b'' for empty value
                encoding = convert_string(
                    cast(bytes, value) or b'', is_little_endian
                )
                # Store the encoding value in the generator
                # for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # VR UN with undefined length shall be handled as SQ
            # see PS 3.5, section 6.2.2
            if VR == 'UN':
                VR = 'SQ'
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None or VR == 'UN' and config.replace_un_with_known_vr:
                try:
                    VR = dictionary_VR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items
                    # and is thus a SQ
                    next_tag = _unpack_tag(fp_read(4), endian_chr)
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    logger_debug(
                        f"{fp_tell():08X}: Reading/parsing undefined length "
                        "sequence"
                    )

                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                if has_tag_set and tag not in tag_set:
                    continue

                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(
                    fp, is_little_endian, delimiter, defer_size
                )

                # tags with undefined length are skipped after read
                if has_tag_set and tag not in tag_set:
                    continue

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
コード例 #16
0
ファイル: filereader.py プロジェクト: darcymason/pydicom
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDeliterTag, else
        a fixed number of bytes to read
    stop_when : None, optional
        optional call_back function which can terminate reading.
        See help for data_element_generator for details
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``dcmread`` for more parameter info.
    parent_encoding :
        optional encoding to use as a default in case
        a Specific Character Set (0008,0005) isn't specified
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of Dicom `DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    is_implicit_VR = _is_implicit_vr(
        fp, is_implicit_VR, is_little_endian, stop_when)
    fp.seek(fp_start)
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(str(details) + " in file " +
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    if 0x00080005 in raw_data_elements:
        char_set = DataElement_from_raw(raw_data_elements[0x00080005])
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
コード例 #17
0
def read_dataset(fp: BinaryIO,
                 is_implicit_VR: bool,
                 is_little_endian: bool,
                 bytelength: Optional[int] = None,
                 stop_when: Optional[Callable[[BaseTag, Optional[str], int],
                                              bool]] = None,
                 defer_size: Optional[Union[str, int, float]] = None,
                 parent_encoding: Union[
                     str, MutableSequence[str]] = default_encoding,
                 specific_tags: Optional[List[BaseTag]] = None,
                 at_top_level: bool = True) -> Dataset:
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the next
    dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDeliterTag, else a fixed
        number of bytes to read
    stop_when : None, optional
        Optional call_back function which can terminate reading. See help for
        :func:`data_element_generator` for details
    defer_size : int, str or float, optional
        Size to avoid loading large elements in memory. See :func:`dcmread` for
        more parameter info.
    parent_encoding : str or List[str]
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list of BaseTag, optional
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        If dataset is top level (not within a sequence).
        Used to turn off explicit VR heuristic within sequences

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements: Dict[BaseTag, Union[RawDataElement, DataElement]] = {}
    fp_start = fp.tell()
    is_implicit_VR = _is_implicit_vr(fp,
                                     is_implicit_VR,
                                     is_little_endian,
                                     stop_when,
                                     is_sequence=not at_top_level)
    fp.seek(fp_start)
    de_gen = data_element_generator(
        fp,
        is_implicit_VR,
        is_little_endian,
        stop_when,
        defer_size,
        parent_encoding,
        specific_tags,
    )
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.settings.reading_validation_mode == config.RAISE:
            raise
        msg = str(details) + " in file " + getattr(fp, "name", "<no filename>")
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)

    encoding: Union[str, MutableSequence[str]]
    if 0x00080005 in raw_data_elements:
        elem = cast(RawDataElement, raw_data_elements[BaseTag(0x00080005)])
        char_set = cast(Optional[Union[str, MutableSequence[str]]],
                        DataElement_from_raw(elem).value)
        encoding = convert_encodings(char_set)  # -> List[str]
    else:
        encoding = parent_encoding  # -> Union[str, MutableSequence[str]]

    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
コード例 #18
0
def write_data_element(fp, data_element, encodings=None):
    """Write the data_element to file fp according to
    dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(data_element.tag)

    # write into a buffer to avoid seeking back which can be expansive
    buffer = DicomBytesIO()
    buffer.is_little_endian = fp.is_little_endian
    buffer.is_implicit_VR = fp.is_implicit_VR

    VR = data_element.VR
    if not fp.is_implicit_VR and len(VR) != 2:
        msg = ("Cannot write ambiguous VR of '{}' for data element with "
               "tag {}.\nSet the correct VR before writing, or use an "
               "implicit VR transfer syntax".format(VR,
                                                    repr(data_element.tag)))
        raise ValueError(msg)

    if data_element.is_raw:
        # raw data element values can be written as they are
        buffer.write(data_element.value)
        is_undefined_length = data_element.length == 0xFFFFFFFF
    else:
        if VR not in writers:
            raise NotImplementedError(
                "write_data_element: unknown Value Representation "
                "'{0}'".format(VR))

        encodings = encodings or [default_encoding]
        encodings = convert_encodings(encodings)
        writer_function, writer_param = writers[VR]
        is_undefined_length = data_element.is_undefined_length
        if not data_element.is_empty:
            if VR in text_VRs or VR in ('PN', 'SQ'):
                writer_function(buffer, data_element, encodings=encodings)
            else:
                # Many numeric types use the same writer but with
                # numeric format parameter
                if writer_param is not None:
                    writer_function(buffer, data_element, writer_param)
                else:
                    writer_function(buffer, data_element)

    # valid pixel data with undefined length shall contain encapsulated
    # data, e.g. sequence items - raise ValueError otherwise (see #238)
    if is_undefined_length and data_element.tag == 0x7fe00010:
        encap_item = b'\xfe\xff\x00\xe0'
        if not fp.is_little_endian:
            # Non-conformant endianness
            encap_item = b'\xff\xfe\xe0\x00'
        if not data_element.value.startswith(encap_item):
            raise ValueError(
                "(7FE0,0010) Pixel Data has an undefined length indicating "
                "that it's compressed, but the data isn't encapsulated as "
                "required. See pydicom.encaps.encapsulate() for more "
                "information")

    value_length = buffer.tell()
    if (not fp.is_implicit_VR and VR not in extra_length_VRs
            and not is_undefined_length and value_length > 0xffff):
        # see PS 3.5, section 6.2.2 for handling of this case
        msg = ('The value for the data element {} exceeds the size '
               'of 64 kByte and cannot be written in an explicit transfer '
               'syntax. The data element VR is changed from "{}" to "UN" '
               'to allow saving the data.'.format(data_element.tag, VR))
        warnings.warn(msg)
        VR = 'UN'

    # write the VR for explicit transfer syntax
    if not fp.is_implicit_VR:
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes

    if (not fp.is_implicit_VR and VR not in extra_length_VRs
            and not is_undefined_length):
        fp.write_US(value_length)  # Explicit VR length field is 2 bytes
    else:
        # write the proper length of the data_element in the length slot,
        # unless is SQ with undefined length.
        fp.write_UL(0xFFFFFFFF if is_undefined_length else value_length)

    fp.write(buffer.getvalue())
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item
コード例 #19
0
ファイル: filereader.py プロジェクト: kaurousseau/toad
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                           stop_when=None, defer_size=None, encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements.

    Parameters
    ----------
    fp : file-like object
    is_implicit_VR : boolean
    is_little_endian : boolean
    stop_when : None, callable, optional
        If None (default), then the whole file is read.
        A callable which takes tag, VR, length,
        and returns True or False. If it returns True,
        read_data_element will raise StopIteration.
    defer_size : int, str, None, optional
        See ``read_file`` for parameter info.
    encoding :
        Encoding scheme

    Returns
    -------
    VR : None if implicit VR, otherwise the VR read from the file
    length :
        the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL)
    value_bytes :
        the raw bytes from the DICOM file
        (not parsed into python types)
    is_little_endian : boolean
        True if transfer syntax is little endian; else False.
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = config.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            raise StopIteration  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                              bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if not in_py2:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFF:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                raise StopIteration

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFF:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. "
                             "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                                                           bytes2hex(value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == (0x08, 0x05):
                from pydicom.values import convert_string
                encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                # Store the encoding value in the generator for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                 is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                  is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                    delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set, then store it
                if tag == (0x08, 0x05):
                    from pydicom.values import convert_string
                    encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                    # Store the encoding value in the generator for use with future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)
コード例 #20
0
ファイル: filewriter.py プロジェクト: linenlkystr/pydicom
def write_data_element(fp, data_element, encoding=default_encoding):
    """Write the data_element to file fp according to dicom media storage rules.
    """
    # Write element's tag
    fp.write_tag(data_element.tag)

    # If explicit VR, write the VR
    VR = data_element.VR
    if not fp.is_implicit_VR:
        if len(VR) != 2:
            msg = ("Cannot write ambiguous VR of '{}' for data element with "
                   "tag {}.\nSet the correct VR before writing, or use an "
                   "implicit VR transfer syntax".format(
                       VR, repr(data_element.tag)))
            raise ValueError(msg)
        if not in_py2:
            fp.write(bytes(VR, default_encoding))
        else:
            fp.write(VR)
        if VR in extra_length_VRs:
            fp.write_US(0)  # reserved 2 bytes
    if VR not in writers:
        raise NotImplementedError(
            "write_data_element: unknown Value Representation '{0}'".format(
                VR))

    length_location = fp.tell()  # save location for later.
    if not fp.is_implicit_VR and VR not in extra_length_VRs:
        fp.write_US(0)  # Explicit VR length field is only 2 bytes
    else:
        fp.write_UL(
            0xFFFFFFFF
        )  # will fill in real length value later if not undefined length item

    encoding = convert_encodings(encoding)

    writer_function, writer_param = writers[VR]
    if VR in text_VRs:
        writer_function(fp, data_element, encoding=encoding[1])
    elif VR in ('PN', 'SQ'):
        writer_function(fp, data_element, encoding=encoding)
    else:
        # Many numeric types use the same writer but with numeric format
        # parameter
        if writer_param is not None:
            writer_function(fp, data_element, writer_param)
        else:
            writer_function(fp, data_element)

    #  print DataElement(tag, VR, value)

    is_undefined_length = False
    if (hasattr(data_element, "is_undefined_length")
            and data_element.is_undefined_length):
        is_undefined_length = True
        # valid pixel data with undefined length shall contain encapsulated
        # data, e.g. sequence items - raise ValueError otherwise (see #238)
        if data_element.tag == 0x7fe00010:  # pixel data
            val = data_element.value
            if (fp.is_little_endian and not val.startswith(b'\xfe\xff\x00\xe0')
                    or not fp.is_little_endian
                    and not val.startswith(b'\xff\xfe\xe0\x00')):
                raise ValueError('Pixel Data with undefined length must '
                                 'start with an item tag')
    location = fp.tell()
    fp.seek(length_location)
    if not fp.is_implicit_VR and VR not in extra_length_VRs:
        fp.write_US(location - length_location - 2)  # 2 is length of US
    else:
        # write the proper length of the data_element back in the length slot,
        # unless is SQ with undefined length.
        if not is_undefined_length:
            fp.write_UL(location - length_location - 4)  # 4 is length of UL
    fp.seek(location)  # ready for next data_element
    if is_undefined_length:
        fp.write_tag(SequenceDelimiterTag)
        fp.write_UL(0)  # 4-byte 'length' of delimiter data item