Example #1
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDelimiterTag, else
        a fixed number of bytes to read.
    stop_when : None, optional
        optional callback function which can terminate reading.
        See help for ``data_element_generator`` for details.
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``dcmread`` for more parameter info.
    parent_encoding :
        optional encoding to use as a default in case
        a Specific Character Set (0008,0005) isn't specified
    specific_tags : list or None
        See ``dcmread`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of DICOM `DataElement` instances.
    """
    raw_data_elements = dict()
    fpStart = fp.tell()
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fpStart < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == (0xFFFE, 0xE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(str(details) + " in file " +
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    return Dataset(raw_data_elements)
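
A minimal usage sketch (not from the pydicom docs): the stream below is a hypothetical raw data element, (0010,0010) Patient Name encoded as Implicit VR Little Endian, and read_dataset is assumed importable from pydicom.filereader as in the versions shown here.

from io import BytesIO
from pydicom.filereader import read_dataset

# (0010,0010) Patient Name = "DOE^JOHN", implicit VR little endian:
# 2-byte group, 2-byte element, 4-byte length, then the 8-byte value
raw = b"\x10\x00\x10\x00\x08\x00\x00\x00" + b"DOE^JOHN"
ds = read_dataset(BytesIO(raw), is_implicit_VR=True, is_little_endian=True)
print(ds.PatientName)  # expected: DOE^JOHN
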
Example #2
def read_dataset(fp, is_implicit_VR, is_little_endian, bytelength=None,
                 stop_when=None, defer_size=None,
                 parent_encoding=default_encoding, specific_tags=None):
    """Return a Dataset instance containing the next dataset in the file.

    Parameters
    ----------
    fp : an opened file object
    is_implicit_VR : boolean
        True if file transfer syntax is implicit VR.
    is_little_endian : boolean
        True if file has little endian transfer syntax.
    bytelength : int, None, optional
        None to read until end of file or ItemDelimiterTag, else
        a fixed number of bytes to read.
    stop_when : None, optional
        optional callback function which can terminate reading.
        See help for ``data_element_generator`` for details.
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``read_file`` for more parameter info.
    parent_encoding :
        optional encoding to use as a default in case
        a Specific Character Set (0008,0005) isn't specified
    specific_tags : list or None
        See ``read_file`` for parameter info.

    Returns
    -------
    a Dataset instance

    See Also
    --------
    pydicom.dataset.Dataset
        A collection (dictionary) of DICOM `DataElement` instances.
    """
    raw_data_elements = dict()
    fpStart = fp.tell()
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fpStart < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == (0xFFFE, 0xE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        # XXX is this error visible enough to user code with just logging?
        logger.error(str(details) + " in file " +
                     getattr(fp, "name", "<no filename>"))
    except NotImplementedError as details:
        logger.error(details)

    return Dataset(raw_data_elements)
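
A sketch of the specific_tags filter, assuming it behaves as documented for ``read_file``/``dcmread`` (only the requested elements, plus Specific Character Set, are kept). The element bytes are hypothetical Implicit VR Little Endian encodings.

from io import BytesIO
from pydicom.filereader import read_dataset
from pydicom.tag import Tag

# Two hypothetical elements: (0010,0010) Patient Name and (0010,0020) Patient ID
raw = (b"\x10\x00\x10\x00\x08\x00\x00\x00" + b"DOE^JOHN" +
       b"\x10\x00\x20\x00\x08\x00\x00\x00" + b"12345678")
ds = read_dataset(BytesIO(raw), is_implicit_VR=True, is_little_endian=True,
                  specific_tags=[Tag(0x0010, 0x0010)])
print("PatientID" in ds)  # expected: False - not in specific_tags, so skipped
print(ds.PatientName)     # expected: DOE^JOHN
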
Example #3
def read_dataset(fp,
                 is_implicit_VR,
                 is_little_endian,
                 bytelength=None,
                 stop_when=None,
                 defer_size=None,
                 parent_encoding=default_encoding,
                 specific_tags=None,
                 at_top_level=True):
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the next
    dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read.
    stop_when : None, optional
        Optional callback function which can terminate reading. See help for
        :func:`data_element_generator` for details.
    defer_size : int, None, optional
        Size to avoid loading large elements in memory. See :func:`dcmread` for
        more parameter info.
    parent_encoding :
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list or None
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        ``True`` if the dataset is at the top level (not within a sequence).
        Used to turn off the explicit VR heuristic within sequences.

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements = dict()
    fp_start = fp.tell()
    if at_top_level:
        is_implicit_VR = _is_implicit_vr(fp, is_implicit_VR, is_little_endian,
                                         stop_when)
    fp.seek(fp_start)
    de_gen = data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when, defer_size, parent_encoding,
                                    specific_tags)
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.enforce_valid_values:
            raise
        msg = str(details) + " in file " + getattr(fp, "name", "<no filename>")
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)
    if 0x00080005 in raw_data_elements:
        char_set = DataElement_from_raw(raw_data_elements[0x00080005]).value
        encoding = convert_encodings(char_set)
    else:
        encoding = parent_encoding
    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
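
A sketch of reading a fixed-length, non-top-level dataset so the explicit VR heuristic is skipped. The bytes are a hypothetical (0010,0020) Patient ID element in Implicit VR Little Endian, and the import assumes a pydicom version whose filereader.read_dataset accepts at_top_level, as above.

from io import BytesIO
from pydicom.filereader import read_dataset

# (0010,0020) Patient ID = "12345678", implicit VR little endian (16 bytes)
item_bytes = b"\x10\x00\x20\x00\x08\x00\x00\x00" + b"12345678"
ds = read_dataset(BytesIO(item_bytes), is_implicit_VR=True,
                  is_little_endian=True, bytelength=len(item_bytes),
                  at_top_level=False)
print(ds.PatientID)  # expected: 12345678
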
Example #4
def read_undefined_length_value(fp,
                                is_little_endian,
                                delimiter_tag,
                                defer_size=None,
                                read_size=1024 * 8):
    """Read until the delimiter tag found and return the value;
     ignore the delimiter.

    On completion, the file will be set to the first byte after the delimiter
    and its following four zero bytes.

    Parameters
    ----------
    fp : a file-like object
    is_little_endian : boolean
        True if file transfer syntax is little endian, else False.
    delimiter_tag : BaseTag
        Tag used as end marker for reading.
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``filereader.dcmread`` for more parameter info.
    read_size : int
        Number of bytes to read at one time.

    Returns
    -------
    value : bytes, None
        The value read up to (but not including) the delimiter, or None if
        the length read exceeded `defer_size`.

    Raises
    ------
    EOFError
        If EOF is reached before delimiter found.
    """
    data_start = fp.tell()
    search_rewind = 3

    if is_little_endian:
        bytes_format = b"<HH"
    else:
        bytes_format = b">HH"
    bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)

    found = False
    eof = False
    value_chunks = []
    defer_size = size_in_bytes(defer_size)
    byte_count = 0  # for defer_size checks
    while not found:
        chunk_start = fp.tell()
        bytes_read = fp.read(read_size)
        if len(bytes_read) < read_size:
            # try again - if still don't get required amount,
            # this is the last block
            new_bytes = fp.read(read_size - len(bytes_read))
            bytes_read += new_bytes
            if len(bytes_read) < read_size:
                eof = True  # but will still check whatever we did get
        index = bytes_read.find(bytes_to_find)
        if index != -1:
            found = True
            new_bytes = bytes_read[:index]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(bytes_read[:index])
            fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
            length = fp.read(4)
            if length != b"\0\0\0\0":
                msg = ("Expected 4 zero bytes after undefined length delimiter"
                       " at pos {0:04x}")
                logger.error(msg.format(fp.tell() - 4))
        elif eof:
            fp.seek(data_start)
            raise EOFError(
                "End of file reached before delimiter {0!r} found".format(
                    delimiter_tag))
        else:
            # rewind a bit in case delimiter crossed read_size boundary
            fp.seek(fp.tell() - search_rewind)
            # accumulate the bytes read (not including the rewind)
            new_bytes = bytes_read[:-search_rewind]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
    # if get here then have found the byte string
    if defer_size is not None and byte_count >= defer_size:
        return None
    else:
        return b"".join(value_chunks)
Example #5
    def from_json(cls,
                  dataset_class,
                  tag,
                  vr,
                  value,
                  value_key,
                  bulk_data_uri_handler=None,
                  encodings=None):
        """Creates a DataElement from JSON.

        Parameters
        ----------
        dataset_class: Dataset derived class
            class used to create sequence items
        tag: pydicom.tag.Tag
            data element tag
        vr: str
            data element value representation
        value: list
            data element value(s)
        value_key: Union[str, None]
            key of the data element that contains the value
            (options: ``{"Value", "InlineBinary", "BulkDataURI"}``)
        bulk_data_uri_handler: Union[Callable, None]
            callable that accepts the "BulkDataURI" of the JSON representation
            of a data element and returns the actual value of that data element
            (retrieved via DICOMweb WADO-RS)

        Returns
        -------
        pydicom.dataelem.DataElement

        """
        # TODO: test wado-rs retrieve wrapper
        try:
            vm = dictionary_VM(tag)
        except KeyError:
            # Private tag
            vm = str(len(value))
        if value_key == 'Value':
            if not (isinstance(value, list)):
                fmt = '"{}" of data element "{}" must be a list.'
                raise TypeError(fmt.format(value_key, tag))
        elif value_key in {'InlineBinary', 'BulkDataURI'}:
            if isinstance(value, list):
                fmt = '"{}" of data element "{}" must be a {}.'
                expected_type = ('string' if value_key == 'BulkDataURI' else
                                 'bytes-like object')
                raise TypeError(fmt.format(value_key, tag, expected_type))
        if vr == 'SQ':
            elem_value = []
            for value_item in value:
                ds = dataset_class()
                if value_item:
                    for key, val in value_item.items():
                        if 'vr' not in val:
                            fmt = 'Data element "{}" must have key "vr".'
                            raise KeyError(fmt.format(tag))
                        unique_value_keys = tuple(
                            set(val.keys()) & set(jsonrep.JSON_VALUE_KEYS))
                        if len(unique_value_keys) == 0:
                            logger.debug(
                                'data element has neither key "{}".'.format(
                                    '" nor "'.join(jsonrep.JSON_VALUE_KEYS)))
                            elem = DataElement(tag=tag, value='', VR=vr)
                        else:
                            value_key = unique_value_keys[0]
                            elem = cls.from_json(dataset_class, key, val['vr'],
                                                 val[value_key], value_key)
                        ds.add(elem)
                elem_value.append(ds)
        elif vr == 'PN':
            # Special case, see DICOM Part 18 Annex F2.2
            elem_value = []
            for v in value:
                if not isinstance(v, dict):
                    # Some DICOMweb services get this wrong, so we
                    # workaround the issue and warn the user
                    # rather than raising an error.
                    logger.error(
                        'value of data element "{}" with VR Person Name (PN) '
                        'is not formatted correctly'.format(tag))
                    elem_value.append(v)
                else:
                    elem_value.extend(list(v.values()))
            if vm == '1':
                try:
                    elem_value = elem_value[0]
                except IndexError:
                    elem_value = ''
        else:
            if vm == '1':
                if value_key == 'InlineBinary':
                    elem_value = base64.b64decode(value)
                elif value_key == 'BulkDataURI':
                    if bulk_data_uri_handler is None:
                        logger.warning(
                            'no bulk data URI handler provided for retrieval '
                            'of value of data element "{}"'.format(tag))
                        elem_value = b''
                    else:
                        elem_value = bulk_data_uri_handler(value)
                else:
                    if value:
                        elem_value = value[0]
                    else:
                        elem_value = value
            else:
                elem_value = value
        if elem_value is None:
            logger.warning('missing value for data element "{}"'.format(tag))
            elem_value = ''

        elem_value = jsonrep.convert_to_python_number(elem_value, vr)

        try:
            if compat.in_py2 and vr == "PN":
                elem_value = PersonNameUnicode(elem_value, 'UTF8')
            return DataElement(tag=tag, value=elem_value, VR=vr)
        except Exception:
            raise ValueError(
                'Data element "{}" could not be loaded from JSON: {}'.format(
                    tag, elem_value))
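
A sketch of calling this classmethod directly with a hypothetical DICOM JSON fragment for (0010,0010) Patient Name. The signature shown above (with value_key) belongs to older pydicom releases, so treat the call as illustrative rather than a stable API.

from pydicom.dataelem import DataElement
from pydicom.dataset import Dataset

tag = '00100010'  # Patient Name
body = {'vr': 'PN', 'Value': [{'Alphabetic': 'Doe^John'}]}
elem = DataElement.from_json(Dataset, tag, body['vr'], body['Value'], 'Value')
print(elem.VR, elem.value)  # expected: PN Doe^John
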
Example #6
def read_undefined_length_value(fp,
                                is_little_endian,
                                delimiter_tag,
                                defer_size=None,
                                read_size=1024*8):
    """Read until the delimiter tag found and return the value;
     ignore the delimiter.

    On completion, the file will be set to the first byte after the delimiter
    and its following four zero bytes.

    Parameters
    ----------
    fp : a file-like object
    is_little_endian : boolean
        True if file transfer syntax is little endian, else False.
    delimiter_tag : BaseTag
        Tag used as end marker for reading.
    defer_size : int, None, optional
        Size to avoid loading large elements in memory.
        See ``filereader.read_file`` for more parameter info.
    read_size : int
        Number of bytes to read at one time.

    Returns
    -------
    value : bytes, None
        The value read up to (but not including) the delimiter, or None if
        the length read exceeded `defer_size`.

    Raises
    ------
    EOFError
        If EOF is reached before delimiter found.
    """
    data_start = fp.tell()
    search_rewind = 3

    if is_little_endian:
        bytes_format = b"<HH"
    else:
        bytes_format = b">HH"
    bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)

    found = False
    eof = False
    value_chunks = []
    defer_size = size_in_bytes(defer_size)
    byte_count = 0  # for defer_size checks
    while not found:
        chunk_start = fp.tell()
        bytes_read = fp.read(read_size)
        if len(bytes_read) < read_size:
            # try again - if still don't get required amount,
            # this is the last block
            new_bytes = fp.read(read_size - len(bytes_read))
            bytes_read += new_bytes
            if len(bytes_read) < read_size:
                eof = True  # but will still check whatever we did get
        index = bytes_read.find(bytes_to_find)
        if index != -1:
            found = True
            new_bytes = bytes_read[:index]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(bytes_read[:index])
            fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
            length = fp.read(4)
            if length != b"\0\0\0\0":
                msg = ("Expected 4 zero bytes after undefined length delimiter"
                       " at pos {0:04x}")
                logger.error(msg.format(fp.tell() - 4))
        elif eof:
            fp.seek(data_start)
            raise EOFError("End of file reached before delimiter {0!r} found".
                           format(delimiter_tag))
        else:
            # rewind a bit in case delimiter crossed read_size boundary
            fp.seek(fp.tell() - search_rewind)
            # accumulate the bytes read (not including the rewind)
            new_bytes = bytes_read[:-search_rewind]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
    # if get here then have found the byte string
    if defer_size is not None and byte_count >= defer_size:
        return None
    else:
        return b"".join(value_chunks)
Example #7
def read_dataset(fp: BinaryIO,
                 is_implicit_VR: bool,
                 is_little_endian: bool,
                 bytelength: Optional[int] = None,
                 stop_when: Optional[Callable[[BaseTag, Optional[str], int],
                                              bool]] = None,
                 defer_size: Optional[Union[str, int, float]] = None,
                 parent_encoding: Union[
                     str, MutableSequence[str]] = default_encoding,
                 specific_tags: Optional[List[BaseTag]] = None,
                 at_top_level: bool = True) -> Dataset:
    """Return a :class:`~pydicom.dataset.Dataset` instance containing the next
    dataset in the file.

    Parameters
    ----------
    fp : file-like
        An opened file-like object.
    is_implicit_VR : bool
        ``True`` if file transfer syntax is implicit VR.
    is_little_endian : bool
        ``True`` if file has little endian transfer syntax.
    bytelength : int, None, optional
        ``None`` to read until end of file or ItemDelimiterTag, else a fixed
        number of bytes to read.
    stop_when : None, optional
        Optional callback function which can terminate reading. See help for
        :func:`data_element_generator` for details.
    defer_size : int, str or float, optional
        Size to avoid loading large elements in memory. See :func:`dcmread` for
        more parameter info.
    parent_encoding : str or List[str]
        Optional encoding to use as a default in case (0008,0005) *Specific
        Character Set* isn't specified.
    specific_tags : list of BaseTag, optional
        See :func:`dcmread` for parameter info.
    at_top_level: bool
        ``True`` if the dataset is at the top level (not within a sequence).
        Used to turn off the explicit VR heuristic within sequences.

    Returns
    -------
    dataset.Dataset
        A Dataset instance.

    See Also
    --------
    :class:`~pydicom.dataset.Dataset`
        A collection (dictionary) of DICOM
        :class:`~pydicom.dataelem.DataElement` instances.
    """
    raw_data_elements: Dict[BaseTag, Union[RawDataElement, DataElement]] = {}
    fp_start = fp.tell()
    is_implicit_VR = _is_implicit_vr(fp,
                                     is_implicit_VR,
                                     is_little_endian,
                                     stop_when,
                                     is_sequence=not at_top_level)
    fp.seek(fp_start)
    de_gen = data_element_generator(
        fp,
        is_implicit_VR,
        is_little_endian,
        stop_when,
        defer_size,
        parent_encoding,
        specific_tags,
    )
    try:
        while (bytelength is None) or (fp.tell() - fp_start < bytelength):
            raw_data_element = next(de_gen)
            # Read data elements. Stop on some errors, but return what was read
            tag = raw_data_element.tag
            # Check for ItemDelimiterTag --dataset is an item in a sequence
            if tag == BaseTag(0xFFFEE00D):
                break
            raw_data_elements[tag] = raw_data_element
    except StopIteration:
        pass
    except EOFError as details:
        if config.settings.reading_validation_mode == config.RAISE:
            raise
        msg = str(details) + " in file " + getattr(fp, "name", "<no filename>")
        warnings.warn(msg, UserWarning)
    except NotImplementedError as details:
        logger.error(details)

    ds = Dataset(raw_data_elements)

    encoding: Union[str, MutableSequence[str]]
    if 0x00080005 in raw_data_elements:
        elem = cast(RawDataElement, raw_data_elements[BaseTag(0x00080005)])
        char_set = cast(Optional[Union[str, MutableSequence[str]]],
                        DataElement_from_raw(elem).value)
        encoding = convert_encodings(char_set)  # -> List[str]
    else:
        encoding = parent_encoding  # -> Union[str, MutableSequence[str]]

    ds.set_original_encoding(is_implicit_VR, is_little_endian, encoding)
    return ds
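
The typed stop_when parameter expects a callable taking (tag, VR, length) and returning a bool; below is a sketch of such a callback that halts the read at (7FE0,0010) Pixel Data. The function name is illustrative, not part of pydicom's public API.

from typing import Optional
from pydicom.tag import BaseTag, Tag

def stop_before_pixels(tag: BaseTag, vr: Optional[str], length: int) -> bool:
    # Returning True tells data_element_generator to stop reading here
    return tag == Tag(0x7FE0, 0x0010)

# ds = read_dataset(fp, is_implicit_VR, is_little_endian,
#                   stop_when=stop_before_pixels)
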
Example #8
def read_undefined_length_value(fp: BinaryIO,
                                is_little_endian: bool,
                                delimiter_tag: BaseTag,
                                defer_size: Optional[Union[int, float]] = None,
                                read_size: int = 1024 * 8) -> Optional[bytes]:
    """Read until `delimiter_tag` and return the value up to that point.

    On completion, the file will be set to the first byte after the delimiter
    and its following four zero bytes.

    Parameters
    ----------
    fp : file-like
        The file-like to read.
    is_little_endian : bool
        ``True`` if file transfer syntax is little endian, else ``False``.
    delimiter_tag : BaseTag
        Tag used as end marker for reading
    defer_size : int or None, optional
        Size to avoid loading large elements in memory. See
        :func:`~pydicom.filereader.dcmread` for more parameter info.
    read_size : int, optional
        Number of bytes to read at one time.

    Returns
    -------
    value : bytes or None
        The value read up to (but not including) `delimiter_tag`, or ``None``
        if the value was deferred due to `defer_size`.

    Raises
    ------
    EOFError
        If EOF is reached before delimiter found.
    """
    data_start = fp.tell()
    defer_size = size_in_bytes(defer_size)

    # It's common for an undefined length value item to be an
    # encapsulated pixel data as defined in PS3.5 section A.4.
    # Attempt to parse the data under that assumption, since the method
    #  1. is proof against coincidental embedded sequence delimiter tags
    #  2. avoids accumulating any data in memory if the element is large
    #     enough to be deferred
    #  3. does not double-accumulate data (in chunks and then joined)
    #
    # Unfortunately, some implementations deviate from the standard and the
    # encapsulated pixel data-parsing algorithm fails. In that case, we fall
    # back to a method of scanning the entire element value for the
    # sequence delimiter, as was done historically.
    if delimiter_tag == SequenceDelimiterTag:
        was_value_found, value = _try_read_encapsulated_pixel_data(
            fp, is_little_endian, defer_size)
        if was_value_found:
            return value

    search_rewind = 3

    if is_little_endian:
        bytes_format = b"<HH"
    else:
        bytes_format = b">HH"
    bytes_to_find = pack(bytes_format, delimiter_tag.group, delimiter_tag.elem)

    found = False
    eof = False
    value_chunks = []
    byte_count = 0  # for defer_size checks
    while not found:
        chunk_start = fp.tell()
        bytes_read = fp.read(read_size)
        if len(bytes_read) < read_size:
            # try again - if still don't get required amount,
            # this is the last block
            new_bytes = fp.read(read_size - len(bytes_read))
            bytes_read += new_bytes
            if len(bytes_read) < read_size:
                eof = True  # but will still check whatever we did get
        index = bytes_read.find(bytes_to_find)
        if index != -1:
            found = True
            new_bytes = bytes_read[:index]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
            fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
            length = fp.read(4)
            if length != b"\0\0\0\0":
                msg = ("Expected 4 zero bytes after undefined length delimiter"
                       " at pos {0:04x}")
                logger.error(msg.format(fp.tell() - 4))
        elif eof:
            fp.seek(data_start)
            raise EOFError(
                "End of file reached before delimiter {0!r} found".format(
                    delimiter_tag))
        else:
            # rewind a bit in case delimiter crossed read_size boundary
            fp.seek(fp.tell() - search_rewind)
            # accumulate the bytes read (not including the rewind)
            new_bytes = bytes_read[:-search_rewind]
            byte_count += len(new_bytes)
            if defer_size is None or byte_count < defer_size:
                value_chunks.append(new_bytes)
    # if get here then have found the byte string
    if defer_size is not None and byte_count >= defer_size:
        return None
    else:
        return b"".join(value_chunks)