def desensitize(ds,
                sensitive_tags=None,
                img_ds=True,
                img_ds_area=(None, None, None, None)):
    """ 脱敏函数
    :param ds: Dataset 待脱敏Dataset对象
    :param sensitive_tags: list 敏感标签
    :param img_ds: bool 是否进行图像脱敏
    :param img_ds_area: tuple 图像脱敏区域(x_min, x_max, y_min, y_max) None 表示不限
    :return: Dataset 已脱敏对象
    """
    if not sensitive_tags:
        sensitive_tags = [
            (0x0008, 0x0080),
            (0x0008, 0x0081),
            (0x0008, 0x1040),
            (0x0010, 0x0010),
            (0x0033, 0x1013),
            (0x0019, 0x161b),
        ]
    # 标签脱敏
    for tag in sensitive_tags:
        tag = TupleTag(tag)
        logger.debug('Desensitizing %s tag.' % tag)
        ds.pop(tag, None)
    # 图像脱敏
    if ds[0x0008, 0x0060].value in ('CR', 'DR') and img_ds:
        logger.debug('Desensitizing pixels data')
        pixels = np.fromstring(ds.PixelData, dtype=np.uint16)
        pixels = pixels.reshape([ds.Rows, ds.Columns])
        pixels = fill_zero_color(pixels, img_ds_area)
        ds.PixelData = pixels.tostring()
    return ds
Exemple #2
0
    def testNestedPrivateSQ(self):
        """Can successfully read a private SQ which contains additional SQ's....."""
        # From issue 113. When a private SQ of undefined length is used, the
        #   sequence is read in and the length of the SQ is determined upon
        #   identification of the SQ termination sequence. When using nested
        #   Sequences, the first termination sequence encountered actually
        #   belongs to the nested Sequence not the parent, therefore the
        #   remainder of the file is not read in properly
        ds = read_file(nested_priv_SQ_name)

        # Make sure that the entire dataset was read in
        pixel_data_tag = TupleTag((0x7fe0, 0x10))
        self.assertTrue(
            pixel_data_tag in ds,
            "Entire dataset was not parsed properly. PixelData is not present")

        # Check that the DataElement is indeed a Sequence
        tag = TupleTag((0x01, 0x01))
        seq0 = ds[tag]
        self.assertEqual(seq0.VR, 'SQ',
                         "First level sequence not parsed properly")

        # Now verify the presence of the nested private SQ
        seq1 = seq0[0][tag]
        self.assertEqual(seq1.VR, 'SQ',
                         "Second level sequence not parsed properly")

        # Now make sure the values that are parsed are correct
        got = seq1[0][tag].value
        expected = b'Double Nested SQ'
        self.assertEqual(got, expected,
                         "Expected a value of %s, got %s'" % (expected, got))

        got = seq0[0][0x01, 0x02].value
        expected = b'Nested SQ'
        self.assertEqual(got, expected,
                         "Expected a value of %s, got %s'" % (expected, got))
def desensitize(ds,
                sensitive_tags=None,
                img_ds=True,
                img_ds_area=(None, None, None, None)):
    """ 脱敏函数
    :param ds: Dataset 待脱敏Dataset对象
    :param sensitive_tags: list 敏感标签
    :param img_ds: bool 是否进行图像脱敏
    :param img_ds_area: tuple 图像脱敏区域(x_min, x_max, y_min, y_max) None 表示不限
    :return: Dataset 已脱敏对象
    """
    if not sensitive_tags:
        sensitive_tags = SENSITIVE_TAGS
        # 标签脱敏
    for tag in sensitive_tags:
        tag = TupleTag(tag)
        logger.debug('Desensitizing %s tag.' % tag)
        ds.pop(tag, None)
    return ds
def absorb_delimiter_item(fp, is_little_endian, delimiter):
    """Read (and ignore) undefined length sequence or item terminators."""
    if is_little_endian:
        struct_format = "<HHL"
    else:
        struct_format = ">HHL"
    group, elem, length = unpack(struct_format, fp.read(8))
    tag = TupleTag((group, elem))
    if tag != delimiter:
        msg = "Did not find expected delimiter '%s'" % dictionary_description(
            delimiter)
        msg += ", instead found %s at file position 0x%x" % (str(tag),
                                                             fp.tell() - 8)
        logger.warn(msg)
        fp.seek(fp.tell() - 8)
        return
    logger.debug("%04x: Found Delimiter '%s'",
                 fp.tell() - 8, dictionary_description(delimiter))
    if length == 0:
        logger.debug("%04x: Read 0 bytes after delimiter", fp.tell() - 4)
    else:
        logger.debug("%04x: Expected 0x00000000 after delimiter, found 0x%x",
                     fp.tell() - 4, length)
Exemple #5
0
def convert_tag(byte_string, is_little_endian, offset=0):
    if is_little_endian:
        struct_format = "<HH"
    else:
        struct_format = ">HH"
    return TupleTag(unpack(struct_format, byte_string[offset:offset + 4]))
Exemple #6
0
 def testGoodTuple(self):
     """Tags can be constructed with two-tuple of 2-byte integers."""
     tag = TupleTag((0xFFFF, 0xFFee))
     tag = TupleTag((0x300a, 0x00b0))
     self.assertEqual(tag.group, 0x300a, "Expected tag.group 0x300a, got %r" % tag.group)
Exemple #7
0
def data_element_generator(fp, is_implicit_VR, is_little_endian,
                                    stop_when=None, defer_size=None, encoding=default_encoding):
    """Create a generator to efficiently return the raw data elements
    Returns (VR, length, raw_bytes, value_tell, is_little_endian),
    where:
    VR -- None if implicit VR, otherwise the VR read from the file
    length -- the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL),
    value_bytes -- the raw bytes from the DICOM file
                    (not parsed into python types)
    is_little_endian -- True if transfer syntax is little endian; else False
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length

    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        element_struct = Struct(endian_chr + "HHL")
    else:  # Explicit VR
        # tag, VR, 2-byte length (or 0 if special VRs)
        element_struct = Struct(endian_chr + "HH2sH")
        extra_length_struct = Struct(endian_chr + "L")  # for special VRs
        extra_length_unpack = extra_length_struct.unpack  # for lookup speed

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = dicom.debugging
    element_struct_unpack = element_struct.unpack

    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)
        if len(bytes_read) < 8:
            raise StopIteration  # at end of file
        if debugging:
            debug_msg = "{0:08x}: {1}".format(fp.tell() - 8,
                                             bytes2hex(bytes_read))

        if is_implicit_VR:
            # must reset VR each time; could have set last iteration (e.g. SQ)
            VR = None
            group, elem, length = element_struct_unpack(bytes_read)
        else:  # explicit VR
            group, elem, VR, length = element_struct_unpack(bytes_read)
            if in_py3:
                VR = VR.decode(default_encoding)
            if VR in extra_length_VRs:
                bytes_read = fp_read(4)
                length = extra_length_unpack(bytes_read)[0]
                if debugging:
                    debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR:
                debug_msg += " %s " % VR
            if length != 0xFFFFFFFFL:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        tag = TupleTag((group, elem))
        if stop_when is not None:
            # XXX VR may be None here!! Should stop_when just take tag?
            if stop_when(tag, VR, length):
                if debugging:
                    logger_debug("Reading ended by stop_when callback. "
                                 "Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in extra_length_VRs:
                    rewind_length += 4
                fp.seek(value_tell - rewind_length)
                raise StopIteration

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFFL:
            if defer_size is not None and length > defer_size:
                # Flag as deferred by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded."
                            "Skipping forward to next data element.")
                fp.seek(fp_tell() + length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell,
                           bytes2hex(value[:12]), dotdot, value[:12], dotdot))

            # If the tag is (0008,0005) Specific Character Set, then store it
            if tag == (0x08, 0x05):
                from dicom.values import convert_string
                encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                # Store the encoding value in the generator for use with future elements (SQs)
                encoding = convert_encodings(encoding)

            yield RawDataElement(tag, VR, length, value, value_tell,
                                     is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        # unless is SQ type, in which case is easier to parse it, because
        # undefined length SQs and items of undefined lengths can be nested
        # and it would be error-prone to read to the correct outer delimiter
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes unless it is
            #   identified as a Sequence
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError:
                    # Look ahead to see if it consists of items and is thus a SQ
                    next_tag = TupleTag(unpack(endian_chr + "HH", fp_read(4)))
                    # Rewind the file
                    fp.seek(fp_tell() - 4)
                    if next_tag == ItemTag:
                        VR = 'SQ'

            if VR == 'SQ':
                if debugging:
                    msg = "{0:08x}: Reading/parsing undefined length sequence"
                    logger_debug(msg.format(fp_tell()))
                seq = read_sequence(fp, is_implicit_VR,
                                                    is_little_endian, length, encoding)
                yield DataElement(tag, VR, seq, value_tell,
                                                    is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian,
                                                delimiter, defer_size)

                # If the tag is (0008,0005) Specific Character Set, then store it
                if tag == (0x08, 0x05):
                    from dicom.values import convert_string
                    encoding = convert_string(value, is_little_endian, encoding=default_encoding)
                    # Store the encoding value in the generator for use with future elements (SQs)
                    encoding = convert_encodings(encoding)

                yield RawDataElement(tag, VR, length, value, value_tell,
                                is_implicit_VR, is_little_endian)
Exemple #8
0
def data_element_generator(fp, is_implicit_VR, is_little_endian, stop_when=None, 
                            defer_size=None):
    """Create a generator to efficiently return the raw data elements
    Specifically, returns (VR, length, raw_bytes, value_tell, is_little_endian),
    where:
    VR -- None if implicit VR, otherwise the VR read from the file
    length -- the length as in the DICOM data element (could be
        DICOM "undefined length" 0xffffffffL),
    value_bytes -- the raw bytes from the DICOM file (not parsed into python types)
    is_little_endian -- True if transfer syntax is little endian; else False
    """
    # Summary of DICOM standard PS3.5-2008 chapter 7:
    # If Implicit VR, data element is:
    #    tag, 4-byte length, value.
    #       The 4-byte length can be FFFFFFFF (undefined length)*
    # If Explicit VR:
    #    if OB, OW, OF, SQ, UN, or UT:
    #       tag, VR, 2-bytes reserved (both zero), 4-byte length, value
    #           For all but UT, the length can be FFFFFFFF (undefined length)*
    #   else: (any other VR)
    #       tag, VR, (2 byte length), value
    # * for undefined length, a Sequence Delimitation Item marks the end
    #        of the Value Field.
    # Note, except for the special_VRs, both impl and expl VR use 8 bytes;
    #    the special VRs follow the 8 bytes with a 4-byte length
    
    # With a generator, state is stored, so we can break down
    #    into the individual cases, and not have to check them again for each
    #    data element

    # Make local variables so have faster lookup
    fp_read = fp.read
    fp_tell = fp.tell
    logger_debug = logger.debug
    debugging = dicom.debugging
    
    if is_little_endian:
        endian_chr = "<"
    else:
        endian_chr = ">"
    if is_implicit_VR:
        unpack_format = endian_chr + "HHL" # XXX in python >=2.5, can do struct.Struct to save time
    else: # Explicit VR
        unpack_format = endian_chr + "HH2sH" # tag, VR, 2-byte length (or 0 if special VRs)
        extra_length_format = endian_chr + "L"  # for special VRs
        
    while True:
        # Read tag, VR, length, get ready to read value
        bytes_read = fp_read(8)        
        if len(bytes_read) < 8:
            raise StopIteration # at end of file
        if debugging: debug_msg = "%08x: %s" % (fp.tell()-8, bytes2hex(bytes_read))
        
        if is_implicit_VR:
            VR = None # must reset each time -- may have looked up on last iteration (e.g. SQ)
            group, elem, length = unpack(unpack_format, bytes_read)
        else: # explicit VR
            group, elem, VR, length = unpack(unpack_format, bytes_read)
            if VR in ('OB','OW','OF','SQ','UN', 'UT'):
                bytes_read = fp_read(4)
                length = unpack(extra_length_format, bytes_read)[0]
                if debugging: debug_msg += " " + bytes2hex(bytes_read)
        if debugging:
            debug_msg = "%-47s  (%04x, %04x)" % (debug_msg, group, elem)
            if not is_implicit_VR: debug_msg += " %s " % VR
            if length != 0xFFFFFFFFL:
                debug_msg += "Length: %d" % length
            else:
                debug_msg += "Length: Undefined length (FFFFFFFF)"
            logger_debug(debug_msg)

        # Now are positioned to read the value, but may not want to -- check stop_when
        value_tell = fp_tell()
        # logger.debug("%08x: start of value of length %d" % (value_tell, length))
        tag = TupleTag((group, elem))
        if stop_when is not None:
            if stop_when(tag, VR, length): # XXX VR may be None here!! Should stop_when just take tag?
                if debugging: 
                    logger_debug("Reading ended by stop_when callback. Rewinding to start of data element.")
                rewind_length = 8
                if not is_implicit_VR and VR in ('OB','OW','OF','SQ','UN', 'UT'): 
                    rewind_length += 4
                fp.seek(value_tell-rewind_length)
                raise StopIteration

        # Reading the value
        # First case (most common): reading a value with a defined length
        if length != 0xFFFFFFFFL:
            if defer_size is not None and length > defer_size:
                # Flag as deferred read by setting value to None, and skip bytes
                value = None
                logger_debug("Defer size exceeded. Skipping forward to next data element.")
                fp.seek(fp_tell()+length)
            else:
                value = fp_read(length)
                if debugging:
                    dotdot = "   "
                    if length > 12:
                        dotdot = "..."
                    logger_debug("%08x: %-34s %s %r %s" % (value_tell, 
                              bytes2hex(value[:12]), dotdot, value[:12], dotdot))
            yield RawDataElement(tag, VR, length, value, value_tell, 
                                     is_implicit_VR, is_little_endian)

        # Second case: undefined length - must seek to delimiter,
        #  ... unless is SQ type, in which case is easier to parse it, because
        #      undefined length SQs and items of undefined lengths can be nested
        #      and it would be error-prone to read to the correct outer delimiter 
        else:
            # Try to look up type to see if is a SQ
            # if private tag, won't be able to look it up in dictionary,
            #   in which case just ignore it and read the bytes
            if VR is None:
                try:
                    VR = dictionaryVR(tag)
                except KeyError: 
                    pass
            if VR == 'SQ':
                if debugging:
                    logger_debug("%08x: Reading and parsing undefined length sequence"
                                % fp_tell())
                seq = read_sequence(fp, is_implicit_VR, is_little_endian, length)
                yield DataElement(tag, VR, seq, value_tell, is_undefined_length=True)
            else:
                delimiter = SequenceDelimiterTag
                if debugging:
                    logger_debug("Reading undefined length data element")
                value = read_undefined_length_value(fp, is_little_endian, delimiter,
                                        defer_size)
                yield RawDataElement(tag, VR, length, value, value_tell,
                                is_implicit_VR, is_little_endian)