def test_bad_charset(self):
    """An unknown charset falls back to the default (ISO IR 6)."""
    # Python 3: elem.value is PersonName3, Python 2: elem.value is str
    elem = DataElement(0x00100010, 'PN', 'CITIZEN')
    pydicom.charset.decode(elem, ['ISO 2022 IR 126'])
    # After decode Python 2: elem.value is PersonNameUnicode
    encodings = elem.value.encodings
    assert 'iso_ir_126' in encodings
    assert 'iso8859' not in encodings
    # With an empty charset list the default encoding (iso8859) is used
    pydicom.charset.decode(elem, [])
    assert 'iso8859' in elem.value.encodings
def test_code_extensions_not_allowed(self):
    """Test that UTF8 does not allow code extensions"""
    elem = DataElement(0x00100010, 'PN', b'Buc^J\xc3\xa9r\xc3\xb4me')
    msg = ("Value 'ISO_IR 192' for Specific Character Set does not "
           "allow code extensions, ignoring: ISO 2022 IR 100, "
           "ISO 2022 IR 144")
    # The extra code-extension charsets must be ignored with a warning
    charsets = ['ISO_IR 192', 'ISO 2022 IR 100', 'ISO 2022 IR 144']
    with pytest.warns(UserWarning, match=msg):
        pydicom.charset.decode(elem, charsets)
    assert u'Buc^Jérôme' == elem.value
def test_unknown_escape_sequence_enforce_standard(
        self, enforce_valid_values):
    """Test handling bad encoding for single encoding if
    `config.enforce_valid_values` is set"""
    # Encoded value starts with an escape sequence unknown to ISO_IR 100
    raw = b'\x1b\x2d\x46\xc4\xe9\xef\xed\xf5\xf3\xe9\xef\xf2'
    elem = DataElement(0x00100010, 'PN', raw)
    with pytest.raises(ValueError,
                       match='Found unknown escape sequence '
                             'in encoded string value'):
        pydicom.charset.decode_element(elem, ['ISO_IR 100'])
def test_unknown_escape_sequence(self, allow_invalid_values):
    """Test handling bad encoding for single encoding"""
    # Encoded value starts with an escape sequence unknown to ISO_IR 100
    raw = b'\x1b\x2d\x46\xc4\xe9\xef\xed\xf5\xf3\xe9\xef\xf2'
    elem = DataElement(0x00100010, 'PN', raw)
    with pytest.warns(UserWarning,
                      match='Found unknown escape sequence '
                            'in encoded string value'):
        pydicom.charset.decode_element(elem, ['ISO_IR 100'])
    # The escape bytes are kept literally in the decoded value
    assert '\x1b-FÄéïíõóéïò' == elem.value
def test_read_OV_explicit_little(self):
    """Check reading element with VR of OV encoded as explicit"""
    ds = dcmread(self.fp_ex, force=True)
    val = (b'\x00\x00\x00\x00\x00\x00\x00\x00'
           b'\x01\x02\x03\x04\x05\x06\x07\x08')
    elem = ds['ExtendedOffsetTable']
    # The element must come back with the expected VR, tag and value,
    # and compare equal to a freshly constructed DataElement
    assert 'OV' == elem.VR
    assert 0x7FE00001 == elem.tag
    assert val == elem.value
    assert elem == DataElement(0x7FE00001, 'OV', val)
def test_little_endian_implicit(self):
    """Test reading little endian implicit VR data"""
    # (0010, 0010) PatientName PN 6 ABCDEF
    bytestream = (b'\x10\x00\x10\x00'
                  b'\x06\x00\x00\x00'
                  b'ABCDEF')
    gen = data_element_generator(BytesIO(bytestream),
                                 is_implicit_VR=True,
                                 is_little_endian=True)
    expected = DataElement(0x00100010, 'PN', 'ABCDEF')
    assert expected == DataElement_from_raw(next(gen), 'ISO_IR 100')
def test_big_endian_explicit(self):
    """Test reading big endian explicit VR data"""
    # (0010, 0010) PatientName PN 6 ABCDEF
    bytestream = (b'\x00\x10\x00\x10'
                  b'PN'
                  b'\x00\x06'
                  b'ABCDEF')
    # fp, is_implicit_VR, is_little_endian,
    gen = data_element_generator(BytesIO(bytestream), False, False)
    expected = DataElement(0x00100010, 'PN', 'ABCDEF')
    assert expected == DataElement_from_raw(next(gen), 'ISO_IR 100')
def setup(self):
    # Build a set of representative DataElements used by the tests
    # in this class: simple string VRs, numeric string VRs, a
    # multi-valued element, and command/private/retired group tags.
    self.data_elementSH = DataElement((1, 2), "SH", "hello")
    self.data_elementIS = DataElement((1, 2), "IS", "42")
    self.data_elementDS = DataElement((1, 2), "DS", "42.00001")
    self.data_elementMulti = DataElement((1, 2), "DS",
                                         ['42.1', '42.2', '42.3'])
    self.data_elementCommand = DataElement(0x00000000, 'UL', 100)
    self.data_elementPrivate = DataElement(0x00090000, 'UL', 101)
    # NOTE(review): value 102 is an int for an SH (string) VR —
    # presumably intentional for these tests; confirm against usage.
    self.data_elementRetired = DataElement(0x00080010, 'SH', 102)
    # Ensure empty text VRs are represented as '' rather than None
    config.use_none_as_empty_text_VR_value = False
def test_write_invalid_non_ascii_pn(self, enforce_writing_invalid_values):
    """Writing an over-long non-ASCII PN component raises ValueError."""
    fp = DicomBytesIO()
    ds = Dataset()
    ds.is_implicit_VR = False
    ds.is_little_endian = True
    ds.SpecificCharacterSet = "ISO_IR 192"  # UTF-8
    # string length is 40, but the encoded component length is 100
    ds.add(DataElement(0x00100010, "PN", "洪^吉洞" * 10))
    msg = r"The PN component length \(100\) exceeds the maximum allowed *"
    with pytest.raises(ValueError, match=msg):
        ds.save_as(fp)
def test_known_tags_with_UN_VR(self):
    """Known tags with VR UN are correctly decoded."""
    # Case 1: single-byte character set, both elements carry VR UN
    ds = Dataset()
    ds[0x00080005] = DataElement(0x00080005, 'UN', b'ISO_IR 126')
    ds[0x00100010] = DataElement(0x00100010, 'UN',
                                 'Διονυσιος'.encode('iso_ir_126'))
    ds.decode()
    assert 'CS' == ds[0x00080005].VR
    assert 'PN' == ds[0x00100010].VR
    assert 'Διονυσιος' == ds[0x00100010].value

    # Case 2: code extensions with an escape sequence inside the name
    ds = Dataset()
    ds[0x00080005] = DataElement(0x00080005, 'UN',
                                 b'ISO 2022 IR 100\\ISO 2022 IR 126')
    ds[0x00100010] = DataElement(
        0x00100010, 'UN',
        b'Dionysios=\x1b\x2d\x46' + 'Διονυσιος'.encode('iso_ir_126'))
    ds.decode()
    assert 'CS' == ds[0x00080005].VR
    assert 'PN' == ds[0x00100010].VR
    assert 'Dionysios=Διονυσιος' == ds[0x00100010].value
def test_long_specific_char_set(self):
    """Test that specific character set is read even if it is longer
    than defer_size"""
    ds = Dataset()
    long_specific_char_set_value = ['ISO 2022IR 100'] * 9
    ds.add(DataElement(0x00080005, 'CS', long_specific_char_set_value))
    fp = BytesIO()
    FileDataset(fp, ds).save_as(fp)
    # The charset element must not be deferred despite exceeding defer_size
    ds = read_file(fp, defer_size=65, force=True)
    self.assertEqual(ds[0x00080005].value, long_specific_char_set_value)
def create_data(self, disable_value_validation):
    # Fixture: builds representative DataElements (string, numeric-string,
    # multi-valued, command/private/retired tags) for the tests below.
    self.data_elementSH = DataElement((1, 2), "SH", "hello")
    self.data_elementIS = DataElement((1, 2), "IS", "42")
    self.data_elementDS = DataElement((1, 2), "DS", "42.00001")
    self.data_elementMulti = DataElement((1, 2), "DS",
                                         ['42.1', '42.2', '42.3'])
    self.data_elementCommand = DataElement(0x00000000, 'UL', 100)
    self.data_elementPrivate = DataElement(0x00090000, 'UL', 101)
    self.data_elementRetired = DataElement(0x00080010, 'SH', "102")
    # Ensure empty text VRs are represented as '' rather than None
    config.use_none_as_empty_text_VR_value = False
    yield
    # Teardown: reset the config flag after the test runs
    config.use_none_as_empty_text_VR_value = False
def test_long_specific_char_set(self):
    """Test that specific character set is read even if it is longer
    than defer_size"""
    ds = Dataset()
    charset_value = ['ISO 2022IR 100'] * 9
    ds.add(DataElement(0x00080005, 'CS', charset_value))
    fp = BytesIO()
    file_ds = FileDataset(fp, ds)
    file_ds.save_as(fp, write_like_original=True)
    # The charset element must not be deferred despite exceeding defer_size
    ds = dcmread(fp, defer_size=65, force=True)
    assert charset_value == ds[0x00080005].value
def test_read_invalid_length_non_ascii_text(self):
    """Reading an over-long non-ASCII SH value does not warn."""
    fp = DicomBytesIO()
    ds = Dataset()
    ds.is_implicit_VR = True
    ds.is_little_endian = True
    ds.SpecificCharacterSet = "ISO_IR 192"  # UTF-8
    ds.add(DataElement(0x00080050, "SH", "洪^吉洞=홍^길동"))
    # disable value validation to write an invalid value
    with config.disable_value_validation():
        ds.save_as(fp)
    # no warning will be issued during reading, as only RawDataElement
    # objects are read
    ds = dcmread(fp, force=True)
def test_equality_inheritance(self):
    """DataElement: equality returns correct value for subclasses"""
    class DataElementPlus(DataElement):
        pass

    base = DataElement(0x00100010, 'PN', 'ANON')
    sub = DataElementPlus(0x00100010, 'PN', 'ANON')
    # Equal values: symmetric equality across the class hierarchy
    assert sub == sub
    assert base == sub
    assert sub == base
    # Different values: symmetric inequality across the class hierarchy
    sub = DataElementPlus(0x00100010, 'PN', 'ANONY')
    assert not base == sub
    assert not sub == base
def testEqualityInheritance(self):
    """DataElement: equality returns correct value for subclasses"""
    class DataElementPlus(DataElement):
        pass

    base = DataElement(0x00100010, 'PN', 'ANON')
    sub = DataElementPlus(0x00100010, 'PN', 'ANON')
    # Equal values: symmetric equality across the class hierarchy
    self.assertTrue(sub == sub)
    self.assertTrue(base == sub)
    self.assertTrue(sub == base)
    # Different values: symmetric inequality across the class hierarchy
    sub = DataElementPlus(0x00100010, 'PN', 'ANONY')
    self.assertFalse(base == sub)
    self.assertFalse(sub == base)
def test_dataelement_dumphandler(self):
    """to_json serializes PN elements; a custom dump handler is honored."""
    element = DataElement(0x00100010, 'PN', 'Jane^Doe')
    # as the order of the keys is not defined, we have to check both
    variant_a = '{"vr": "PN", "Value": [{"Alphabetic": "Jane^Doe"}]}'
    variant_b = '{"Value": [{"Alphabetic": "Jane^Doe"}], "vr": "PN"}'
    assert element.to_json() in (variant_a, variant_b)
    # An identity dump handler returns the raw dict untouched
    expected = {"vr": "PN", "Value": [{"Alphabetic": "Jane^Doe"}]}
    assert expected == element.to_json(dump_handler=lambda d: d)
def apply_rules(self, rules: RuleSet, dataset: Dataset) -> Dataset:
    """Apply rules to each element in dataset, recursing into sequence
    elements

    This creates a deep copy of the input dataset. Except for PixelData,
    which will be a reference. PixelData is not copied because it can take
    up a lot of memory

    Parameters
    ----------
    rules: RuleSet
        The rules to apply to each element
    dataset: Dataset
        The dataset to de-identify

    Returns
    -------
    Dataset
        A de-identified copy of `dataset`
    """
    deidentified = Dataset()
    pixel_data_tag = 0x7FE00010
    for element in dataset:
        if element.tag == pixel_data_tag:
            # add pixel data as reference to save mem
            deidentified.add(element)
        elif element.VR == VRs.Sequence.short_name:
            # recurse into sequences
            deidentified.add(
                DataElement(
                    tag=element.tag,
                    VR=element.VR,
                    value=Sequence([
                        self.apply_rules(rules, sub_dataset)
                        for sub_dataset in element
                    ]),
                ))
        elif rule := rules.get_rule(element):
            if type(rule.operation) == Remove:
                continue  # special handling. should be removed, do not add
            try:
                new = rule.operation.apply(element, dataset)
                deidentified.add(new)
            except ElementShouldBeRemoved:  # Operators signals removal
                continue
        else:
            # no rule found. Just copy the element over
            deidentified.add(
                DataElement(tag=element.tag, VR=element.VR,
                            value=element.value))
    # Bug fix: the original fell off the end without a return statement,
    # implicitly returning None despite the declared -> Dataset type.
    return deidentified
def get_idis_code_sequence(ruleset_names: List[str]) -> DataElement:
    """Create the element (0012,0064) - DeIdentificationMethodCodeSequence

    This sequence specifies what kind of anonymization has been performed.
    It is quite free form. This implementation uses the following format:
    DeIdentificationMethodCodeSequence will contain the code of each
    official DICOM deidentification profile that was used. Codes are taken
    from Table CID 7050

    Parameters
    ----------
    ruleset_names: List[str]
        list of names as defined in nema.E1_1_METHOD_INFO

    Returns
    -------
    DataElement
        Sequence element (0012,0064) - DeIdentificationMethodCodeSequence.
        Will contain the code of each official DICOM deidentification
        profile passed

    Raises
    ------
    ValueError
        When any name in ruleset_names is not recognized as a standard
        DICOM rule set
    """
    code_per_name = {x.full_name: x for x in E1_1_METHOD_INFO}

    def as_code_dataset(name: str) -> Dataset:
        # Look up the standard DICOM ruleset and wrap its code in a Dataset
        try:
            ruleset_info = code_per_name[name]
        except KeyError:
            raise ValueError(
                f'Could not find the code for rule set "{name}". I do'
                f" not know this ruleset")
        code_dataset = Dataset()
        code_dataset.CodeValue = ruleset_info.code
        code_dataset.CodingSchemeDesignator = "DCM"
        code_dataset.CodeMeaning = ruleset_info.full_name
        return code_dataset

    codes = [as_code_dataset(name) for name in ruleset_names]
    return DataElement(
        tag=Tag("DeidentificationMethodCodeSequence"),
        VR=VRs.Sequence.short_name,
        value=Sequence(codes),
    )
def test_empty_AT(self):
    """Write empty AT correctly.........."""
    # Was issue 74
    data_elem = DataElement(0x00280009, "AT", [])
    expected = hex2bytes((
        " 28 00 09 00"  # (0028,0009) Frame Increment Pointer
        " 00 00 00 00"  # length 0
    ))
    write_data_element(self.f1, data_elem)
    got = self.f1.parent.getvalue()
    # Bug fix: the original assigned `msg` three times in a row, so the
    # first two diagnostic messages were dead code. Combine everything
    # into one informative failure message instead.
    msg = ("Did not write zero-length AT value correctly. "
           "Expected %r (%r), got %r (%r)"
           % (bytes2hex(expected), type(expected),
              bytes2hex(got), type(got)))
    self.assertEqual(expected, got, msg)
def convert_to_dataset(json_obj):
    """Build a pydicom Dataset from a DICOM+JSON style dict.

    Each entry maps a tag key to an object with a 'vr' and an optional
    'Value'. SQ (sequence) values are recursed into; other values are
    assumed to arrive wrapped in a one-element list.

    Parameters
    ----------
    json_obj: dict
        DICOM+JSON style mapping of tag -> {'vr': ..., 'Value': [...]}

    Returns
    -------
    Dataset
        The converted dataset
    """
    # Cleanup: removed Python-2 style commented-out `print` debug lines
    # and switched to idiomatic dict iteration.
    ds = Dataset()
    for key, obj in json_obj.items():
        val = obj.get('Value', '')
        if obj['vr'] == 'SQ' and val != '':
            # Each sequence item is itself a JSON dataset - recurse
            child_ds_list = [convert_to_dataset(item) for item in val]
            ds.add(DataElement(key, obj['vr'], child_ds_list))
        else:
            if val != '':
                # Non-sequence values arrive as a one-element list
                val = val[0]
            ds.add(DataElement(key, obj['vr'], val))
    return ds
def test_overlay(self):
    """Test correcting OverlayData"""
    # Implicit VR must be 'OW'
    ref_ds = Dataset()
    ref_ds.is_implicit_VR = True
    ref_ds.add(DataElement(0x60003000, 'OB or OW', b'\x00'))
    ref_ds.add(DataElement(0x601E3000, 'OB or OW', b'\x00'))
    ds = correct_ambiguous_vr(deepcopy(ref_ds), True)
    for tag in (0x60003000, 0x601E3000):
        self.assertTrue(ds[tag].VR == 'OW')
        # The source dataset must not be modified
        self.assertTrue(ref_ds[tag].VR == 'OB or OW')

    # Explicit VR may be 'OB' or 'OW' (leave unchanged)
    ref_ds.is_implicit_VR = False
    ds = correct_ambiguous_vr(deepcopy(ref_ds), True)
    self.assertTrue(ds[0x60003000].VR == 'OB or OW')
    self.assertTrue(ref_ds[0x60003000].VR == 'OB or OW')

    # Missing is_implicit_VR (leave unchanged)
    del ref_ds.is_implicit_VR
    ds = correct_ambiguous_vr(deepcopy(ref_ds), True)
    self.assertTrue(ds[0x60003000].VR == 'OB or OW')
    self.assertTrue(ref_ds[0x60003000].VR == 'OB or OW')
def get_deidentification_method(
    method: str = DEFAULT_DEIDENTIFICATION_METHOD,
) -> DataElement:
    """Create the element (0012,0063) - DeIdentificationMethod

    A string description of the deidentification method used

    Parameters
    ----------
    method: str, optional
        String representing the deidentification method used. Defaults
        to 'idiscore <version>'

    Returns
    -------
    DataElement
        LO element (0012,0063) holding `method`
    """
    return DataElement(
        tag=Tag("DeidentificationMethod"),
        VR=VRs.LongString.short_name,
        value=method,
    )
def subfix_UpdateOrInsertCodeAttribute(seqelem: DataElement, index: int,
                                       kw: str, value: str) -> str:
    """Update keyword `kw` in item `index` of sequence element `seqelem`,
    inserting the attribute if it is not present.

    Parameters
    ----------
    seqelem: DataElement
        A sequence (SQ) element whose items are datasets
    index: int
        Index of the sequence item to modify
    kw: str
        DICOM keyword of the attribute to update or insert
    value: str
        New value for the attribute

    Returns
    -------
    str
        Human-readable description of the change that was made
    """
    # Cleanup: removed the unused `text_fun` lambda the original defined.
    item = seqelem.value[index]
    if kw in item:
        out_msg = " {} modified <{}> -> <{}>".format(
            kw, item[kw].value, value)
        item[kw].value = value
    else:
        out_msg = "{} = <{}> was added".format(kw, value)
        # Look up tag and VR from the keyword to build a new element
        newtag = Dictionary.tag_for_keyword(kw)
        newvr = Dictionary.dictionary_VR(newtag)
        item.add(DataElement(newtag, newvr, value))
    return out_msg
def test_data_element_factory_init():
    """Check different init methods """
    # factory casts tag init argument to Tag(). Verify that this works.
    instances = (
        DataElementFactory(),
        DataElement(tag="Modality", VR="SH", value="kees"),
        DataElementFactory(tag="Modality"),
        DataElementFactory(tag=(0x0010, 0x0020)),
    )
    for instance in instances:
        assert type(instance.tag) == BaseTag

    # For unknown tags, just give a default VR. Assuming the default
    # position for users of DataElementFactory will be 'Don't care,
    # just give me the Element'
    default_vr = VRs.LongString.short_name
    assert DataElementFactory(tag=(0xee30, 0xf120)).VR == default_vr
    assert DataElementFactory.create(tag=(0xee30, 0xf120),
                                     value=100).VR == default_vr
def test_write_invalid_length_non_ascii_text(
        self, enforce_writing_invalid_values):
    """Writing an SH value whose encoded length exceeds 16 bytes raises."""
    fp = DicomBytesIO()
    ds = Dataset()
    ds.is_implicit_VR = True
    ds.is_little_endian = True
    ds.SpecificCharacterSet = "ISO_IR 192"  # UTF-8
    # the string length is 9, so constructing the data element
    # is possible
    ds.add(DataElement(0x00080050, "SH", "洪^吉洞=홍^길동"))
    # encoding the element during writing shall fail,
    # as the encoded length is 21, while only 16 bytes are allowed for SH
    msg = r"The value length \(21\) exceeds the maximum length of 16 *"
    with pytest.raises(ValueError, match=msg):
        ds.save_as(fp)
def test_code_sequence(self):
    """Test utils.codify.code_dataelem"""
    # ControlPointSequence
    elem = DataElement(0x300A0111, 'SQ', [])
    elem.value.append(Dataset())
    elem[0].PatientID = '1234'
    # Expected generated code, one line per entry
    expected = '\n'.join([
        "",
        "# Control Point Sequence",
        "cp_sequence = Sequence()",
        "ds.ControlPointSequence = cp_sequence",
        "",
        "# Control Point Sequence: Control Point 1",
        "cp1 = Dataset()",
        "cp1.PatientID = '1234'",
        "cp_sequence.append(cp1)",
    ])
    assert code_dataelem(elem) == expected
def test_write_OL_implicit_little(self):
    """Test writing elements with VR of OL works correctly."""
    # TrackPointIndexList
    payload = (b'\x00\x01\x02\x03\x04\x05\x06\x07'
               b'\x01\x01\x02\x03')
    elem = DataElement(0x00660129, 'OL', payload)
    # Tag pair (0066, 0129): 66 00 29 01
    # Length (12): 0c 00 00 00
    # | Tag | Length | Value ->
    tag_bytes = b'\x66\x00\x29\x01'
    self.assertEqual(self.encode_element(elem),
                     tag_bytes + b'\x0c\x00\x00\x00' + payload)
    # Empty data encodes with length 0 and no value
    elem.value = b''
    self.assertEqual(self.encode_element(elem),
                     tag_bytes + b'\x00\x00\x00\x00')
def test_write_OD_implicit_little(self):
    """Test writing elements with VR of OD works correctly."""
    # VolumetricCurvePoints
    payload = (b'\x00\x01\x02\x03\x04\x05\x06\x07'
               b'\x01\x01\x02\x03\x04\x05\x06\x07')
    elem = DataElement(0x0070150d, 'OD', payload)
    # Tag pair (0070, 150d): 70 00 0d 15
    # Length (16): 10 00 00 00
    # | Tag | Length | Value ->
    tag_bytes = b'\x70\x00\x0d\x15'
    self.assertEqual(self.encode_element(elem),
                     tag_bytes + b'\x10\x00\x00\x00' + payload)
    # Empty data encodes with length 0 and no value
    elem.value = b''
    self.assertEqual(self.encode_element(elem),
                     tag_bytes + b'\x00\x00\x00\x00')
def apply(self, element: DataElement,
          dataset: Optional[Dataset] = None) -> DataElement:
    """Return a cleaned version of `element`.

    Dispatches on the element's VR: private tags, date-like VRs and
    string-like VRs each get their own cleaning strategy; sequences are
    copied unchanged (their contents are processed later).

    Parameters
    ----------
    element: DataElement
        The element to clean
    dataset: Optional[Dataset]
        The dataset the element belongs to, passed through to the
        specialized cleaners

    Returns
    -------
    DataElement
        The cleaned element

    Raises
    ------
    ValueError
        If the element's VR is not one this cleaner knows how to handle
    """
    vr = VRs.short_name_to_vr(element.VR)
    if element.tag.is_private:
        return self.clean_private(element, dataset)
    elif vr in VRs.date_like:
        return self.clean_date_time(element, dataset)
    elif vr in VRs.string_like:
        return DataElement(tag=element.tag, VR=element.VR, value="CLEANED")
    elif vr == VRs.Sequence:
        # sequence elements are processed later.
        # Cleanup: removed the unreachable `pass` that followed this
        # return in the original.
        return copy(element)
    else:
        # too difficult. Cannot do it
        raise ValueError(
            f"Cannot clean {element}. I don't know how to handle "
            f"tags of type '{vr}'")