def decode(data_element, dicom_character_set):
    """Apply the DICOM character encoding to the data element, in place.

    data_element -- DataElement instance containing a value to convert
    dicom_character_set -- the value of Specific Character Set (0008,0005),
                which may be a single value, a multiple value (code
                extension), or may also be '' or None.
                If blank or None, ISO_IR 6 is used.
    """
    if not dicom_character_set:
        dicom_character_set = ['ISO_IR 6']

    encodings = convert_encodings(dicom_character_set)

    # decode the string value to unicode
    # PN is special case as may have 3 components with different chr sets
    if data_element.VR == "PN":
        if not in_py2:
            if data_element.VM == 1:
                data_element.value = data_element.value.decode(encodings)
            else:
                data_element.value = [
                    val.decode(encodings) for val in data_element.value
                ]
        else:
            # Python 2: wrap in PersonNameUnicode, which decodes lazily
            if data_element.VM == 1:
                data_element.value = PersonNameUnicode(data_element.value,
                                                       encodings)
            else:
                data_element.value = [
                    PersonNameUnicode(value, encodings)
                    for value in data_element.value
                ]
    if data_element.VR in text_VRs:
        # Remove the first encoding if this is a multi-byte encoding
        if len(encodings) > 1:
            del encodings[0]

        # You can't re-decode unicode (string literals in py3)
        if data_element.VM == 1:
            if isinstance(data_element.value, compat.text_type):
                return
            data_element.value = clean_escseq(
                data_element.value.decode(encodings[0]), encodings)
        else:
            # decode each value separately, skipping already-decoded ones
            output = list()
            for value in data_element.value:
                if isinstance(value, compat.text_type):
                    output.append(value)
                else:
                    output.append(
                        clean_escseq(value.decode(encodings[0]), encodings))
            data_element.value = output
def decode_element(data_element, dicom_character_set):
    """Apply the DICOM character encoding to a data element

    Parameters
    ----------
    data_element : dataelem.DataElement
        The :class:`DataElement<pydicom.dataelem.DataElement>` instance
        containing an encoded byte string value to decode.
    dicom_character_set : str or list of str or None
        The value of (0008,0005) *Specific Character Set*, which may be a
        single value, a multiple value (code extension), or may also be ``''``
        or ``None``, in which case ``'ISO_IR 6'`` will be used.
    """
    # Nothing to decode for an empty element value.
    if data_element.is_empty:
        return data_element.empty_value
    if not dicom_character_set:
        dicom_character_set = ['ISO_IR 6']

    encodings = convert_encodings(dicom_character_set)

    # decode the string value to unicode
    # PN is special case as may have 3 components with different chr sets
    if data_element.VR == "PN":
        if not in_py2:
            # ``VM <= 1`` keeps single (non-list) values out of the list branch
            if data_element.VM <= 1:
                data_element.value = data_element.value.decode(encodings)
            else:
                data_element.value = [
                    val.decode(encodings) for val in data_element.value
                ]
        else:
            # Python 2: wrap in PersonNameUnicode, which decodes lazily
            if data_element.VM <= 1:
                data_element.value = PersonNameUnicode(data_element.value,
                                                       encodings)
            else:
                data_element.value = [
                    PersonNameUnicode(value, encodings)
                    for value in data_element.value
                ]
    if data_element.VR in text_VRs:
        # You can't re-decode unicode (string literals in py3)
        if data_element.VM == 1:
            if isinstance(data_element.value, compat.text_type):
                return
            data_element.value = decode_string(data_element.value, encodings,
                                               TEXT_VR_DELIMS)
        else:
            # decode each value separately, skipping already-decoded ones
            output = list()
            for value in data_element.value:
                if isinstance(value, compat.text_type):
                    output.append(value)
                else:
                    output.append(
                        decode_string(value, encodings, TEXT_VR_DELIMS))
            data_element.value = output
def decode(data_element, dicom_character_set):
    """Apply the DICOM character encoding to the data element, in place.

    data_element -- DataElement instance containing a value to convert
    dicom_character_set -- the value of Specific Character Set (0008,0005);
        may be a single value, a multiple value (code extension), '' or
        None. If blank or None, ISO_IR 6 is used.
    """
    encodings = convert_encodings(dicom_character_set or ['ISO_IR 6'])

    # decode the string value to unicode
    # PN is special case as may have 3 components with different chr sets
    if data_element.VR == "PN":
        if in_py2:
            # Python 2: wrap in PersonNameUnicode, which decodes lazily
            if data_element.VM == 1:
                data_element.value = PersonNameUnicode(data_element.value,
                                                       encodings)
            else:
                data_element.value = [
                    PersonNameUnicode(component, encodings)
                    for component in data_element.value
                ]
        else:
            if data_element.VM == 1:
                data_element.value = data_element.value.decode(encodings)
            else:
                data_element.value = [
                    component.decode(encodings)
                    for component in data_element.value
                ]
    if data_element.VR in text_VRs:
        # Already-unicode values (string literals in py3) cannot be re-decoded
        if data_element.VM == 1:
            if isinstance(data_element.value, compat.text_type):
                return
            data_element.value = decode_string(data_element.value, encodings,
                                               TEXT_VR_DELIMS)
        else:
            data_element.value = [
                item if isinstance(item, compat.text_type)
                else decode_string(item, encodings, TEXT_VR_DELIMS)
                for item in data_element.value
            ]
def test_hash(self):
    """Equal person names must hash equally (regression for #785, py3)."""
    first = PersonNameUnicode("John^Doe^^Dr", encodings=default_encoding)
    second = PersonNameUnicode("John^Doe^^Dr", encodings=default_encoding)
    assert hash(first) == hash(second)
    different = PersonNameUnicode("John^Doe", encodings=default_encoding)
    assert hash(first) != hash(different)

    first = PersonNameUnicode(u'Yamada^Tarou=山田^太郎=やまだ^たろう',
                              [default_encoding, 'iso2022_jp'])
    second = PersonNameUnicode(u'Yamada^Tarou=山田^太郎=やまだ^たろう',
                               [default_encoding, 'iso2022_jp'])
    assert hash(first) == hash(second)
def write_PN(fp, data_element, encodings=None):
    """Encode a PN data element value and write it to *fp*."""
    encodings = encodings or [default_encoding]
    if data_element.VM == 1:
        values = [data_element.value]
    else:
        values = data_element.value
    # precedence: (values and isinstance(...)) or (not in_py2)
    if (values and isinstance(values[0], compat.text_type)) or not in_py2:
        try:
            values = [component.encode(encodings) for component in values]
        except TypeError:
            # we get here in Python 2 if val is a unicode string
            values = [PersonNameUnicode(component, encodings)
                      for component in values]
            values = [component.encode(encodings) for component in values]
    joined = b'\\'.join(values)
    # DICOM values must have even length; pad with a trailing space
    if len(joined) % 2 != 0:
        joined += b' '
    fp.write(joined)
def test_unicode_jp_from_unicode(self):
    """A person name initialized from unicode is already decoded"""
    name = PersonNameUnicode(u'Yamada^Tarou=山田^太郎=やまだ^たろう',
                             [default_encoding, 'iso2022_jp'])
    assert (name.family_name, name.given_name) == (u'Yamada', u'Tarou')
    assert name.ideographic == u'山田^太郎'
    assert name.phonetic == u'やまだ^たろう'
def testUnicodeKr(self):
    """PN: 3component in unicode works (Korean)..............................."""
    # Example name from PS3.5-2008 section I.2 p. 101
    name = PersonNameUnicode(
        """Hong^Gildong=\033$)C\373\363^\033$)C\321\316\324\327=\033$)C\310\253^\033$)C\261\346\265\277""",
        [default_encoding, 'euc_kr'])
    expected = ("Hong", "Gildong")
    actual = (name.family_name, name.given_name)
    self.assertEqual(
        actual, expected,
        "PN: Expected single_byte name '{0!s}', got '{1!s}'".format(
            expected, actual))
def test_unicode_jp(self):
    """PN: 3component in unicode works (Japanese)..."""
    # Example name from PS3.5-2008 section H p. 98
    name = PersonNameUnicode(
        'Yamada^Tarou='
        '\033$B;3ED\033(B^\033$BB@O:\033(B='
        '\033$B$d$^$@\033(B^\033$B$?$m$&\033(B',
        [default_encoding, 'iso2022_jp'])
    assert (name.family_name, name.given_name) == ("Yamada", "Tarou")
def test_unicode_kr(self):
    """PN: 3component in unicode works (Korean)..."""
    # Example name from PS3.5-2008 section I.2 p. 101
    name = PersonNameUnicode(
        'Hong^Gildong='
        '\033$)C\373\363^\033$)C\321\316\324\327='
        '\033$)C\310\253^\033$)C\261\346\265\277',
        [default_encoding, 'euc_kr'])
    assert (name.family_name, name.given_name) == ("Hong", "Gildong")
def testUnicodeJp(self):
    """PN: 3component in unicode works (Japanese)............................"""
    # Example name from PS3.5-2008 section H p. 98
    name = PersonNameUnicode(
        """Yamada^Tarou=\033$B;3ED\033(B^\033$BB@O:\033(B=\033$B$d$^$@\033(B^\033$B$?$m$&\033(B""",
        [default_encoding, 'iso2022_jp'])
    expected = ("Yamada", "Tarou")
    actual = (name.family_name, name.given_name)
    self.assertEqual(
        actual, expected,
        "PN: Expected single_byte name '{0!s}', got '{1!s}'".format(
            expected, actual))
def test_unicode_jp_from_bytes_comp_delimiter(self):
    """The example encoding without the escape sequence before '='"""
    name = PersonNameUnicode(b'Yamada^Tarou='
                             b'\033$B;3ED\033(B^\033$BB@O:='
                             b'\033$B$d$^$@\033(B^\033$B$?$m$&\033(B',
                             [default_encoding, 'iso2022_jp'])
    if not in_py2:
        # py3 variant must be decoded explicitly
        name = name.decode()
    assert (name.family_name, name.given_name) == (u'Yamada', u'Tarou')
    assert name.ideographic == u'山田^太郎'
    assert name.phonetic == u'やまだ^たろう'
def test_unicode_jp_from_bytes_caret_delimiter(self):
    """PN: 3component in unicode works (Japanese)..."""
    # Example name from PS3.5-2008 section H p. 98
    name = PersonNameUnicode(b'Yamada^Tarou='
                             b'\033$B;3ED\033(B^\033$BB@O:\033(B='
                             b'\033$B$d$^$@\033(B^\033$B$?$m$&\033(B',
                             [default_encoding, 'iso2022_jp'])
    if not in_py2:
        # py3 variant must be decoded explicitly
        name = name.decode()
    assert (name.family_name, name.given_name) == (u'Yamada', u'Tarou')
    assert name.ideographic == u'山田^太郎'
    assert name.phonetic == u'やまだ^たろう'
def test_unicode_kr(self):
    """PN: 3component in unicode works (Korean)..."""
    # Example name from PS3.5-2008 section I.2 p. 101
    name = PersonNameUnicode(b'Hong^Gildong='
                             b'\033$)C\373\363^\033$)C\321\316\324\327='
                             b'\033$)C\310\253^\033$)C\261\346\265\277',
                             [default_encoding, 'euc_kr'])
    # PersonNameUnicode and PersonName3 behave differently:
    # PersonName3 does not decode the components automatically
    if not in_py2:
        name = name.decode()
    assert (name.family_name, name.given_name) == (u'Hong', u'Gildong')
    assert name.ideographic == u'洪^吉洞'
    assert name.phonetic == u'홍^길동'
def test_copy(self):
    """Copying and deep-copying a PersonNameUnicode works."""
    original = PersonNameUnicode(
        'Hong^Gildong='
        '\033$)C\373\363^\033$)C\321\316\324\327='
        '\033$)C\310\253^\033$)C\261\346\265\277',
        [default_encoding, 'euc_kr'])
    shallow = copy.copy(original)
    assert original == shallow
    assert original.components == shallow.components
    # the shallow copy shares the original's components
    assert shallow.components is original.components
    assert original.encodings == shallow.encodings

    deep = copy.deepcopy(original)
    assert original == deep
    assert original.components == deep.components
    # deepcopy() returns the same immutable objects (tuples)
    assert deep.components is original.components
    assert original.encodings is deep.encodings
def testCopy(self):
    """PN: Copy and deepcopy works..."""
    original = PersonNameUnicode(
        'Hong^Gildong='
        '\033$)C\373\363^\033$)C\321\316\324\327='
        '\033$)C\310\253^\033$)C\261\346\265\277',
        [default_encoding, 'euc_kr'])
    shallow = copy.copy(original)
    self.assertEqual(shallow, original)
    self.assertEqual(shallow.components, original.components)
    # the shallow copy shares the original's components
    self.assertTrue(shallow.components is original.components)
    self.assertEqual(shallow.encodings, original.encodings)

    deep = copy.deepcopy(original)
    self.assertEqual(deep, original)
    self.assertEqual(deep.components, original.components)
    # deepcopy() shall have made a copy of components
    self.assertFalse(deep.components is original.components)
    self.assertEqual(deep.encodings, original.encodings)
def get_pn_element_value(self, value):
    """Return PersonName value from JSON value.

    Values with VR PN have a special JSON encoding, see the DICOM Standard,
    Part 18, :dcm:`Annex F.2.2<part18/sect_F.2.2.html>`.

    Parameters
    ----------
    value : dict
        The person name components in the JSON entry.

    Returns
    -------
    PersonName3 or PersonNameUnicode or str
        The decoded PersonName object or an empty string.
    """
    if not isinstance(value, dict):
        # Some DICOMweb services get this wrong, so we
        # workaround the issue and warn the user
        # rather than raising an error.
        warnings.warn(
            'value of data element "{}" with VR Person Name (PN) '
            'is not formatted correctly'.format(self.tag))
        return value

    group_names = ('Alphabetic', 'Ideographic', 'Phonetic')
    # Keep all component groups up to the highest-order one present.
    group_count = 1
    if 'Phonetic' in value:
        group_count = 3
    elif 'Ideographic' in value:
        group_count = 2
    components = [value.get(name, '') for name in group_names[:group_count]]
    elem_value = '='.join(components)
    if compat.in_py2:
        elem_value = PersonNameUnicode(elem_value, 'UTF8')
    return elem_value
def to_json_dict(self, bulk_data_element_handler, bulk_data_threshold):
    """Return a dictionary representation of the :class:`DataElement`
    conforming to the DICOM JSON Model as described in the DICOM
    Standard, Part 18, :dcm:`Annex F<part18/chaptr_F.html>`.

    .. versionadded:: 1.4

    Parameters
    ----------
    bulk_data_element_handler: callable or None
        Callable that accepts a bulk data element and returns the
        "BulkDataURI" for retrieving the value of the
        data element via DICOMweb WADO-RS
    bulk_data_threshold: int
        Size of base64 encoded data element above which a value will be
        provided in form of a "BulkDataURI" rather than "InlineBinary".
        Ignored if no bulk data handler is given.

    Returns
    -------
    dict
        Mapping representing a JSON encoded data element
    """
    json_element = {'vr': self.VR, }
    if self.VR in jsonrep.BINARY_VR_VALUES:
        if not self.is_empty:
            binary_value = self.value
            encoded_value = base64.b64encode(binary_value).decode('utf-8')
            if (bulk_data_element_handler is not None
                    and len(encoded_value) > bulk_data_threshold):
                json_element['BulkDataURI'] = bulk_data_element_handler(
                    self)
            else:
                logger.info('encode bulk data element "{}" inline'.format(
                    self.name))
                json_element['InlineBinary'] = encoded_value
    elif self.VR == 'SQ':
        # recursive call to get sequence item JSON dicts
        value = [
            ds.to_json(bulk_data_element_handler=bulk_data_element_handler,
                       bulk_data_threshold=bulk_data_threshold,
                       dump_handler=lambda d: d)
            for ds in self
        ]
        json_element['Value'] = value
    elif self.VR == 'PN':
        if not self.is_empty:
            elem_value = []
            if self.VM > 1:
                value = self.value
            else:
                value = [self.value]
            for v in value:
                if compat.in_py2:
                    v = PersonNameUnicode(v, 'UTF8')
                # one JSON object per name, one key per component group
                comps = {'Alphabetic': v.components[0]}
                if len(v.components) > 1:
                    comps['Ideographic'] = v.components[1]
                if len(v.components) > 2:
                    comps['Phonetic'] = v.components[2]
                elem_value.append(comps)
            json_element['Value'] = elem_value
    elif self.VR == 'AT':
        if not self.is_empty:
            value = self.value
            if self.VM == 1:
                value = [value]
            # tags are serialized as 8-digit uppercase hex strings
            json_element['Value'] = [format(v, '08X') for v in value]
    else:
        if not self.is_empty:
            if self.VM > 1:
                value = self.value
            else:
                value = [self.value]
            json_element['Value'] = [v for v in value]
    # BUG FIX: the original tested ``hasattr(json_element, 'Value')``,
    # which is always False for a dict key, so numeric string values were
    # never converted; membership test is what was intended.
    if 'Value' in json_element:
        json_element['Value'] = jsonrep.convert_to_python_number(
            json_element['Value'], self.VR)
    return json_element
def Decode2PNGB(inName):
    """Wrap *inName* in a PersonNameUnicode using the GB18030 charset."""
    from pydicom.valuerep import PersonNameUnicode

    # single-byte default repertoire plus GB18030 for the extended groups
    return PersonNameUnicode(inName, ['iso8859', 'GB18030'])
from pydicom.valuerep import PersonNameUnicode

# Tkinter was renamed to tkinter in Python 3.
if compat.in_py2:
    import Tkinter as tkinter
else:
    import tkinter

print(__doc__)

# Single-byte repertoire used for the first (alphabetic) component group.
default_encoding = 'iso8859'

root = tkinter.Tk()
# Multi-component example names from the DICOM standard, each paired with
# the character set for its ideographic/phonetic component groups.
person_names = [
    PersonNameUnicode(
        b"Yamada^Tarou=\033$B;3ED\033(B^\033$BB@O:"
        b"\033(B=\033$B$d$^$@\033(B^\033$B$?$m$&\033(B",
        [default_encoding, 'iso2022_jp']),
    # DICOM standard 2008-PS3.5 H.3 p98
    PersonNameUnicode(
        b"Wang^XiaoDong=\xcd\xf5\x5e\xd0\xa1\xb6\xab=",
        [default_encoding, 'GB18030']),
    # DICOM standard 2008-PS3.5 J.3 p 105
    PersonNameUnicode(
        b"Wang^XiaoDong=\xe7\x8e\x8b\x5e\xe5\xb0\x8f\xe6\x9d\xb1=",
        [default_encoding, 'UTF-8']),
    # DICOM standard 2008-PS3.5 J.1 p 104
    PersonNameUnicode(
        b"Hong^Gildong=\033$)C\373\363^\033$)C\321\316\324\327="
        b"\033$)C\310\253^\033$)C\261\346\265\277",
        [default_encoding, 'euc_kr']),
    # DICOM standard 2008-PS3.5 I.2 p 101
]
# Show each decoded name in a Tk label.
# NOTE(review): this chunk appears truncated — the label is created but
# never packed/displayed within the visible source; confirm against the
# full example script.
for person_name in person_names:
    label = tkinter.Label(text=person_name)
def to_json(self, bulk_data_element_handler, bulk_data_threshold,
            dump_handler):
    """Converts a DataElement to JSON representation.

    Parameters
    ----------
    bulk_data_element_handler: Union[Callable, None]
        callable that accepts a bulk data element and returns the
        "BulkDataURI" for retrieving the value of the
        data element via DICOMweb WADO-RS
    bulk_data_threshold: int
        size of base64 encoded data element above which a value will be
        provided in form of a "BulkDataURI" rather than "InlineBinary"

    Returns
    -------
    dict
        mapping representing a JSON encoded data element

    Raises
    ------
    TypeError
        when size of encoded data element exceeds `bulk_data_threshold`
        but `bulk_data_element_handler` is ``None`` and hence not callable
    """
    # TODO: Determine whether more VRs need to be converted to strings
    _VRs_TO_QUOTE = [
        'AT',
    ]
    json_element = {
        'vr': self.VR,
    }
    if self.VR in jsonrep.BINARY_VR_VALUES:
        if self.value is not None:
            binary_value = self.value
            encoded_value = base64.b64encode(binary_value).decode('utf-8')
            if len(encoded_value) > bulk_data_threshold:
                if bulk_data_element_handler is None:
                    raise TypeError(
                        'No bulk data element handler provided to '
                        'generate URL for value of data element "{}".'.
                        format(self.name))
                json_element['BulkDataURI'] = bulk_data_element_handler(
                    self)
            else:
                logger.info('encode bulk data element "{}" inline'.format(
                    self.name))
                json_element['InlineBinary'] = encoded_value
    elif self.VR == 'SQ':
        # recursive call to co-routine to format sequence contents
        value = [
            json.loads(
                e.to_json(
                    bulk_data_element_handler=bulk_data_element_handler,
                    bulk_data_threshold=bulk_data_threshold,
                    dump_handler=dump_handler))
            for e in self
        ]
        json_element['Value'] = value
    elif self.VR == 'PN':
        elem_value = self.value
        if elem_value is not None:
            if compat.in_py2:
                elem_value = PersonNameUnicode(elem_value, 'UTF8')
            # serialize only the highest-order component group present
            if len(elem_value.components) > 2:
                json_element['Value'] = [
                    {
                        'Phonetic': elem_value.components[2],
                    },
                ]
            elif len(elem_value.components) > 1:
                json_element['Value'] = [
                    {
                        'Ideographic': elem_value.components[1],
                    },
                ]
            else:
                json_element['Value'] = [
                    {
                        'Alphabetic': elem_value.components[0],
                    },
                ]
    else:
        if self.value is not None:
            is_multivalue = isinstance(self.value, MultiValue)
            if self.VM > 1 or is_multivalue:
                value = self.value
            else:
                value = [self.value]
            # ensure it's a list and not another iterable
            # (e.g. tuple), which would not be JSON serializable
            if self.VR in _VRs_TO_QUOTE:
                json_element['Value'] = [str(v) for v in value]
            else:
                json_element['Value'] = [v for v in value]
    # BUG FIX: the original tested ``hasattr(json_element, 'Value')``,
    # which is always False for a dict key, so numeric string values were
    # never converted; membership test is what was intended.
    if 'Value' in json_element:
        json_element['Value'] = jsonrep.convert_to_python_number(
            json_element['Value'], self.VR)
    return json_element
def from_json(cls, dataset_class, tag, vr, value, value_key,
              bulk_data_uri_handler=None, encodings=None):
    """Creates a DataElement from JSON.

    Parameters
    ----------
    dataset_class: Dataset derived class
        class used to create sequence items
    tag: pydicom.tag.Tag
        data element tag
    vr: str
        data element value representation
    value: list
        data element value(s)
    value_key: Union[str, None]
        key of the data element that contains the value
        (options: ``{"Value", "InlineBinary", "BulkDataURI"}``)
    bulk_data_uri_handler: Union[Callable, None]
        callable that accepts the "BulkDataURI" of the JSON representation
        of a data element and returns the actual value of that data element
        (retrieved via DICOMweb WADO-RS)

    Returns
    -------
    pydicom.dataelem.DataElement
    """
    # TODO: test wado-rs retrieve wrapper
    try:
        vm = dictionary_VM(tag)
    except KeyError:
        # Private tag
        vm = str(len(value))
    # Shape validation of the incoming JSON value per value_key.
    if value_key == 'Value':
        if not (isinstance(value, list)):
            fmt = '"{}" of data element "{}" must be a list.'
            raise TypeError(fmt.format(value_key, tag))
    elif value_key in {'InlineBinary', 'BulkDataURI'}:
        if isinstance(value, list):
            fmt = '"{}" of data element "{}" must be a {}.'
            expected_type = ('string' if value_key == 'BulkDataURI'
                             else 'bytes-like object')
            raise TypeError(fmt.format(value_key, tag, expected_type))
    if vr == 'SQ':
        # Each sequence item is a JSON object mapping tags to elements;
        # recurse to build the nested Dataset instances.
        elem_value = []
        for value_item in value:
            ds = dataset_class()
            if value_item:
                for key, val in value_item.items():
                    if 'vr' not in val:
                        fmt = 'Data element "{}" must have key "vr".'
                        raise KeyError(fmt.format(tag))
                    unique_value_keys = tuple(
                        set(val.keys()) & set(jsonrep.JSON_VALUE_KEYS))
                    if len(unique_value_keys) == 0:
                        # nested element carries no value at all
                        logger.debug(
                            'data element has neither key "{}".'.format(
                                '" nor "'.join(jsonrep.JSON_VALUE_KEYS)))
                        elem = DataElement(tag=tag, value='', VR=vr)
                    else:
                        # NOTE(review): this rebinds the ``value_key``
                        # parameter. It looks harmless — each iteration
                        # reassigns it before use and the non-SQ branches
                        # below are unreachable from here — but confirm
                        # before relying on ``value_key`` later.
                        value_key = unique_value_keys[0]
                        elem = cls.from_json(dataset_class, key, val['vr'],
                                             val[value_key], value_key)
                    ds.add(elem)
            elem_value.append(ds)
    elif vr == 'PN':
        # Special case, see DICOM Part 18 Annex F2.2
        elem_value = []
        for v in value:
            if not isinstance(v, dict):
                # Some DICOMweb services get this wrong, so we
                # workaround the issue and warn the user
                # rather than raising an error.
                logger.error(
                    'value of data element "{}" with VR Person Name (PN) '
                    'is not formatted correctly'.format(tag))
                elem_value.append(v)
            else:
                # join all component groups present in the JSON object
                elem_value.extend(list(v.values()))
        if vm == '1':
            try:
                elem_value = elem_value[0]
            except IndexError:
                elem_value = ''
    else:
        if vm == '1':
            if value_key == 'InlineBinary':
                elem_value = base64.b64decode(value)
            elif value_key == 'BulkDataURI':
                if bulk_data_uri_handler is None:
                    logger.warning(
                        'no bulk data URI handler provided for retrieval '
                        'of value of data element "{}"'.format(tag))
                    elem_value = b''
                else:
                    elem_value = bulk_data_uri_handler(value)
            else:
                if value:
                    elem_value = value[0]
                else:
                    elem_value = value
        else:
            elem_value = value
    if elem_value is None:
        logger.warning('missing value for data element "{}"'.format(tag))
        elem_value = ''
    elem_value = jsonrep.convert_to_python_number(elem_value, vr)
    try:
        # Python 2 only: PN values must be decoded to PersonNameUnicode.
        if compat.in_py2 and vr == "PN":
            elem_value = PersonNameUnicode(elem_value, 'UTF8')
        return DataElement(tag=tag, value=elem_value, VR=vr)
    # NOTE(review): any construction failure is deliberately re-raised as
    # ValueError so callers see one error type for malformed JSON input.
    except Exception:
        raise ValueError(
            'Data element "{}" could not be loaded from JSON: {}'.format(
                tag, elem_value))