def test_get_dataset(self):
        """Test the different functions to get lists of data files."""

        # Test base locations
        charbase = os.path.join(DATA_ROOT, 'charset_files')
        self.assertTrue(os.path.exists(charbase))

        testbase = os.path.join(DATA_ROOT, 'test_files')
        self.assertTrue(os.path.exists(testbase))

        # Test file get
        chardata = get_charset_files()
        self.assertTrue(len(chardata) > 15)

        # Test that top level file is included
        bases = [basename(x) for x in chardata]

        # Test that subdirectory files included
        testdata = get_testdata_files()
        bases = [basename(x) for x in testdata]
        self.assertTrue('2693' in bases)
        self.assertTrue(len(testdata) > 70)

        # The files should be from their respective bases
        [self.assertTrue(testbase in x) for x in testdata]
        [self.assertTrue(charbase in x) for x in chardata]
    def test_get_dataset(self):
        """Test the different functions to get lists of data files."""
        # Test base locations
        charbase = os.path.join(DATA_ROOT, 'charset_files')
        assert os.path.exists(charbase)

        testbase = os.path.join(DATA_ROOT, 'test_files')
        assert os.path.exists(testbase)

        # Test file get
        chardata = get_charset_files()
        assert 15 < len(chardata)

        # Test that top level file is included
        bases = [basename(x) for x in chardata]

        # Test that subdirectory files included
        testdata = get_testdata_files()
        bases = [basename(x) for x in testdata]
        assert '2693' in bases
        assert 70 < len(testdata)

        # The files should be from their respective bases
        for x in testdata:
            assert testbase in x
        for x in chardata:
            assert charbase in x
Exemple #3
0
    def test_japanese_multi_byte_personname(self):
        """Test japanese person name which has multi byte strings are
        correctly encoded."""
        file_path = get_charset_files('chrH32.dcm')[0]
        ds = dcmread(file_path)
        ds.decode()

        if hasattr(ds.PatientName, 'original_string'):
            original_string = ds.PatientName.original_string
            ds.PatientName.original_string = None
            fp = DicomBytesIO()
            fp.is_implicit_VR = False
            fp.is_little_endian = True
            ds.save_as(fp, write_like_original=False)
            fp.seek(0)
            ds_out = dcmread(fp)
            assert original_string == ds_out.PatientName.original_string

        japanese_pn = PersonName(u"Mori^Ogai=森^鷗外=もり^おうがい")
        pyencs = pydicom.charset.convert_encodings(
            ["ISO 2022 IR 6", "ISO 2022 IR 87", "ISO 2022 IR 159"])
        actual_encoded = bytes(japanese_pn.encode(pyencs))
        expect_encoded = (
            b"\x4d\x6f\x72\x69\x5e\x4f\x67\x61\x69\x3d\x1b\x24\x42\x3f"
            b"\x39\x1b\x28\x42\x5e\x1b\x24\x28\x44\x6c\x3f\x1b\x24\x42"
            b"\x33\x30\x1b\x28\x42\x3d\x1b\x24\x42\x24\x62\x24\x6a\x1b"
            b"\x28\x42\x5e\x1b\x24\x42\x24\x2a\x24\x26\x24\x2c\x24\x24"
            b"\x1b\x28\x42")
        assert expect_encoded == actual_encoded
    def test_get_dataset(self):
        """Test the different functions to get lists of data files."""

        # Test base locations
        charbase = os.path.join(DATA_ROOT, 'charset_files')
        self.assertTrue(os.path.exists(charbase))

        testbase = os.path.join(DATA_ROOT, 'test_files')
        self.assertTrue(os.path.exists(testbase))

        # Test file get
        chardata = get_charset_files()
        self.assertTrue(len(chardata) > 15)

        # Test that top level file is included
        bases = [basename(x) for x in chardata]

        # Test that subdirectory files included
        testdata = get_testdata_files()
        bases = [basename(x) for x in testdata]
        self.assertTrue('2693' in bases)
        self.assertTrue(len(testdata) > 70)

        # The files should be from their respective bases
        [self.assertTrue(testbase in x) for x in testdata]
        [self.assertTrue(charbase in x) for x in chardata]
Exemple #5
0
    def test_charset_patient_names(self, filename, patient_name):
        """Test patient names are correctly decoded and encoded."""
        # check that patient names are correctly read
        file_path = get_charset_files(filename + '.dcm')[0]
        ds = dcmread(file_path)
        ds.decode()
        assert patient_name == ds.PatientName

        # check that patient names are correctly written back
        fp = DicomBytesIO()
        fp.is_implicit_VR = False
        fp.is_little_endian = True
        ds.save_as(fp, write_like_original=False)
        fp.seek(0)
        ds = dcmread(fp)
        assert patient_name == ds.PatientName

        # check that patient names are correctly written back
        # without original byte string (PersonName3 only)
        if hasattr(ds.PatientName, 'original_string'):
            ds.PatientName.original_string = None
            fp = DicomBytesIO()
            fp.is_implicit_VR = False
            fp.is_little_endian = True
            ds.save_as(fp, write_like_original=False)
            fp.seek(0)
            ds = dcmread(fp)
            assert patient_name == ds.PatientName
Exemple #6
0
    def test_charset_patient_names(self, filename, patient_name):
        """Test patient names are correctly decoded and encoded."""
        # check that patient names are correctly read
        file_path = get_charset_files(filename + '.dcm')[0]
        ds = dcmread(file_path)
        ds.decode()
        assert patient_name == ds.PatientName

        # check that patient names are correctly written back
        fp = DicomBytesIO()
        fp.is_implicit_VR = False
        fp.is_little_endian = True
        ds.save_as(fp, write_like_original=False)
        fp.seek(0)
        ds = dcmread(fp)
        assert patient_name == ds.PatientName

        # check that patient names are correctly written back
        # without original byte string (PersonName3 only)
        if hasattr(ds.PatientName, 'original_string'):
            ds.PatientName.original_string = None
            fp = DicomBytesIO()
            fp.is_implicit_VR = False
            fp.is_little_endian = True
            ds.save_as(fp, write_like_original=False)
            fp.seek(0)
            ds = dcmread(fp)
            assert patient_name == ds.PatientName
Exemple #7
0
 def test_decoding_with_specific_tags(self):
     """Decoding is correctly applied even if  Specific Character Set
     is not in specific tags..."""
     rus_file = get_charset_files("chrRuss.dcm")[0]
     ds = dcmread(rus_file, specific_tags=['PatientName'])
     ds.decode()
     assert 2 == len(ds)  # specific character set is always decoded
     assert u'Люкceмбypг' == ds.PatientName
Exemple #8
0
 def test_decoding_with_specific_tags(self):
     """Decoding is correctly applied even if  Specific Character Set
     is not in specific tags..."""
     rus_file = get_charset_files("chrRuss.dcm")[0]
     ds = dcmread(rus_file, specific_tags=['PatientName'])
     ds.decode()
     assert 2 == len(ds)  # specific character set is always decoded
     assert u'Люкceмбypг' == ds.PatientName
    def test_get_dataset_pattern(self):
        """Test that pattern is working properly."""
        pattern = 'CT_small*'
        filename = get_testdata_files(pattern)
        assert filename[0].endswith('CT_small.dcm')

        pattern = 'chrX1*'
        filename = get_charset_files(pattern)
        assert filename[0].endswith('chrX1.dcm')
    def test_get_dataset_pattern(self):
        """Test that pattern is working properly."""

        pattern = 'CT_small'
        filename = get_testdata_files(pattern)
        self.assertTrue(filename[0].endswith('CT_small.dcm'))

        pattern = 'chrX1'
        filename = get_charset_files(pattern)
        self.assertTrue(filename[0].endswith('chrX1.dcm'))
Exemple #11
0
    def test_inherited_character_set_in_sequence(self):
        """charset: can read and decode SQ with parent encoding............."""
        ds = dcmread(get_charset_files('chrSQEncoding1.dcm')[0])
        ds.decode()

        # These datasets inside of the SQ shall be decoded with the parent
        # dataset's encoding
        sequence = ds[0x32, 0x1064][0]
        assert ['shift_jis', 'iso2022_jp'] == sequence._character_set
        assert u'ヤマダ^タロウ=山田^太郎=やまだ^たろう' == sequence.PatientName
Exemple #12
0
    def test_inherited_character_set_in_sequence(self):
        """charset: can read and decode SQ with parent encoding............."""
        ds = dcmread(get_charset_files('chrSQEncoding1.dcm')[0])
        ds.decode()

        # These datasets inside of the SQ shall be decoded with the parent
        # dataset's encoding
        sequence = ds[0x32, 0x1064][0]
        assert ['shift_jis', 'iso2022_jp'] == sequence._character_set
        assert u'ヤマダ^タロウ=山田^太郎=やまだ^たろう' == sequence.PatientName
Exemple #13
0
 def test_changed_character_set(self):
     # Regression test for #629
     multiPN_name = get_charset_files("chrFrenMulti.dcm")[0]
     ds = dcmread(multiPN_name)  # is Latin-1
     ds.SpecificCharacterSet = 'ISO_IR 192'
     from pydicom.filebase import DicomBytesIO
     fp = DicomBytesIO()
     ds.save_as(fp, write_like_original=False)
     fp.seek(0)
     ds_out = dcmread(fp)
     # we expect UTF-8 encoding here
     assert b'Buc^J\xc3\xa9r\xc3\xb4me' == ds_out.get_item(0x00100010).value
Exemple #14
0
 def test_changed_character_set(self):
     # Regression test for #629
     multiPN_name = get_charset_files("chrFrenMulti.dcm")[0]
     ds = dcmread(multiPN_name)  # is Latin-1
     ds.SpecificCharacterSet = 'ISO_IR 192'
     from pydicom.filebase import DicomBytesIO
     fp = DicomBytesIO()
     ds.save_as(fp, write_like_original=False)
     fp.seek(0)
     ds_out = dcmread(fp)
     # we expect UTF-8 encoding here
     assert b'Buc^J\xc3\xa9r\xc3\xb4me' == ds_out.get_item(0x00100010).value
Exemple #15
0
    def test_nested_character_sets(self):
        """charset: can read and decode SQ with different encodings........."""
        ds = dcmread(get_charset_files("chrSQEncoding.dcm")[0])
        ds.decode()

        # These datasets inside of the SQ cannot be decoded with
        # default_encoding OR UTF-8 (the parent dataset's encoding).
        # Instead, we make sure that it is decoded using the
        # (0008,0005) tag of the dataset

        sequence = ds[0x32, 0x1064][0]
        assert ['shift_jis', 'iso2022_jp'] == sequence._character_set
        assert u'ヤマダ^タロウ=山田^太郎=やまだ^たろう' == sequence.PatientName
Exemple #16
0
    def test_nested_character_sets(self):
        """charset: can read and decode SQ with different encodings........."""
        ds = dcmread(get_charset_files("chrSQEncoding.dcm")[0])
        ds.decode()

        # These datasets inside of the SQ cannot be decoded with
        # default_encoding OR UTF-8 (the parent dataset's encoding).
        # Instead, we make sure that it is decoded using the
        # (0008,0005) tag of the dataset

        sequence = ds[0x32, 0x1064][0]
        assert ['shift_jis', 'iso2022_jp'] == sequence._character_set
        assert u'ヤマダ^タロウ=山田^太郎=やまだ^たろう' == sequence.PatientName
Exemple #17
0
    def test_japanese_multi_byte_personname(self):
        """Test japanese person name which has multi byte strings are
        correctly encoded."""
        file_path = get_charset_files('chrH32.dcm')[0]
        ds = dcmread(file_path)
        ds.decode()

        if hasattr(ds.PatientName, 'original_string'):
            original_string = ds.PatientName.original_string
            ds.PatientName.original_string = None
            fp = DicomBytesIO()
            fp.is_implicit_VR = False
            fp.is_little_endian = True
            ds.save_as(fp, write_like_original=False)
            fp.seek(0)
            ds_out = dcmread(fp)
            assert original_string == ds_out.PatientName.original_string
    def test_get_dataset(self):
        """Test the different functions to get lists of data files."""
        # The cached files downloaded from the pydicom-data repo
        cached_data_test_files = str(get_data_dir())

        # If pydicom-data is available locally
        ext_path = None
        if 'pydicom-data' in external_data_sources():
            ext_path = os.fspath(
                external_data_sources()['pydicom-data'].data_path)

        # Test base locations
        charbase = os.path.join(DATA_ROOT, 'charset_files')
        assert os.path.exists(charbase)

        testbase = os.path.join(DATA_ROOT, 'test_files')
        assert os.path.exists(testbase)

        # Test file get
        chardata = get_charset_files()
        assert 15 < len(chardata)

        # Test that top level file is included
        bases = [basename(x) for x in chardata]

        # Test that subdirectory files included
        testdata = get_testdata_files()
        bases = [basename(x) for x in testdata]
        assert '2693' in bases
        assert 70 < len(testdata)

        # The files should be from their respective bases
        for x in testdata:
            # Don't check files from external sources other than pydicom-data
            if (testbase not in x and cached_data_test_files not in x
                    and (ext_path not in x if ext_path else True)):
                continue

            assert (testbase in x or cached_data_test_files in x
                    or (ext_path in x if ext_path else False))

        for x in chardata:
            assert charbase in x
Exemple #19
0
 def test_charset_patient_names(self, filename, patient_name):
     """Test pixel_array for big endian matches little."""
     file_path = get_charset_files(filename + '.dcm')[0]
     ds = dcmread(file_path)
     ds.decode()
     assert patient_name == ds.PatientName
Exemple #20
0
# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
"""unittest cases for pydicom.charset module"""

import unittest

from pydicom.data import get_charset_files
from pydicom.data import get_testdata_files
import pydicom.charset
from pydicom.dataelem import DataElement
from pydicom import dcmread


latin1_file = get_charset_files("chrFren.dcm")[0]
jp_file = get_charset_files("chrH31.dcm")[0]
multiPN_file = get_charset_files("chrFrenMulti.dcm")[0]
sq_encoding_file = get_charset_files("chrSQEncoding.dcm")[0]
sq_encoding1_file = get_charset_files("chrSQEncoding1.dcm")[0]
explicit_ir6_file = get_charset_files("chrJapMultiExplicitIR6.dcm")[0]
normal_file = get_testdata_files("CT_small.dcm")[0]


class CharsetTests(unittest.TestCase):
    def test_latin1(self):
        """charset: can read and decode latin_1 file........................"""
        ds = dcmread(latin1_file)
        ds.decode()
        # Make sure don't get unicode encode error on converting to string
        expected = u'Buc^J\xe9r\xf4me'
        got = ds.PatientName
        self.assertEqual(expected, got,
                         "Expected %r, got %r" % (expected, got))
Exemple #21
0
"""List summary info for the test files in the charset directory"""

from glob import glob
import logging
import os

import pydicom
from pydicom.data import get_charset_files


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    logger = logging.getLogger('charlist')

    # Get list of all DICOM files
    names = get_charset_files("*.dcm")

    # Collect summary information from the files
    files_info = []
    for name in names:
        ds = pydicom.dcmread(name)
        ds.decode()
        fname = os.path.basename(name)
        try:
            files_info.append((fname, ds.SpecificCharacterSet, ds.PatientName))
        except Exception:
            try:
                requested_seq = ds.RequestedProcedureCodeSequence[0]
                spec_charset = requested_seq.SpecificCharacterSet
                patient_name = requested_seq.PatientName
                files_info.append((fname,
Exemple #22
0
# -*- coding: utf-8 -*-
# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
"""unittest cases for pydicom.charset module"""

import unittest

import pytest

from pydicom.data import get_charset_files
from pydicom.data import get_testdata_files
import pydicom.charset
from pydicom.dataelem import DataElement, RawDataElement, DataElement_from_raw
from pydicom import dcmread, Dataset

latin1_file = get_charset_files("chrFren.dcm")[0]
jp_file = get_charset_files("chrH31.dcm")[0]
multiPN_file = get_charset_files("chrFrenMulti.dcm")[0]
sq_encoding_file = get_charset_files("chrSQEncoding.dcm")[0]
sq_encoding1_file = get_charset_files("chrSQEncoding1.dcm")[0]
explicit_ir6_file = get_charset_files("chrJapMultiExplicitIR6.dcm")[0]
normal_file = get_testdata_files("CT_small.dcm")[0]


class CharsetTests(unittest.TestCase):
    def test_latin1(self):
        """charset: can read and decode latin_1 file........................"""
        ds = dcmread(latin1_file)
        ds.decode()
        # Make sure don't get unicode encode error on converting to string
        expected = u'Buc^Jérôme'
        got = ds.PatientName