Example #1
0
def anonymize_dicomdir(inputdir, outdir, write_logs=True):
    """ Anonymize all DICOM files of the input directory.

    The anonymization rules are read from the 'deidentify.json' and
    'private_deidentify.json' files located next to this module and are
    registered in the module-level CALLBACKS, TAGS, MANUFACTURER and
    PRIVATE_DEIDENTIFY containers before each file is processed with
    'anonymize_dicomfile'.

    Parameters
    ----------
    inputdir: str (mandatory)
        A folder that contains only DICOM files to be anonymized.
    outdir: str (mandatory)
        The anonymized DICOM files folder.
    write_logs: bool (optional, default True)
        If True write the anonymization logs.

    Returns
    -------
    dcmfiles: list
        The anonymized DICOM files.
    logfiles: list
        The anonymization log files.

    Raises
    ------
    ValueError
        If the input directory contains no file.
    Exception
        If private tags have to be anonymized and no manufacturer is known.
    """
    # List the input files: 'os.listdir' returns entries in arbitrary order,
    # so sort them to get reproducible output names (the file index is used
    # as output name below)
    input_dicoms = [os.path.join(inputdir, fname)
                    for fname in sorted(os.listdir(inputdir))]
    if not input_dicoms:
        raise ValueError(
            "No file found in the input directory '{0}'.".format(inputdir))

    # Load the first dataset: it is used to look up the manufacturer
    dataset = dicom.read_file(input_dicoms[0], force=True)

    # Load the tags to anonymize: the first JSON entry is skipped
    # NOTE(review): presumably a header entry - confirm with deidentify.json
    filedir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(filedir, "deidentify.json"), "r") as open_file:
        anon_tags = json.load(open_file)[1:]

    # Set up the desired callbacks and tags to be anonymized
    # Iterate over all the tag to anonymize according to PS 3.15-2008 and
    # supplement 142
    for tag_item in anon_tags:
        # Drop the first and last characters of the tag representation
        # NOTE(review): presumably the enclosing parentheses, as in
        # '(0008,0070)' - confirm with deidentify.json
        tag_repr = tag_item["Tag"][1:-1]
        action = tag_item["Basic Profile"]
        group, element = tag_repr.split(",", 1)

        # Deal with special tags: each 'x' placeholder matches any
        # hexadecimal digit
        if "xx" in group or "xx" in element:
            pattern = re.compile(tag_repr.replace("x", "[0-9A-Fa-f]"))
            CALLBACKS[tag_repr] = [pattern, callback_xxxx]

        # Deal with private tags
        elif "gggg" in group:
            if (0x0008, 0x0070) in dataset:
                MANUFACTURER.append(dataset[0x0008, 0x0070].value)
            # NOTE(review): MANUFACTURER is module-level, so a value appended
            # by a previous call also satisfies this check - confirm this is
            # intended
            if len(MANUFACTURER) > 0:
                CALLBACKS[tag_repr] = [None, callback_private]
            else:
                raise Exception(
                    "The '(0008,0070)' manufacturer tag is not specified and "
                    "is required to anonymize private tags.")

        # Deal with standard tags: store the numeric (group, element) pair
        # together with the requested action
        else:
            TAGS[tag_repr] = (int(group, 16), int(element, 16)), action

    # Now compile the diffusion private tags patterns
    with open(os.path.join(filedir, "private_deidentify.json"),
              "r") as open_file:
        private_anons = json.load(open_file)
    for key, values in private_anons.items():
        for value in values:
            pattern = re.compile(value["Tag"].replace("x", "[0-9A-Fa-f]"))
            PRIVATE_DEIDENTIFY.setdefault(key, []).append(pattern)

    # Process all DICOM files
    progress_indicator = DownloadProgressBar(max=len(input_dicoms))
    dcmfiles = []
    logfiles = []
    for cnt, input_dicom in enumerate(input_dicoms):
        # Display the size of the current file in MB (1e6 bytes)
        statinfo = os.stat(input_dicom)
        DownloadProgressBar.suffix = "{0:.3f}MB".format(
            statinfo.st_size / 1e6)
        progress_indicator.next(1)
        output_dicom, output_log = anonymize_dicomfile(
            input_dicom, outdir, outname=str(cnt), write_log=write_logs)
        dcmfiles.append(output_dicom)
        logfiles.append(output_log)
    progress_indicator.finish()

    return dcmfiles, logfiles
Example #2
0
def split_series(dicom_dir, outdir):
    """ Split all the folder Dicom files by series in different folders.

    Dicom files are searched recursively in the input folder and all files
    are expected to be Dicom files. Each file is copied to
    '<outdir>/<serie name>/<SOPInstanceUID>.dcm', where the serie name is
    '<SeriesDescription>_<EchoTime>_<SeriesNumber>' (the description part is
    omitted when not available and the series number is left-padded with
    zeros to 6 characters).

    Files declaring a character set other than 'ISO_IR 100' are reported
    and skipped. When the destination file already exists, it is only
    replaced if the incoming file has a more recent modification time.

    Expect to split files from a single session.

    Parameters
    ----------
    dicom_dir: str (mandatory)
        a folder containing Dicom files to organize by series.
    outdir: str (mandatory)
        the destination folder.

    Raises
    ------
    ValueError
        if the concatenated (0008,0020) and (0008,0030) values differ
        between two files, i.e. more than one session is detected.
    """
    # Read the incoming directory:
    # process each file in this directory and its sub-directories
    # expect each file to be a DICOM file
    to_treat_dicom = []
    for root, _, files in os.walk(dicom_dir):
        to_treat_dicom.extend(
            os.path.join(root, basename) for basename in files)

    # Go through each file: expected to be in Dicom format
    progress_indicator = DownloadProgressBar(max=len(to_treat_dicom))
    acquisition_datetime = None
    try:
        for dicom_file in to_treat_dicom:

            # Update progress bar with the current file size in MB
            statinfo = os.stat(dicom_file)
            DownloadProgressBar.suffix = "{0:.3f}MB".format(
                statinfo.st_size / 1e6)
            progress_indicator.next(1)

            # Get the time of last modification
            mtime = statinfo.st_mtime

            # Read DICOM dataset
            dataset = dicom.read_file(dicom_file)

            # Find character encoding of DICOM attributes:
            # we currently expect encoding to be ISO_IR 100, files with an
            # other declared encoding are skipped
            if (0x0008, 0x0005) in dataset:
                charset = dataset[0x0008, 0x0005].value
                if charset != "ISO_IR 100":
                    print("'{0}' file encoding is not ISO_IR 100 as "
                          "expected.".format(dicom_file))
                    continue
            else:
                print("Can't check encoding of '{0}', missing "
                      "(0x0008, 0x0005) tag.".format(dicom_file))

            # Process other DICOM attributes:
            # decode strings assuming 'ISO_IR 100'
            series_description = None
            sop_instance_uid = dataset[0x0008, 0x0018].value
            if (0x0008, 0x103e) in dataset:
                series_description = cleanup(
                    decode(dataset[0x0008, 0x103e].value))
            series_number = dataset[0x0020, 0x0011].value
            echo_time = dataset[0x0018, 0x0081].value

            # Check the session time: all the files must share the same
            # (0008,0020) + (0008,0030) value
            current_acquisition_datetime = (
                dataset[0x0008, 0x0020].value +
                dataset[0x0008, 0x0030].value)
            if acquisition_datetime is None:
                acquisition_datetime = current_acquisition_datetime
            elif acquisition_datetime != current_acquisition_datetime:
                raise ValueError(
                    "Two sessions detected in the input folder '{0}': {1} - "
                    "{2}.".format(dicom_dir, acquisition_datetime,
                                  current_acquisition_datetime))

            # Build the full path to the outgoing directory:
            # we assume that there is only one session
            serie_name = (str(echo_time) + "_" +
                          str(series_number).rjust(6, "0"))
            if series_description:
                serie_name = series_description + "_" + serie_name
            output_dicom_dir = os.path.join(outdir, serie_name)

            # Check that the destination folder exists: 'makedirs' also
            # creates 'outdir' itself when it is missing
            if not os.path.isdir(output_dicom_dir):
                os.makedirs(output_dicom_dir)

            # Build a new name for the DICOM file
            output_dicom_file = os.path.join(output_dicom_dir,
                                             sop_instance_uid + '.dcm')

            # Copy DICOM file:
            # when the outgoing file already exists, compare modification
            # times and keep the most recent file
            if (not os.path.exists(output_dicom_file) or
                    os.path.getmtime(output_dicom_file) < mtime):
                shutil.copy2(dicom_file, output_dicom_file)
    finally:
        # Always terminate the progress bar, even when a file is rejected
        progress_indicator.finish()
Example #3
0
def split_series(dicom_dir, outdir):
    """ Split all the folder Dicom files by series in different folders.

    Dicom files are searched recursively in the input folder and all files
    are expected to be Dicom files. Each file is copied to
    '<outdir>/<serie name>/<SOPInstanceUID>.dcm', where the serie name is
    '<SeriesDescription>_<EchoTime>_<SeriesNumber>' (the description part is
    omitted when not available and the series number is left-padded with
    zeros to 6 characters).

    Files declaring a character set other than 'ISO_IR 100' are reported
    and skipped. When the destination file already exists, it is only
    replaced if the incoming file has a more recent modification time.

    Expect to split files from a single session.

    Parameters
    ----------
    dicom_dir: str (mandatory)
        a folder containing Dicom files to organize by series.
    outdir: str (mandatory)
        the destination folder.

    Raises
    ------
    ValueError
        if the concatenated (0008,0020) and (0008,0030) values differ
        between two files, i.e. more than one session is detected.
    """
    # Read the incoming directory:
    # process each file in this directory and its sub-directories
    # expect each file to be a DICOM file
    to_treat_dicom = []
    for root, _, files in os.walk(dicom_dir):
        to_treat_dicom.extend(
            os.path.join(root, basename) for basename in files)

    # Go through each file: expected to be in Dicom format
    progress_indicator = DownloadProgressBar(max=len(to_treat_dicom))
    acquisition_datetime = None
    try:
        for dicom_file in to_treat_dicom:

            # Update progress bar with the current file size in MB
            statinfo = os.stat(dicom_file)
            DownloadProgressBar.suffix = "{0:.3f}MB".format(
                statinfo.st_size / 1e6)
            progress_indicator.next(1)

            # Get the time of last modification
            mtime = statinfo.st_mtime

            # Read DICOM dataset
            dataset = dicom.read_file(dicom_file)

            # Find character encoding of DICOM attributes:
            # we currently expect encoding to be ISO_IR 100, files with an
            # other declared encoding are skipped
            if (0x0008, 0x0005) in dataset:
                charset = dataset[0x0008, 0x0005].value
                if charset != "ISO_IR 100":
                    print("'{0}' file encoding is not ISO_IR 100 as "
                          "expected.".format(dicom_file))
                    continue
            else:
                print("Can't check encoding of '{0}', missing "
                      "(0x0008, 0x0005) tag.".format(dicom_file))

            # Process other DICOM attributes:
            # decode strings assuming 'ISO_IR 100'
            series_description = None
            sop_instance_uid = dataset[0x0008, 0x0018].value
            if (0x0008, 0x103e) in dataset:
                series_description = cleanup(
                    decode(dataset[0x0008, 0x103e].value))
            series_number = dataset[0x0020, 0x0011].value
            echo_time = dataset[0x0018, 0x0081].value

            # Check the session time: all the files must share the same
            # (0008,0020) + (0008,0030) value
            current_acquisition_datetime = (
                dataset[0x0008, 0x0020].value +
                dataset[0x0008, 0x0030].value)
            if acquisition_datetime is None:
                acquisition_datetime = current_acquisition_datetime
            elif acquisition_datetime != current_acquisition_datetime:
                raise ValueError(
                    "Two sessions detected in the input folder '{0}': {1} - "
                    "{2}.".format(dicom_dir, acquisition_datetime,
                                  current_acquisition_datetime))

            # Build the full path to the outgoing directory:
            # we assume that there is only one session
            serie_name = (str(echo_time) + "_" +
                          str(series_number).rjust(6, "0"))
            if series_description:
                serie_name = series_description + "_" + serie_name
            output_dicom_dir = os.path.join(outdir, serie_name)

            # Check that the destination folder exists: 'makedirs' also
            # creates 'outdir' itself when it is missing
            if not os.path.isdir(output_dicom_dir):
                os.makedirs(output_dicom_dir)

            # Build a new name for the DICOM file
            output_dicom_file = os.path.join(output_dicom_dir,
                                             sop_instance_uid + '.dcm')

            # Copy DICOM file:
            # when the outgoing file already exists, compare modification
            # times and keep the most recent file
            if (not os.path.exists(output_dicom_file) or
                    os.path.getmtime(output_dicom_file) < mtime):
                shutil.copy2(dicom_file, output_dicom_file)
    finally:
        # Always terminate the progress bar, even when a file is rejected
        progress_indicator.finish()
Example #4
0
def anonymize_dicomdir(inputdir, outdir, write_logs=True):
    """ Anonymize all DICOM files of the input directory.

    The anonymization rules are read from the 'deidentify.json' and
    'private_deidentify.json' files located next to this module and are
    registered in the module-level CALLBACKS, TAGS, MANUFACTURER and
    PRIVATE_DEIDENTIFY containers before each file is processed with
    'anonymize_dicomfile'.

    Parameters
    ----------
    inputdir: str (mandatory)
        A folder that contains only DICOM files to be anonymized.
    outdir: str (mandatory)
        The anonymized DICOM files folder.
    write_logs: bool (optional, default True)
        If True write the anonymization logs.

    Returns
    -------
    dcmfiles: list
        The anonymized DICOM files.
    logfiles: list
        The anonymization log files.

    Raises
    ------
    ValueError
        If the input directory contains no file.
    Exception
        If private tags have to be anonymized and no manufacturer is known.
    """
    # List the input files: 'os.listdir' returns entries in arbitrary order,
    # so sort them to get reproducible output names (the file index is used
    # as output name below)
    input_dicoms = [os.path.join(inputdir, fname)
                    for fname in sorted(os.listdir(inputdir))]
    if not input_dicoms:
        raise ValueError(
            "No file found in the input directory '{0}'.".format(inputdir))

    # Load the first dataset: it is used to look up the manufacturer
    dataset = dicom.read_file(input_dicoms[0], force=True)

    # Load the tags to anonymize: the first JSON entry is skipped
    # NOTE(review): presumably a header entry - confirm with deidentify.json
    filedir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(filedir, "deidentify.json"), "r") as open_file:
        anon_tags = json.load(open_file)[1:]

    # Set up the desired callbacks and tags to be anonymized
    # Iterate over all the tag to anonymize according to PS 3.15-2008 and
    # supplement 142
    for tag_item in anon_tags:
        # Drop the first and last characters of the tag representation
        # NOTE(review): presumably the enclosing parentheses, as in
        # '(0008,0070)' - confirm with deidentify.json
        tag_repr = tag_item["Tag"][1:-1]
        action = tag_item["Basic Profile"]
        group, element = tag_repr.split(",", 1)

        # Deal with special tags: each 'x' placeholder matches any
        # hexadecimal digit
        if "xx" in group or "xx" in element:
            pattern = re.compile(tag_repr.replace("x", "[0-9A-Fa-f]"))
            CALLBACKS[tag_repr] = [pattern, callback_xxxx]

        # Deal with private tags
        elif "gggg" in group:
            if (0x0008, 0x0070) in dataset:
                MANUFACTURER.append(dataset[0x0008, 0x0070].value)
            # NOTE(review): MANUFACTURER is module-level, so a value appended
            # by a previous call also satisfies this check - confirm this is
            # intended
            if len(MANUFACTURER) > 0:
                CALLBACKS[tag_repr] = [None, callback_private]
            else:
                raise Exception(
                    "The '(0008,0070)' manufacturer tag is not specified and "
                    "is required to anonymize private tags.")

        # Deal with standard tags: store the numeric (group, element) pair
        # together with the requested action
        else:
            TAGS[tag_repr] = (int(group, 16), int(element, 16)), action

    # Now compile the diffusion private tags patterns
    with open(os.path.join(filedir, "private_deidentify.json"),
              "r") as open_file:
        private_anons = json.load(open_file)
    for key, values in private_anons.items():
        for value in values:
            pattern = re.compile(value["Tag"].replace("x", "[0-9A-Fa-f]"))
            PRIVATE_DEIDENTIFY.setdefault(key, []).append(pattern)

    # Process all DICOM files
    progress_indicator = DownloadProgressBar(max=len(input_dicoms))
    dcmfiles = []
    logfiles = []
    for cnt, input_dicom in enumerate(input_dicoms):
        # Display the size of the current file in MB (1e6 bytes)
        statinfo = os.stat(input_dicom)
        DownloadProgressBar.suffix = "{0:.3f}MB".format(
            statinfo.st_size / 1e6)
        progress_indicator.next(1)
        output_dicom, output_log = anonymize_dicomfile(
            input_dicom, outdir, outname=str(cnt), write_log=write_logs)
        dcmfiles.append(output_dicom)
        logfiles.append(output_log)
    progress_indicator.finish()

    return dcmfiles, logfiles