def anonymize_dicomdir(inputdir, outdir, write_logs=True):
    """ Anonymize all DICOM files of the input directory.

    The standard, repeating-group ('xx') and private ('gggg') tags to
    anonymize are loaded from the 'deidentify.json' resource file located
    next to this module, and the diffusion private tag patterns from
    'private_deidentify.json'. The module-level CALLBACKS, TAGS,
    MANUFACTURER and PRIVATE_DEIDENTIFY registries are updated as a side
    effect before each file is handed to 'anonymize_dicomfile'.

    Parameters
    ----------
    inputdir: str (mandatory)
        A folder that contains only DICOM files to be anonymized.
    outdir: str (mandatory)
        The anonimized DICOM files folder.
    write_logs: bool (optional, default True)
        If True write the anonimization logs.

    Returns
    -------
    dcmfiles: list
        The anonimized DICOM files.
    logfiles: list
        The anonimization log files.

    Raises
    ------
    ValueError
        If the input directory contains no file.
    Exception
        If private tags must be anonymized but the manufacturer tag
        '(0008,0070)' is missing from the first dataset.
    """
    # Load the first dataset: it is used to detect the manufacturer when
    # private tags have to be anonymized
    input_dicoms = [os.path.join(inputdir, fname)
                    for fname in os.listdir(inputdir)]
    if not input_dicoms:
        # fail with a clear message rather than an IndexError below
        raise ValueError("No DICOM file found in '{0}'.".format(inputdir))
    dataset = dicom.read_file(input_dicoms[0], force=True)

    # Load the tags to anonymize (the first entry of the resource file is
    # a header and is skipped)
    filedir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(filedir, "deidentify.json"), "r") as open_file:
        anon_tags = json.load(open_file)[1:]

    # Set up the desired callbacks and tags to be anonymized
    # Iterate over all the tag to anonymize according to PS 3.15-2008 and
    # supplement 142
    for tag_item in anon_tags:
        # tags are stored as '(gggg,eeee)': strip the surrounding parens
        tag_repr = tag_item["Tag"][1:-1]
        action = tag_item["Basic Profile"]
        group, element = tag_repr.split(",", 1)

        # Deal with special repeating-group tags: matched by regex
        if "xx" in group or "xx" in element:
            pattern = re.compile(tag_repr.replace("x", "[0-9A-Fa-f]"))
            CALLBACKS[tag_repr] = [pattern, callback_xxxx]
        # Deal with private tags: the manufacturer must be known
        elif "gggg" in group:
            # register the manufacturer only once (previously it was
            # appended again for every private-tag rule)
            if not MANUFACTURER and (0x0008, 0x0070) in dataset:
                MANUFACTURER.append(dataset[0x0008, 0x0070].value)
            if len(MANUFACTURER) > 0:
                CALLBACKS[tag_repr] = [None, callback_private]
            else:
                raise Exception(
                    "The '(0008,0070)' manufacturer tag is not specified and "
                    "is required to anonymize private tags.")
        # Deal with standard tags
        else:
            TAGS[tag_repr] = (int(group, 16), int(element, 16)), action

    # Now compile the diffusion private tags patterns ('filedir' computed
    # above is reused)
    with open(os.path.join(filedir, "private_deidentify.json"),
              "r") as open_file:
        private_anons = json.load(open_file)
    for key, values in private_anons.items():
        for value in values:
            pattern = re.compile(value["Tag"].replace("x", "[0-9A-Fa-f]"))
            PRIVATE_DEIDENTIFY.setdefault(key, []).append(pattern)

    # Process all DICOM files
    progress_indicator = DownloadProgressBar(max=len(input_dicoms))
    dcmfiles = []
    logfiles = []
    for cnt, input_dicom in enumerate(input_dicoms):
        statinfo = os.stat(input_dicom)
        # set the suffix on the instance (not the class) so other bars
        # are not clobbered; 1e6 converts bytes to MB
        progress_indicator.suffix = "{0:.3f}MB".format(
            statinfo.st_size / 1e6)
        progress_indicator.next(1)
        output_dicom, output_log = anonymize_dicomfile(
            input_dicom, outdir, outname=str(cnt), write_log=write_logs)
        dcmfiles.append(output_dicom)
        logfiles.append(output_log)
    progress_indicator.finish()

    return dcmfiles, logfiles
def split_series(dicom_dir, outdir):
    """ Split all the folder Dicom files by series in different folders.

    Dicom files are searched recursively in the input folder and all files
    are expected to be Dicom files. Only the 'ISO_IR 100' character
    encoding is supported: files declaring another encoding are skipped
    with a warning message.

    Expect to split files from a single session.

    Parameters
    ----------
    dicom_dir: str (mandatory)
        a folder containing Dicom files to organize by series.
    outdir: str (mandatory)
        the destination folder.

    Raises
    ------
    ValueError
        If files from more than one acquisition session are detected.
    """
    # Read the incoming directory:
    # process each file in this directory and its sub-directories
    # expect each file to be a DICOM file
    to_treat_dicom = []
    for root, dirs, files in os.walk(dicom_dir):
        to_treat_dicom.extend(
            [os.path.join(root, basename) for basename in files])

    # Go through each file: expected to be in Dicom format
    progress_indicator = DownloadProgressBar(max=len(to_treat_dicom))
    acquisition_datetime = None
    for dicom_file in to_treat_dicom:

        # Update progress bar: set the suffix on the instance (not the
        # class) so other bars are not clobbered; 1e6 converts bytes to MB
        statinfo = os.stat(dicom_file)
        progress_indicator.suffix = "{0:.3f}MB".format(
            statinfo.st_size / 1e6)
        progress_indicator.next(1)

        # Get the time of last modification
        mtime = os.path.getmtime(dicom_file)

        # Read DICOM dataset
        dataset = dicom.read_file(dicom_file)

        # Find character encoding of DICOM attributes:
        # we currently expect encoding to be ISO_IR 100
        if (0x0008, 0x0005) in dataset:
            SpecificCharacterSet = dataset[0x0008, 0x0005].value
            if SpecificCharacterSet != "ISO_IR 100":
                print("'{0}' file encoding is not ISO_IR 100 as "
                      "expected.".format(dicom_file))
                continue
        else:
            # best effort: warn but still process the file
            print("Can't check encoding of '{0}', missing (0x0008, 0x0005) "
                  "tag.".format(dicom_file))

        # Process other DICOM attributes:
        # decode strings assuming 'ISO_IR 100'
        SeriesDescription = None
        SOPInstanceUID = dataset[0x0008, 0x0018].value
        if (0x0008, 0x103e) in dataset:
            SeriesDescription = cleanup(decode(dataset[0x0008, 0x103e].value))
        SeriesNumber = dataset[0x0020, 0x0011].value
        EchoTime = dataset[0x0018, 0x0081].value

        # Check the session time: study date + study time uniquely
        # identify the session
        current_acquisition_datetime = (dataset[0x0008, 0x0020].value +
                                        dataset[0x0008, 0x0030].value)
        if acquisition_datetime is None:
            acquisition_datetime = current_acquisition_datetime
        elif acquisition_datetime != current_acquisition_datetime:
            raise ValueError(
                "Two sessions detected in the input folder '{0}': {1} - "
                "{2}.".format(dicom_dir, acquisition_datetime,
                              current_acquisition_datetime))

        # Build the full path to the outgoing directory:
        # we assume that there is only one session
        if SeriesDescription:
            serie_name = (SeriesDescription + "_" + str(EchoTime) + "_" +
                          str(SeriesNumber).rjust(6, "0"))
        else:
            serie_name = str(EchoTime) + "_" + str(SeriesNumber).rjust(6, "0")
        output_dicom_dir = os.path.join(outdir, serie_name)

        # Check that the destination folder exists: 'makedirs' also
        # creates 'outdir' itself when missing ('mkdir' would fail)
        if not os.path.isdir(output_dicom_dir):
            os.makedirs(output_dicom_dir)

        # Build a new name for the DICOM file
        output_dicom_file = os.path.join(output_dicom_dir,
                                         SOPInstanceUID + '.dcm')

        # Copy DICOM file:
        # handle case where outgoing file already exists
        if os.path.exists(output_dicom_file):
            # Compare modification time and keep the most recent file
            if os.path.getmtime(output_dicom_file) < mtime:
                shutil.copy2(dicom_file, output_dicom_file)
        # file does not exist and can be copied
        else:
            shutil.copy2(dicom_file, output_dicom_file)
def split_series(dicom_dir, outdir):
    """ Sort the Dicom files of a folder into one sub-folder per series.

    The input folder is walked recursively and every file found is
    expected to be a Dicom file. All files must belong to a single
    acquisition session.

    NOTE(review): this definition duplicates an earlier 'split_series'
    in the same file and, being later, is the one that takes effect.

    Parameters
    ----------
    dicom_dir: str (mandatory)
        a folder containing Dicom files to organize by series.
    outdir: str (mandatory)
        the destination folder.
    """
    # Recursively gather every file of the input folder: each one is
    # expected to be a Dicom file
    dicom_files = []
    for root, _, filenames in os.walk(dicom_dir):
        for basename in filenames:
            dicom_files.append(os.path.join(root, basename))

    # Process each file in turn, tracking progress
    progress = DownloadProgressBar(max=len(dicom_files))
    session_datetime = None
    for path in dicom_files:

        # Refresh the progress bar with the current file size (in MB)
        DownloadProgressBar.suffix = "{0:.3f}MB".format(
            os.stat(path).st_size / 10e5)
        progress.next(1)

        # Remember when the file was last modified
        last_modified = os.path.getmtime(path)

        # Load the Dicom dataset
        dataset = dicom.read_file(path)

        # Only the ISO_IR 100 character encoding is supported: skip the
        # file when the declared encoding differs, warn when it is absent
        if (0x0008, 0x0005) not in dataset:
            print("Can't check encoding of '{0}', missing (0x0008, 0x0005) "
                  "tag.".format(path))
        elif dataset[0x0008, 0x0005].value != "ISO_IR 100":
            print("'{0}' file encoding is not ISO_IR 100 as "
                  "expected.".format(path))
            continue

        # Extract the attributes of interest, decoded as 'ISO_IR 100'
        sop_instance_uid = dataset[0x0008, 0x0018].value
        series_description = None
        if (0x0008, 0x103e) in dataset:
            series_description = cleanup(
                decode(dataset[0x0008, 0x103e].value))
        series_number = dataset[0x0020, 0x0011].value
        echo_time = dataset[0x0018, 0x0081].value

        # All the files must come from the same acquisition session
        file_datetime = (dataset[0x0008, 0x0020].value +
                         dataset[0x0008, 0x0030].value)
        if session_datetime is None:
            session_datetime = file_datetime
        elif session_datetime != file_datetime:
            raise ValueError(
                "Two sessions detected in the input folder '{0}': {1} - "
                "{2}.".format(dicom_dir, session_datetime, file_datetime))

        # Derive the destination folder name of this series (single
        # session assumed)
        if series_description:
            serie_name = (series_description + "_" + str(echo_time) + "_" +
                          str(series_number).rjust(6, "0"))
        else:
            serie_name = (str(echo_time) + "_" +
                          str(series_number).rjust(6, "0"))
        series_dir = os.path.join(outdir, serie_name)
        if not os.path.isdir(series_dir):
            os.mkdir(series_dir)

        # Copy the file under its SOP instance UID, keeping the most
        # recently modified version when a copy already exists
        destination = os.path.join(series_dir, sop_instance_uid + '.dcm')
        if (not os.path.exists(destination) or
                os.path.getmtime(destination) < last_modified):
            shutil.copy2(path, destination)
def anonymize_dicomdir(inputdir, outdir, write_logs=True):
    """ Anonymize all DICOM files of the input directory.

    NOTE(review): this definition duplicates an earlier
    'anonymize_dicomdir' in the same file and, being later, is the one
    that takes effect.

    Parameters
    ----------
    inputdir: str (mandatory)
        A folder that contains only DICOM files to be anonymized.
    outdir: str (mandatory)
        The anonimized DICOM files folder.
    write_logs: bool (optional, default True)
        If True write the anonimization logs.

    Returns
    -------
    dcmfiles: str
        The anonimized DICOM files.
    logfiles: list
        The anonimization log files.
    """
    # The first dataset of the folder is used to detect the manufacturer
    input_dicoms = []
    for fname in os.listdir(inputdir):
        input_dicoms.append(os.path.join(inputdir, fname))
    reference_dataset = dicom.read_file(input_dicoms[0], force=True)

    # Read the tag description resource file shipped with this module,
    # skipping its first (header) entry
    module_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(module_dir, "deidentify.json"), "r") as json_file:
        deidentify_rules = json.load(json_file)[1:]

    # Register a callback or a tag action for every rule, following
    # PS 3.15-2008 and supplement 142
    for rule in deidentify_rules:
        # rules store tags as '(gggg,eeee)': strip the parentheses
        tag_repr = rule["Tag"][1:-1]
        action = rule["Basic Profile"]
        group, element = tag_repr.split(",", 1)
        if "xx" in group or "xx" in element:
            # Repeating-group tags: matched with a regular expression
            CALLBACKS[tag_repr] = [
                re.compile(tag_repr.replace("x", "[0-9A-Fa-f]")),
                callback_xxxx]
        elif "gggg" in group:
            # Private tags: the manufacturer must be known
            if (0x0008, 0x0070) in reference_dataset:
                MANUFACTURER.append(reference_dataset[0x0008, 0x0070].value)
            if MANUFACTURER:
                CALLBACKS[tag_repr] = [None, callback_private]
            else:
                raise Exception(
                    "The '(0008,0070)' manufacturer tag is not specified and "
                    "is required to anonymize private tags.")
        else:
            # Standard tags
            TAGS[tag_repr] = (int(group, 16), int(element, 16)), action

    # Compile the diffusion private tags patterns
    module_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(module_dir, "private_deidentify.json"),
              "r") as json_file:
        private_rules = json.load(json_file)
    for key, rules in private_rules.items():
        for rule in rules:
            PRIVATE_DEIDENTIFY.setdefault(key, []).append(
                re.compile(rule["Tag"].replace("x", "[0-9A-Fa-f]")))

    # Anonymize every file of the folder, tracking progress
    progress = DownloadProgressBar(max=len(input_dicoms))
    dcmfiles = []
    logfiles = []
    for index, dicom_path in enumerate(input_dicoms):
        # show the current file size (in MB) next to the bar
        DownloadProgressBar.suffix = "{0:.3f}MB".format(
            os.stat(dicom_path).st_size / 10e5)
        progress.next(1)
        anon_file, anon_log = anonymize_dicomfile(
            dicom_path, outdir, outname=str(index), write_log=write_logs)
        dcmfiles.append(anon_file)
        logfiles.append(anon_log)
    progress.finish()

    return dcmfiles, logfiles