Example #1
0
def change_tag(dicom, field, value):
    """Set a header value on a dicom dataset; shared by update_tag/add_tag.

       The callers differ only in their print output and in the
       conditions under which they invoke this function.

       Parameters
       ==========
       dicom: the pydicom.dataset Dataset (pydicom.read_file)
       field: the name of the field to add (str) or an existing BaseTag
       value: the value to set, if name is a valid tag

    """
    if isinstance(field, str):
        # String field name: resolve it through the tag lookup first
        lookup = get_tag(field)
        if field not in lookup:
            bot.error("%s is not a valid field to add. Skipping." % (field))
        else:
            entry = lookup[field]
            dicom.add_new(entry["tag"], entry["VR"], value)
    else:
        # Already a tag (type BaseTag): reuse the VR of the existing element
        existing = dicom.get(field)
        dicom.add_new(field, existing.VR, value)

    return dicom
Example #2
0
def load_combined_deid(deids):
    '''Load one or more deids (each a path or a tag) and merge them.

       The first successfully loaded deid becomes the base template;
       later deids append their filters/headers so earlier entries keep
       preference. All loaded deids must share one format.

       Parameters
       ==========
       deids: should be a custom list of deids

    '''
    if not isinstance(deids, list):
        bot.warning("load_combined_deids expects a list.")
        sys.exit(1)

    found_format = None
    deid = None

    for single_deid in deids:

        # get_deid returns None when the input is neither a tag nor a path
        next_deid = get_deid(tag=single_deid,
                             exit_on_fail=False,
                             quiet=True,
                             load=True)

        if next_deid is None:
            bot.warning('Problem loading %s, skipping.' % single_deid)
            continue

        # Every deid in the set must share one format
        if found_format is None:
            found_format = next_deid['format']
        elif found_format != next_deid['format']:
            bot.error('Mismatch in deid formats, %s and %s' %
                      (found_format, next_deid['format']))
            sys.exit(1)

        # First successful load becomes the starter template
        if deid is None:
            deid = next_deid
            continue

        # Merge filters, appending so first-loaded groups keep preference
        if "filter" in next_deid:
            if "filter" not in deid:
                deid['filter'] = next_deid['filter']
            else:
                for name, group in next_deid['filter'].items():
                    if name in deid['filter']:
                        deid['filter'][name] = deid['filter'][name] + group
                    else:
                        deid['filter'][name] = group

        # Merge headers by simple concatenation
        if "header" in next_deid:
            if "header" not in deid:
                deid['header'] = next_deid['header']
            else:
                deid['header'] = deid['header'] + next_deid['header']

    return deid
Example #3
0
def find_deid(path=None):
    '''Locate a deid settings file.

    If path names a directory, search it for files starting with "deid"
    and take the first; if path is already a file, return it unchanged.

    Parameters
    ==========
    path: a path on the filesystem. If not provided, will assume PWD.
    '''
    if path is None:
        path = os.getcwd()

    if os.path.isdir(path):
        # Directory given: collect candidate files named deid*
        contenders = []
        for entry in os.listdir(path):
            if entry.startswith('deid'):
                contenders.append("%s/%s" % (path, entry))

        if not contenders:
            bot.error("No deid settings files found in %s, exiting." % (path))
            sys.exit(1)

        if len(contenders) > 1:
            bot.warning("Multiple deid files found in %s, will use first." %
                        (path))

        path = contenders[0]

    # At this point path should name a file on disk
    if not os.path.exists(path):
        bot.error("Cannot find deid file %s, exiting." % (path))
        sys.exit(1)

    return path
Example #4
0
def main(args, parser):
    """Run the GET and/or PUT de-identification actions over dicom files.

       Parameters
       ==========
       args: parsed argparse namespace (reads outfolder, deid, format,
             input, action, ids, overwrite)
       parser: the argparse parser (unused here, kept for the CLI signature)
    """

    # Global output folder (temporary directory when none is provided)
    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    if args.deid is not None:
        params = load_deid(args.deid)
        if params["format"] != args.format:
            # NOTE(review): bot.error may not exit here — confirm whether
            # execution should continue after a format mismatch.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params["format"], args.format))

    # Get list of dicom files
    base = args.input
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")
    basename = os.path.basename(base)
    dicom_files = list(
        get_files(base))  # todo : consider using generator functionality

    do_get = False
    do_put = False
    ids = None
    if args.action == "all":
        bot.info("GET and PUT identifiers from %s" % (basename))
        do_get = True
        do_put = True

    elif args.action == "get":
        do_get = True
        # Bug fix: this branch only performs GET, message said "GET and PUT"
        bot.info("GET identifiers from %s" % (basename))

    elif args.action == "put":
        bot.info("PUT identifiers from %s" % (basename))
        do_put = True
        if args.ids is None:
            bot.exit(
                "To PUT without GET you must provide a json file with ids.")

        ids = args.ids

    # GET identifiers
    if do_get is True:
        ids = get_identifiers(dicom_files)

    if do_put is True:
        cleaned_files = replace_identifiers(
            dicom_files=dicom_files,
            ids=ids,
            deid=args.deid,
            overwrite=args.overwrite,
            output_folder=output_folder,
        )

        bot.info("%s %s files at %s" %
                 (len(cleaned_files), args.format, output_folder))
Example #5
0
def get_identifiers(
    dicom_files, force=True, config=None, strip_sequences=False, remove_private=False
):
    """ extract all identifiers from a dicom image.
        This function returns a lookup by file name, where each value indexed
        includes a dictionary of nested fields (indexed by nested tag).

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        force: force reading the file (default True)
        config: if None, uses default in provided module folder
        strip_sequences: if True, remove all sequences
        remove_private: remove private tags

        NOTE(review): strip_sequences and remove_private are accepted but
        never used in this body — confirm whether they should be forwarded
        to DicomParser.
    """
    # Default to the config.json shipped alongside this module
    if config is None:
        config = "%s/config.json" % here

    if not os.path.exists(config):
        # NOTE(review): bot.error may not exit here — if it returns,
        # read_json below will fail on the missing path; confirm.
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    # Accept a single file as well as a list
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))
    lookup = dict()

    # Parse each dicom file, keyed by the parser's resolved file name
    for dicom_file in dicom_files:
        parser = DicomParser(dicom_file, force=force)
        lookup[parser.dicom_file] = parser.get_fields()

    return lookup
Example #6
0
def update_tag(dicom, field, value):
    """Update a header value, but only when the field already exists.

       This existence check is the only difference between this function
       and change_tag. To add a value that might not exist, use add_tag
       with a private identifier as a string.

       Parameters
       ==========
       dicom: the pydicom.dataset Dataset (pydicom.read_file)
       field: the name of the field to update (str) or an existing BaseTag
       value: the value to set, if name is a valid tag

    """
    # Nothing to do when the field is absent from the header
    if field not in dicom:
        return dicom

    if isinstance(field, str):
        # String field name: resolve to a (tag, VR) pair
        lookup = get_tag(field)
        if not lookup:
            bot.error("%s is not a valid field to add. Skipping." % (field))
        else:
            dicom.add_new(lookup["tag"], lookup["VR"], value)
    else:
        # Already a tag (type BaseTag): reuse the VR of the existing element
        existing = dicom.get(field)
        dicom.add_new(field, existing.VR, value)

    return dicom
Example #7
0
def _prepare_replace_config(dicom_files, deid=None, config=None):
    """ Normalize the inputs used by replace_identifiers.

        Ensures dicom_files is a list, deid is a DeidRecipe, and config
        is the parsed json configuration (the module's default config
        when none is given).

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        deid: a deid recipe (path, tag, or DeidRecipe instance)
        config: if None, uses default in provided module folder

    """
    if config is None:
        config = "{0}/config.json".format(here)
    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))

    # Wrap a raw deid (path/tag/None) into a recipe object
    if not isinstance(deid, DeidRecipe):
        deid = DeidRecipe(deid)

    config = read_json(config, ordered_dict=True)

    # A single file is treated as a list of one
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    return dicom_files, deid, config
Example #8
0
def save_dicom(dicom, dicom_file, output_folder=None, overwrite=False):
    '''Write a dicom dataset to an output folder, refusing to clobber
       an existing file unless the user has asked for overwrite.

       Parameters
       ==========
       dicom: the pydicom Dataset to save
       dicom_file: the path to the dicom file to save (we only use basename)
       output_folder: the folder to save the file to
       overwrite: overwrite any existing file? (default is False)

    '''
    if output_folder is None:
        # Without overwrite, write into a fresh temp dir to stay safe;
        # with overwrite, write back next to the source file.
        if overwrite is False:
            output_folder = tempfile.mkdtemp()
        else:
            output_folder = os.path.dirname(dicom_file)

    dicom_name = os.path.basename(dicom_file)
    output_dicom = os.path.join(output_folder, dicom_name)

    dowrite = True
    if overwrite is False and os.path.exists(output_dicom):
        bot.error("%s already exists, overwrite set to False. Not writing." % dicom_name)
        dowrite = False

    if dowrite:
        dicom.save_as(output_dicom)
    return output_dicom
Example #9
0
def main(args, parser):
    """inspect currently serves to inspect the header fields of a set
       of dicom files against a standard, and flag images that don't
       pass the different levels of criteria

       Parameters
       ==========
       args: parsed argparse namespace (reads deid, format, folder,
             pattern, save)
       parser: the argparse parser (unused here, kept for the CLI signature)
    """

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params["format"] != args.format:
            # NOTE(review): bot.error may not exit — confirm whether we
            # should stop here on a format mismatch.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting."
                % (params["format"], args.format)
            )
    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset("dicom-cookies")

    dicom_files = list(
        get_files(base, pattern=args.pattern)
    )  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print("\nSUMMARY ================================\n")
    if result["clean"]:
        bot.custom(
            prefix="CLEAN", message="%s files" % len(result["clean"]), color="CYAN"
        )

    if result["flagged"]:
        for group, files in result["flagged"].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save:
        # NOTE(review): iterating base (a path string) yields characters,
        # so this joins basenames of single characters — likely intended
        # to be os.path.basename(base); confirm before changing.
        folders = "-".join([os.path.basename(folder) for folder in base])
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders,
            datetime.datetime.now().strftime("%y-%m-%d"),
        )

        with open(outfile, "w") as filey:
            filey.writelines("dicom_file\tpixels_flagged\tflag_list\treason\n")

            for clean in result["clean"]:
                filey.writelines("%s\tCLEAN\t\t\n" % clean)

            for flagged, details in result["flagged"].items():
                if details["flagged"] is True:
                    # Bug fix: the inner loop variable was named `result`,
                    # shadowing (and clobbering) the outer summary dict.
                    for flag_result in details["results"]:
                        group = flag_result["group"]
                        reason = flag_result["reason"]
                        filey.writelines(
                            "%s\tFLAGGED\t%s\t%s\n" % (flagged, group, reason)
                        )

            print("Result written to %s" % outfile)
Example #10
0
def main(args, parser):
    '''inspect currently serves to inspect the header fields of a set
    of dicom files against a standard, and flag images that don't
    pass the different levels of criteria

    Parameters
    ==========
    args: parsed argparse namespace (reads deid, format, folder,
          pattern, save)
    parser: the argparse parser (unused here, kept for the CLI signature)
    '''

    # Global output folder
    #output_folder = args.outfolder
    #if output_folder is None:
    #    output_folder = tempfile.mkdtemp()

    # If a deid is given, check against format
    deid = args.deid
    if deid is not None:
        params = load_deid(deid)
        if params['format'] != args.format:
            # NOTE(review): bot.error may not exit — confirm whether we
            # should stop here on a format mismatch.
            bot.error(
                "Format in deid (%s) doesn't match choice here (%s) exiting." %
                (params['format'], args.format))
    # Get list of dicom files
    base = args.folder
    if base is None:
        bot.info("No input folder specified, will use demo dicom-cookies.")
        base = get_dataset('dicom-cookies')

    dicom_files = list(get_files(
        base,
        pattern=args.pattern))  # todo : consider using generator functionality
    result = has_burned_pixels(dicom_files, deid=deid)

    print('\nSUMMARY ================================\n')
    if len(result['clean']) > 0:
        bot.custom(prefix='CLEAN',
                   message="%s files" % len(result['clean']),
                   color="CYAN")

    if len(result['flagged']) > 0:
        for group, files in result['flagged'].items():
            bot.flag("%s %s files" % (group, len(files)))

    if args.save is True:
        # NOTE(review): iterating base (a path string) yields characters;
        # this was likely meant to be os.path.basename(base) — confirm.
        folders = '-'.join([os.path.basename(folder) for folder in base])
        outfile = "pixel-flag-results-%s-%s.tsv" % (
            folders, datetime.datetime.now().strftime('%y-%m-%d'))
        with open(outfile, 'w') as filey:
            filey.writelines('dicom_file\tpixels_flagged\tflag_list\treason\n')
            for clean in result['clean']:
                filey.writelines('%s\tCLEAN\t\t\n' % clean)
            for flagged, details in result['flagged'].items():
                if details['flagged'] is True:
                    # Bug fix: the inner loop variable was named `result`,
                    # shadowing (and clobbering) the outer summary dict.
                    for flag_result in details['results']:
                        group = flag_result['group']
                        reason = flag_result['reason']
                        filey.writelines('%s\tFLAGGED\t%s\t%s\n' %
                                         (flagged, group, reason))

            print('Result written to %s' % outfile)
Example #11
0
 def remove_private(self):
     """Remove private tags from the loaded dicom.

     Falls back to deleting private elements one by one when pydicom's
     bulk removal fails (usually due to an invalid data type).
     """
     try:
         self.dicom.remove_private_tags()
     except Exception:
         # Bug fix: bare `except:` also swallowed SystemExit and
         # KeyboardInterrupt; narrowed to Exception.
         bot.error(
             """Private tags for %s could not be completely removed, usually
                      this is due to invalid data type. Removing others."""
             % self.dicom_name)
         for ptag in get_private(self.dicom):
             del self.dicom[ptag.tag]
Example #12
0
def get_identifiers(dicom_files,
                    force=True,
                    config=None,
                    expand_sequences=True,
                    skip_fields=None):
    """ Extract all identifiers from one or more dicom images.

        Returns a lookup by file name; private tags are not included.

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        force: force reading the file (default True)
        config: if None, uses default in provided module folder
        expand_sequences: if True, expand sequences. Otherwise, skips
        skip_fields: if not None, added fields to skip

    """
    if config is None:
        config = "%s/config.json" % here

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    # A single file is treated as a list of one
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))

    # Fields to skip (PixelData by default), plus any user additions
    skip = config["skip"]
    if skip_fields is not None:
        extra = skip_fields if isinstance(skip_fields, list) else [skip_fields]
        skip = skip + extra

    ids = dict()  # identifiers
    for dicom_file in dicom_files:

        # Accept either an already-loaded Dataset or a path on disk
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)

        ids[dicom_file] = get_fields(dicom,
                                     skip=skip,
                                     expand_sequences=expand_sequences)
    return ids
Example #13
0
def change_tag(dicom, field, value):
    '''General tag setter used by update_tag and add_tag; the callers
    differ only in their print output and in the conditions under which
    they invoke this function.
    '''
    lookup = get_tag(field)

    if field not in lookup:
        bot.error("%s is not a valid field to add. Skipping." % (field))
    else:
        entry = lookup[field]
        dicom.add_new(entry['tag'], entry['VR'], value)

    return dicom
Example #14
0
def get_identifiers(dicom_files,
                    force=True,
                    config=None,
                    expand_sequences=True,
                    skip_fields=None):
    ''' extract all identifiers from a dicom image.
        This function returns a lookup by file name

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        force: force reading the file (default True)
        config: if None, uses default in provided module folder
        expand_sequences: if True, expand sequences. otherwise, skips
        skip_fields: if not None, added fields to skip

    '''
    if config is None:
        config = "%s/config.json" % (here)

    if not os.path.exists(config):
        # NOTE(review): bot.error may not exit here — if it returns,
        # read_json below will fail on the missing path; confirm.
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)['get']

    # Bug fix: normalize to a list BEFORE taking len() for the debug
    # message (len() was previously called on a possibly non-list input)
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug('Extracting identifiers for %s dicom' % (len(dicom_files)))

    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config['skip']
    if skip_fields is not None:
        if not isinstance(skip_fields, list):
            skip_fields = [skip_fields]
        skip = skip + skip_fields

    for dicom_file in dicom_files:
        # Bug fix: honor the force parameter (was hard-coded to True)
        dicom = read_file(dicom_file, force=force)

        if dicom_file not in ids:
            ids[dicom_file] = dict()

        ids[dicom_file] = get_fields(dicom,
                                     skip=skip,
                                     expand_sequences=expand_sequences)
    return ids
Example #15
0
    def __init__(self, dicom_file, recipe=None, config=None, force=True):
        """Load a dicom file and prepare its recipe and configuration.

        Parameters
        ==========
        dicom_file: path to the dicom file to load
        recipe: a DeidRecipe instance, or a value to construct one from
        config: path to a config json; defaults to config.json next to
                this module
        force: forwarded to load() to force reading the file
        """
        # Lookup for the dicom
        self.lookup = {}

        # Will be a list of DicomField
        # NOTE(review): initialized as a dict, not a list — confirm which
        # container get_fields() actually populates here.
        self.fields = {}

        # Load default configuration, or a custom one
        config = config or os.path.join(here, "config.json")
        if not os.path.exists(config):
            # NOTE(review): bot.error may not exit here — if it returns,
            # read_json below will fail on the missing path; confirm.
            bot.error("Cannot find config %s, exiting" % (config))
        self.config = read_json(config, ordered_dict=True)

        # Deid can be a recipe or filename
        if not isinstance(recipe, DeidRecipe):
            recipe = DeidRecipe(recipe)
        self.load(dicom_file, force=force)
        self.recipe = recipe
Example #16
0
def change_tag(dicom, field, value):
    '''General tag setter shared by update_tag and add_tag; the callers
       differ only in their print output and in the conditions under
       which they invoke this function.

       Parameters
       ==========
       dicom: the pydicom.dataset Dataset (pydicom.read_file)
       field: the name of the field to add
       value: the value to set, if name is a valid tag

    '''
    lookup = get_tag(field)

    if field not in lookup:
        bot.error("%s is not a valid field to add. Skipping." % (field))
    else:
        entry = lookup[field]
        dicom.add_new(entry['tag'], entry['VR'], value)

    return dicom
Example #17
0
def get_deid(tag=None, exit_on_fail=True, quiet=False, load=False):
    '''Resolve a deid file bundled with the software from a tag or path.

       A file deid/data/deid.dicom corresponds to the tag "dicom".

       Parameters
       ==========
       tag: the text that comes after deid to indicate the tag of the file
            in deid/data; may also be a filesystem path or an
            already-loaded deid (dict)
       exit_on_fail: if None is an acceptable return value, this should be
                     set to False (default is True).
       quiet: Default False. If None is acceptable, quiet can be set to True
       load: also load the deid, if resulting path (from path or tag) is not None

    '''
    # Default to the dicom deid when nothing is asked for
    if tag is None:
        tag = 'dicom'

    # An already-loaded deid is returned untouched
    if isinstance(tag, dict):
        bot.debug('deid is already loaded.')
        return tag

    # A path resolves to its absolute form; a tag maps into deid/data
    if os.path.exists(tag):
        deid = os.path.abspath(tag)
    else:
        deid = "%s/deid.%s" % (data_base, tag)

    if not os.path.exists(deid):
        if quiet is False:
            bot.error("Cannot find %s" % (deid))
        if exit_on_fail is True:
            sys.exit(1)
        return None

    if load is True:
        return load_deid(deid)

    return deid
Example #18
0
def save_dicom(dicom, dicom_file, output_folder=None, overwrite=False):
    '''Save a dicom dataset to an output folder, refusing to overwrite
    an existing file unless the user has asked for it.
    '''
    if output_folder is None:
        # Without overwrite, write into a fresh temp dir to stay safe;
        # with overwrite, write back next to the source file.
        if overwrite is False:
            output_folder = tempfile.mkdtemp()
        else:
            output_folder = os.path.dirname(dicom_file)

    dicom_name = os.path.basename(dicom_file)
    output_dicom = os.path.join(output_folder, dicom_name)

    dowrite = True
    if overwrite is False and os.path.exists(output_dicom):
        bot.error("%s already exists, overwrite set to False. Not writing." % dicom_name)
        dowrite = False

    if dowrite:
        dicom.save_as(output_dicom)
    return output_dicom
Example #19
0
def parse_action(section, line, config, section_name=None):
    '''Parse one action line from a deid config file and record it in
    the growing config dictionary under the active section.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: optionally, a section name

    '''
    if not line.upper().startswith(actions):
        bot.error("%s is not a valid action line." % line)
        sys.exit(1)

    # Split on spaces; the first token names the action itself
    parts = line.split(' ')
    action = parts.pop(0).replace(' ', '')

    # Every action needs a field to act on
    if len(parts) < 1:
        bot.error("%s requires a FIELD value, but not found." % (action))
        sys.exit(1)

    field = parts.pop(0)

    if action in ["ADD", "REPLACE", "JITTER"]:
        # These actions carry a value: the remainder of the line,
        # with any trailing comment stripped
        if len(parts) == 0:
            bot.error("%s requires a VALUE, but not found" % (action))
            sys.exit(1)
        value = ' '.join(parts).split('#')[0]
        bot.debug("Adding %s" % line)
        config[section].append({
            "action": action,
            "field": field,
            "value": value
        })

    elif action in ["BLANK", "KEEP", "REMOVE"]:
        # These actions take no value
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field})

    return config
Example #20
0
def add_section(config, section, section_name=None):
    '''Register a section (and optionally a named sub-level) in a config.

       Parameters
       ==========
       config: the config (dict) parsed thus far
       section: the section name to add
       section_name: an optional name, added as a level

    '''
    if section is None:
        bot.error(
            'You must define a section (e.g. %header) before any action.')
        sys.exit(1)

    if section == 'filter' and section_name is None:
        bot.error("You must provide a name for a filter section.")
        sys.exit(1)

    if section not in sections:
        bot.error("%s is not a valid section." % section)
        sys.exit(1)

    if section in config:
        # Existing section: just ensure the named sub-level is present
        if section_name is not None and section_name not in config[section]:
            config[section][section_name] = []
        return config

    # New section: a named section gets a dict level, others a plain list
    if section_name is not None:
        config[section] = OrderedDict()
        config[section][section_name] = []
        bot.debug("Adding section %s %s" % (section, section_name))
    else:
        config[section] = []
        bot.debug("Adding section %s" % section)
    return config
Example #21
0
def replace_identifiers(dicom_files,
                        ids,
                        deid=None,
                        save=True,
                        overwrite=False,
                        output_folder=None,
                        force=True,
                        config=None,
                        strip_sequences=True,
                        remove_private=True):
    '''replace identifiers using pydicom, can be slow when writing
    and saving new files

    Parameters
    ==========
    dicom_files: the dicom file(s) to update
    ids: lookup of identifiers, keyed by file basename, used by deid actions
    deid: a deid recipe (path, tag, or dict); None applies config actions only
    save: if True, save to file and collect paths; otherwise collect datasets
    overwrite: forwarded to save_dicom (refuse to clobber when False)
    output_folder: forwarded to save_dicom (temp dir when None)
    force: force reading each dicom file
    config: if None, uses default in provided module folder
    strip_sequences: if True, remove all sequences before acting
    remove_private: if True, remove private tags
    '''

    dicom_files, deid, config = _prepare_replace_config(dicom_files,
                                                        deid=deid,
                                                        config=config)

    # Parse through dicom files, update headers, and save
    updated_files = []
    for d in range(len(dicom_files)):
        dicom_file = dicom_files[d]
        dicom = read_file(dicom_file, force=force)
        # ids is keyed by basename, not the full path
        idx = os.path.basename(dicom_file)
        # NOTE(review): field names are captured before sequences are
        # stripped, so config actions below can still match them — confirm
        # this ordering is intended.
        fields = dicom.dir()

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)
        # Apply the user deid's header actions to files we have ids for
        if deid is not None:
            if idx in ids:
                for action in deid['header']:
                    dicom = perform_action(dicom=dicom,
                                           item=ids[idx],
                                           action=action)
            else:
                bot.warning("%s is not in identifiers." % idx)
                continue
        # Next perform actions in default config, only if not done
        for action in config['put']['actions']:
            if action['field'] in fields:
                dicom = perform_action(dicom=dicom, action=action)
        if remove_private is True:
            try:
                dicom.remove_private_tags()
            except:
                # Fallback: delete private elements one at a time.
                # NOTE(review): this `continue` skips rebuilding and saving
                # the dataset, dropping the file from the output after a
                # partial removal — confirm this is intended.
                bot.error(
                    '''Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others.'''
                    % idx)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                continue
        else:
            bot.warning("Private tags were not removed!")
        # Rebuild a fresh Dataset from the remaining elements; elements
        # that fail to copy are silently skipped.
        ds = Dataset()
        for field in dicom.dir():
            try:
                ds.add(dicom.data_element(field))
            except:
                pass

        # Copy original data types
        attributes = [
            'is_little_endian', 'is_implicit_VR', 'preamble',
            '_parent_encoding'
        ]
        for attribute in attributes:
            ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Retain required meta data
        file_metas = getattr(dicom, 'file_meta', Dataset())

        # Retain required meta data - not identifying
        # file_metas.MediaStorageSOPClassUID
        # file_metas.MediaStorageSOPInstanceUID
        # file_metas.ImplementationVersionName
        # file_metas.ImplementationClassUID

        # File attributes for meta
        attributes = [
            'TransferSyntaxUID', 'FileMetaInformationGroupLength',
            'FileMetaInformationVersion'
        ]
        for attribute in attributes:
            file_metas.add(dicom.file_meta.data_element(attribute))

        # Preamble is required
        ds.file_meta = file_metas
        ds.preamble = vars(dicom)['preamble']

        # Save to file? Otherwise the dataset itself is collected.
        if save is True:
            ds = save_dicom(dicom=ds,
                            dicom_file=dicom_file,
                            output_folder=output_folder,
                            overwrite=overwrite)
        updated_files.append(ds)

    return updated_files
Example #22
0
def get_shared_identifiers(dicom_files,
                           force=True,
                           config=None,
                           aggregate=None,
                           expand_sequences=True):
    """Extract identifiers shared across a set of dicom files.

    Intended for cases when a set of images (dicom) are being compressed
    into one file and the file (still) should have some searchable
    metadata. By default, fields that differ between files are removed.
    To collect unique values of a field instead, list it in aggregate.

    Parameters
    ==========
    dicom_files: the dicom file(s) to extract from
    force: force reading each file (default True)
    config: if None, uses default in provided module folder
    aggregate: list of fields whose differing values are kept in lists
    expand_sequences: if True, expand sequences (flattened into fields)
    """
    if aggregate is None:
        aggregate = []

    if config is None:
        config = "%s/config.json" % (here)

    if not os.path.exists(config):
        # NOTE(review): bot.error may not exit here — if it returns,
        # read_json below will fail on the missing path; confirm.
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    # Bug fix: normalize to a list BEFORE taking len() for the debug
    # message (len() was previously called on a possibly non-list input)
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting shared identifiers for %s dicom" %
              (len(dicom_files)))

    ids = dict()  # identifiers

    # We will skip PixelData; fields that disagree between files are
    # appended here too, so they are skipped in later files
    skip = config["skip"]
    for dicom_file in dicom_files:

        # Bug fix: honor the force parameter (was hard-coded to True)
        dicom = read_file(dicom_file, force=force)

        # Get list of fields, expanded sequences are flattened
        fields = get_fields(dicom,
                            skip=skip,
                            expand_sequences=expand_sequences)

        for key, val in fields.items():

            # If it's there, only keep if the same
            if key in ids:

                # Items to aggregate are appended, not removed
                if key in aggregate:
                    if val not in ids[key]:
                        ids[key].append(val)
                else:

                    # Keep only if equal between
                    if ids[key] == val:
                        continue
                    else:
                        del ids[key]
                        skip.append(key)
            else:
                if key in aggregate:
                    val = [val]
                ids[key] = val

    # For any aggregates that are one item, unwrap again
    for field in aggregate:
        if field in ids:
            if len(ids[field]) == 1:
                ids[field] = ids[field][0]

    return ids
Example #23
0
def replace_identifiers(
    dicom_files,
    ids=None,
    deid=None,
    save=True,
    overwrite=False,
    output_folder=None,
    force=True,
    config=None,
    strip_sequences=True,
    remove_private=True,
):
    """replace identifiers using pydicom, can be slow when writing
       and saving new files. If you want to replace sequences, they need
       to be extracted with get_identifiers and expand_sequences to True.

       Parameters
       ==========
       dicom_files: one or more file paths, or loaded pydicom Datasets
       ids: optional lookup of extracted values/fields keyed by file path
       deid: optional deid recipe, resolved by _prepare_replace_config
       save: if True, write each cleaned dataset to disk via save_dicom
       overwrite: whether save_dicom may overwrite an existing output file
       output_folder: folder handed to save_dicom for outputs
       force: force reading of potentially erroneous dicom files
       config: optional config with default "put" actions
       strip_sequences: remove sequence elements before processing
       remove_private: remove private tags before processing

       Returns
       =======
       updated_files: list of cleaned pydicom Datasets (saved if save is True)
    """
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # ids (a lookup) is not required
    ids = ids or {}

    # Parse through dicom files, update headers, and save
    updated_files = []
    for _, dicom_file in enumerate(dicom_files):

        # Accept an already-loaded Dataset; fall back to its filename for lookups
        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        # Remove private tags at the onset, if requested
        if remove_private:
            try:
                dicom.remove_private_tags()
            # Was a bare except: narrowed so SystemExit/KeyboardInterrupt propagate
            except Exception:
                bot.error(
                    """Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others."""
                    % dicom_name)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this continue drops the file from updated_files
                # entirely (no recipe/config actions, no save) — confirm intended
                continue

        # Include private tags (if not removed) plus dicom.dir
        fields = dicom_dir(dicom)

        if recipe.deid is not None:

            if dicom_file not in ids:
                ids[dicom_file] = {}

            # Prepare additional lists of values and fields (updates item)
            if recipe.has_values_lists():
                for group, actions in recipe.get_values_lists().items():
                    ids[dicom_file][group] = extract_values_list(
                        dicom=dicom, actions=actions)

            if recipe.has_fields_lists():
                for group, actions in recipe.get_fields_lists().items():
                    ids[dicom_file][group] = extract_fields_list(
                        dicom=dicom, actions=actions)

            # Apply each recipe action, using values extracted for this file
            for action in recipe.get_actions():
                dicom = perform_action(dicom=dicom,
                                       item=ids[dicom_file],
                                       action=action)

        # Next perform actions in default config, only if not done
        for action in config["put"]["actions"]:
            if action["field"] in fields:
                dicom = perform_action(dicom=dicom, action=action)

        # Assemble a new dataset, again accounting for private tags
        ds = Dataset()
        for field in dicom_dir(dicom):

            try:
                # Most fields are strings
                if isinstance(field, str):
                    ds.add(dicom.data_element(field))

                # Remainder are tags
                else:
                    ds.add(dicom.get(field))
            # Was a bare except: best-effort copy, skip elements that cannot be read
            except Exception:
                pass

        # Copy pydicom read-state attributes so the new dataset serializes
        # the same way the original was read
        attributes = [
            "is_little_endian",
            "is_implicit_VR",
            "is_decompressed",
            "read_encoding",
            "read_implicit_vr",
            "read_little_endian",
            "_parent_encoding",
        ]

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128

        for attribute in attributes:
            if hasattr(dicom, attribute):
                ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Original meta data                     # or default empty dataset
        file_metas = getattr(dicom, "file_meta", Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, "MediaStorageSOPInstanceUID"):
            file_metas.MediaStorageSOPInstanceUID = ""

        # Save meta data
        ds.file_meta = file_metas

        # Save to file?
        if save is True:
            ds = save_dicom(
                dicom=ds,
                dicom_file=dicom_file,
                output_folder=output_folder,
                overwrite=overwrite,
            )
        updated_files.append(ds)

    return updated_files
Example #24
0
def load_deid(path=None):
    '''load_deid will return a loaded in (user) deid configuration file
    that can be used to update a default config.json. If a file path is
    specified, it is loaded directly. If a folder is specified, we look
    for a deid file in the folder. If nothing is specified, we assume
    the user wants to load a deid file in the present working directory.
    If the user wants to have multiple deid files in a directory, this
    can be done with an extension that specifies the module, eg;

             deid.dicom
             deid.nifti

    Parameters
    ==========
    path: a path to a deid file

    Returns
    =======
    config: a parsed deid (dictionary) with valid sections

    '''
    path = find_deid(path)

    # Read in spec, clean up extra spaces and newlines
    spec = [
        x.strip('\n').strip(' ') for x in read_file(path)
        if x.strip('\n').strip(' ') not in ['']
    ]

    spec = [x for x in spec if x not in ['', None]]
    config = OrderedDict()
    section = None
    # Fix: section_name must exist before the first action line is parsed;
    # previously a spec with an action before any %section raised NameError
    section_name = None

    while len(spec) > 0:

        # Clean up white trailing/leading space
        line = spec.pop(0).strip()

        # Comment
        if line.startswith("#"):
            continue

        # Starts with Format?
        elif bool(re.match('format', line, re.I)):
            # Fix: strip the keyword case-insensitively to match the re.I test
            # above (previously only uppercase FORMAT was removed)
            fmt = re.sub(r'format|(\s+)', '', line, flags=re.I).lower()
            if fmt not in formats:
                bot.error("%s is not a valid format." % fmt)
                sys.exit(1)
            # Set format
            config['format'] = fmt
            bot.debug("FORMAT set to %s" % fmt)

        # A new section?
        elif line.startswith('%'):

            # Remove any comments
            line = line.split('#', 1)[0].strip()

            # Is there a section name?
            section_name = None
            parts = line.split(' ')
            if len(parts) > 1:
                section_name = ' '.join(parts[1:])
            section = re.sub(r'[%]|(\s+)', '', parts[0]).lower()
            if section not in sections:
                bot.error("%s is not a valid section." % section)
                sys.exit(1)
            config = add_section(config=config,
                                 section=section,
                                 section_name=section_name)

        # An action (replace, blank, remove, keep, jitter)
        elif line.upper().startswith(actions):

            # Start of a filter group
            if line.upper().startswith('LABEL') and section == "filter":
                members = []

                # Consume member lines until the next LABEL, the next %section,
                # or the end of the spec (fix: previously spec[0] raised
                # IndexError when LABEL was the last line of the file)
                while spec:
                    next_line = spec[0].upper().strip()
                    if next_line.startswith('LABEL') or next_line.startswith("%"):
                        break
                    members.append(spec.pop(0))

                # Add the filter label to the config
                config = parse_label(config=config,
                                     section=section,
                                     label=line,
                                     section_name=section_name,
                                     members=members)
            # Parse the action
            else:
                config = parse_action(section=section,
                                      section_name=section_name,
                                      line=line,
                                      config=config)
        else:
            bot.debug("%s not recognized to be in valid format, skipping." %
                      line)
    return config
Example #25
0
def parse_member(members, operator=None):
    '''parse one filter criteria line (e.g. "contains PatientID 12345",
    possibly chained with || or +) into its component actions, fields,
    values, and operators.

    Parameters
    ==========
    members: the raw criteria line to parse
    operator: the operator ("and"/"or") joining this line to the previous
              one in the filter group, if any

    Returns
    =======
    entry: dict with keys action, field, operator, InnerOperators, value
    '''
    main_operator = operator

    actions = []
    values = []
    fields = []
    operators = []
    members = [members]

    while len(members) > 0:

        operator = None
        value = None
        member = members.pop(0).strip()

        # Find the first || or + (raw strings: '\|' / '\+' are invalid
        # escape sequences in plain strings and warn on modern Python)
        match_or = re.search(r'\|\|', member)
        match_and = re.search(r'\+', member)

        # Prefer whichever operator occurs first in the line
        if match_or is not None:
            operator = "||"
        if match_and is not None:
            if match_or is not None:
                if match_or.start() >= match_and.start():
                    operator = "+"
            else:
                operator = "+"

        if operator is not None:

            member, rest = member.split(operator, 1)

            # The rest is only valid if contains a filter statement
            if any(word in rest for word in filters):
                members.append(rest.strip())

                # Split the statement based on found operator
                operator = (operator.replace('||', 'or').replace('+', 'and'))
                operators.append(operator)
            else:
                # Not a chained statement: the operator was part of the value
                member = operator.join([member, rest])

        # Parse the member: first token is the action
        action, member = member.split(' ', 1)
        action = action.lower().strip()

        # Contains, equals, not equals expects FieldName Values
        if action in ['contains', 'equals', 'notequals']:
            try:
                field, value = member.split(' ', 1)
            except ValueError:
                bot.error(
                    '%s for line %s must have field and values, exiting.' %
                    (action, member))
                sys.exit(1)

        # Missing, empty, notcontains expect only a field
        elif action in ['missing', 'empty', 'notcontains', 'present']:
            field = member.strip()
        else:
            bot.error('%s is not a valid filter action.' % action)
            sys.exit(1)

        actions.append(action)
        fields.append(field.strip())

        if value is not None:
            values.append(value.strip())

    entry = {
        'action': actions,
        'field': fields,
        'operator': main_operator,
        'InnerOperators': operators,
        'value': values
    }
    return entry
Example #26
0
def replace_identifiers(dicom_files,
                        ids,
                        deid=None,
                        save=True,
                        overwrite=False,
                        output_folder=None,
                        force=True,
                        config=None,
                        strip_sequences=True,
                        remove_private=True):
    '''replace identifiers using pydicom, can be slow when writing
       and saving new files. If you want to replace sequences, they need
       to be extracted with get_identifiers and expand_sequences to True.

       Parameters
       ==========
       dicom_files: one or more dicom file paths to de-identify
       ids: lookup of extracted identifiers keyed by dicom file path
       deid: optional deid recipe, resolved by _prepare_replace_config
       save: if True, write each cleaned dataset via save_dicom
       overwrite: whether save_dicom may overwrite an existing file
       output_folder: folder handed to save_dicom for outputs
       force: force reading of potentially erroneous dicom files
       config: optional config with default "put" actions
       strip_sequences: remove sequence elements before processing
       remove_private: remove private tags after the recipe actions

       Returns
       =======
       updated_files: list of cleaned pydicom Datasets
    '''
    dicom_files, recipe, config = _prepare_replace_config(dicom_files,
                                                          deid=deid,
                                                          config=config)

    # Parse through dicom files, update headers, and save
    updated_files = []
    for _, dicom_file in enumerate(dicom_files):
        dicom = read_file(dicom_file, force=force)
        dicom_name = os.path.basename(dicom_file)
        fields = dicom.dir()

        # Remove sequences first, maintained in DataStore
        if strip_sequences is True:
            dicom = remove_sequences(dicom)

        if recipe.deid is not None:
            if dicom_file in ids:
                # Fix: actions come from the parsed recipe; the raw `deid`
                # argument may be a path string and has no get_actions
                for action in recipe.get_actions():
                    dicom = perform_action(dicom=dicom,
                                           item=ids[dicom_file],
                                           action=action)
            else:
                bot.warning("%s is not in identifiers." % dicom_name)
                continue

        # Next perform actions in default config, only if not done
        for action in config['put']['actions']:
            if action['field'] in fields:
                dicom = perform_action(dicom=dicom, action=action)
        if remove_private is True:
            try:
                dicom.remove_private_tags()
            # Was a bare except: narrowed so SystemExit/KeyboardInterrupt propagate
            except Exception:
                bot.error(
                    '''Private tags for %s could not be completely removed, usually
                             this is due to invalid data type. Removing others.'''
                    % dicom_name)
                private_tags = get_private(dicom)
                for ptag in private_tags:
                    del dicom[ptag.tag]
                # NOTE(review): this continue drops the file from updated_files
                # entirely (no rebuild, no save) — confirm intended
                continue
        else:
            bot.warning("Private tags were not removed!")

        # Assemble a new dataset from the cleaned fields
        ds = Dataset()
        for field in dicom.dir():
            try:
                ds.add(dicom.data_element(field))
            # Was a bare except: best-effort copy, skip unreadable elements
            except Exception:
                pass

        # Copy pydicom read-state attributes so the new dataset serializes
        # the same way the original was read
        attributes = [
            'is_little_endian', 'is_implicit_VR', 'is_decompressed',
            'read_encoding', 'read_implicit_vr', 'read_little_endian',
            '_parent_encoding'
        ]

        # We aren't including preamble, we will reset to be empty 128 bytes
        ds.preamble = b"\0" * 128

        for attribute in attributes:
            if hasattr(dicom, attribute):
                ds.__setattr__(attribute, dicom.__getattribute__(attribute))

        # Original meta data                     # or default empty dataset
        file_metas = getattr(dicom, 'file_meta', Dataset())

        # Media Storage SOP Instance UID can be identifying
        if hasattr(file_metas, 'MediaStorageSOPInstanceUID'):
            file_metas.MediaStorageSOPInstanceUID = ''

        # Save meta data
        ds.file_meta = file_metas

        # Save to file?
        if save is True:
            ds = save_dicom(dicom=ds,
                            dicom_file=dicom_file,
                            output_folder=output_folder,
                            overwrite=overwrite)
        updated_files.append(ds)

    return updated_files
Example #27
0
def _has_burned_pixels_single(dicom_file,force=True, deid=None):

    '''has burned pixels single will evaluate one dicom file for burned in
    pixels based on 'filter' criteria in a deid. The method proceeds as follows:

    1. deid is loaded, with criteria groups ordered from specific --> general
    2. image is run down the criteria, stops when hits and reports FLAG
    3. passing through the entire list gives status of pass

    The default deid has a greylist, whitelist, then blacklist

    Parameters
    =========
    dicom_file: the fullpath to the file to evaluate
    force: force reading of a potentially erroneous file
    deid: a loaded deid specification (dictionary) with a 'filter' section.
          NOTE(review): despite earlier descriptions mentioning a path, this
          function indexes deid as a dict and does not load it — the caller
          must pass an already-parsed deid; a None deid raises a TypeError
          at the 'filter' check below. Confirm callers always pre-load it.

    deid['filter']['dangerouscookie'] <-- filter list "dangerouscookie"

    --> This is what an item in the criteria looks like
        [{'coordinates': ['0,0,512,110'],
          'filters': [{'InnerOperators': [],
          'action': ['notequals'],
          'field': ['OperatorsName'],
          'operator': 'and',
          'value': ['bold bread']}],
        'name': 'criteria for dangerous cookie'}]


    Returns
    =======
    --> This is what a clean image looks like:
        {'flagged': False, 'results': []}


    --> This is what a flagged image looks like:
       {'flagged': True,
        'results': [
                      {'reason': ' ImageType missing  or ImageType empty ',
                       'group': 'blacklist',
                       'coordinates': []}
                   ]
        }
    '''

    dicom = read_file(dicom_file,force=force)
    dicom_name = os.path.basename(dicom_file)

    # Load criteria (actions) for flagging
    if 'filter' not in deid:
        bot.error('Deid provided does not have %filter, exiting.')
        sys.exit(1)

    # Return list with lookup as dicom_file
    results = []
    global_flagged = False

    # Each filter group (e.g. greylist/whitelist/blacklist) holds a list of
    # criteria items; each item holds one or more filter lines plus coordinates
    for name,items in deid['filter'].items():
        for item in items:
            # flags interleaves group-level operators ('and'/'or') with the
            # boolean result of each filter line, in evaluation order
            flags = []

            descriptions = [] # description for each group across items

            for group in item['filters']:
                group_flags = []         # evaluation for a single line
                group_descriptions = []

                # You cannot pop from the list
                for a in range(len(group['action'])):
                    action = group['action'][a]
                    field = group['field'][a]
                    # value lists can be shorter than action lists
                    # (actions like "missing" take no value)
                    value = ''
                    if len(group['value']) > a:
                        value = group['value'][a]
                    flag = apply_filter(dicom=dicom,
                                        field=field,
                                        filter_name=action,
                                        value=value or None)
                    group_flags.append(flag)
                    description = "%s %s %s" %(field,action,value)
                    # Inner operators join criteria within a single line;
                    # they are appended between flags for evaluate_group
                    if len(group['InnerOperators']) > a:
                        inner_operator = group['InnerOperators'][a]
                        group_flags.append(inner_operator)
                        description = "%s %s" %(description,inner_operator)
                    group_descriptions.append(description)

                # At the end of a group, evaluate the inner group
                flag = evaluate_group(group_flags)

                # "Operator" is relevant for the outcome of the list of actions
                # (the operator is appended BEFORE its flag — evaluate_group
                # depends on this ordering)
                operator = ''
                if 'operator' in group:
                    if group['operator'] is not None:
                        operator = group['operator']
                        flags.append(operator)

                flags.append(flag)
                reason = ('%s %s' %(operator,' '.join(group_descriptions))).replace('\n',' ')
                descriptions.append(reason)

            # NOTE(review): group_name is computed but never used below;
            # the reported 'group' is the filter list name instead
            group_name = ''
            if "name" in item:
                group_name = item['name']

            # When we parse through a group, we evaluate based on all flags
            flagged = evaluate_group(flags=flags)

            if flagged is True:
                global_flagged = True
                reason = ' '.join(descriptions)

                result = {'reason': reason,
                          'group': name,
                          'coordinates': item['coordinates'] }

                results.append(result)

    results = {'flagged': global_flagged,
               'results': results }
    return results