Example #1
def get_identifiers(
    dicom_files, force=True, config=None, strip_sequences=False, remove_private=False
):
    """ extract all identifiers from a dicom image.
        This function returns a lookup by file name, where each value indexed
        includes a dictionary of nested fields (indexed by nested tag).

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        force: force reading the file (default True)
        config: if None, uses default in provided module folder
        strip_sequences: if True, remove all sequences
        remove_private: remove private tags

    """
    if config is None:
        config = "%s/config.json" % here

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))
    lookup = dict()

    # Parse each dicom file
    for dicom_file in dicom_files:
        parser = DicomParser(dicom_file, force=force)
        lookup[parser.dicom_file] = parser.get_fields()

    return lookup
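
# A minimal usage sketch for the function above. The import path and the file
# name are assumptions for illustration; the exact module layout may differ.
from deid.dicom import get_identifiers

lookup = get_identifiers("image.dcm")  # a single path or a list of paths
for dicom_file, fields in lookup.items():
    print("%s: %s fields extracted" % (dicom_file, len(fields)))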
Example #2
def _clean_item(item, deid, default="KEEP"):
    '''clean a single item according to a deid specification.
    This function is expected to be called from clean_identifiers
    below.

    Parameters
    ==========
    item: the item dictionary to clean
    deid: the already loaded deid, with a header section of
          actions that specify how to clean
    default: the default action to apply to fields not covered by the
             header actions (default "KEEP")
    '''

    # Keep track of the fields we've seen so the default action is not applied to them
    seen = []
    for action in deid['header']:
        item, fields = perform_action(item=item,
                                      action=action,
                                      return_seen=True)
        seen = seen + [f for f in fields if f not in seen]
    remaining = [x for x in item.keys() if x not in seen]

    # Apply the default action to any remaining fields
    if len(remaining) > 0 and default != "KEEP":
        bot.debug("%s fields set for default action %s" % (len(remaining), default))
        for field in remaining:
            action = {'action': default, 'field': field}
            item = perform_action(item=item, action=action)
    return item
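
# A hypothetical call sketch: _clean_item is internal, so having it and its
# perform_action helper in scope is an assumption, and the deid structure
# below is only an illustrative example.
deid = {"header": [{"action": "REMOVE", "field": "PatientName"}]}
item = {"PatientName": "Jane Doe", "Modality": "CT"}
cleaned = _clean_item(item=item, deid=deid, default="KEEP")
# Fields not covered by the header actions keep their values under the default "KEEP".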
Example #3
def validate_dicoms(dcm_files, force=False):
    """validate dicoms will test opening one or more dicom files, 
       and return a list of valid files.

       Parameters
       ==========
       dcm_files: one or more dicom files to test
       force: force reading of the file (default False)

    """
    if not isinstance(dcm_files, list):
        dcm_files = [dcm_files]

    valids = []

    bot.debug("Checking %s dicom files for validation." % (len(dcm_files)))
    for dcm_file in dcm_files:

        try:
            with open(dcm_file, "rb") as filey:
                read_file(filey, force=force)
            valids.append(dcm_file)
        except Exception:
            bot.warning(
                "Cannot read input file {0!s}, skipping.".format(dcm_file))

    bot.debug("Found %s valid dicom files" % (len(valids)))
    return valids
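
# Usage sketch, assuming validate_dicoms is importable from deid.dicom.validate
# (an assumption about module layout) and that the paths below exist.
from deid.dicom.validate import validate_dicoms

valid = validate_dicoms(["scan1.dcm", "notes.txt", "scan2.dcm"])
print("%s of 3 inputs are readable dicom files" % len(valid))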
Example #4
def get_files(contenders, check=True, pattern=None, force=False):
    '''get_files will take a list of single dicom files or directories,
    and return a generator that yields complete paths to all files
    :param contenders: a list of files and/or directories to search
    :param check: if True, validate the dicom files before yielding (default True)
    :param pattern: A pattern to use with fnmatch. If None, * is used
    :param force: force reading of the files, if some headers invalid.
    Not recommended, as many non-dicom will come through
    '''
    if not isinstance(contenders, list):
        contenders = [contenders]

    for contender in contenders:
        if os.path.isdir(contender):
            dicom_files = recursive_find(contender, pattern=pattern)
        else:
            dicom_files = [contender]

        for dicom_file in dicom_files:
            if dicom_file is not None:
                if check:
                    validated_files = validate_dicoms(dicom_file, force=force)
                else:
                    validated_files = [dicom_file]

                for validated_file in validated_files:
                    bot.debug("Found contender file %s" % (validated_file))
                    yield validated_file
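
# Usage sketch: get_files is part of deid.dicom, though the directory used
# here is hypothetical. Note that the function returns a generator.
from deid.dicom import get_files

dicom_files = list(get_files("/data/dicom-folder", pattern="*.dcm"))
print("Found %s dicom files" % len(dicom_files))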
Example #5
def remove_private_identifiers(dicom_files,
                               save=True,
                               overwrite=False,
                               output_folder=None,
                               force=True):
    """remove_private_identifiers is a wrapper for the 
    simple call to dicom.remove_private_tags: it reads in
    the files for the user and saves them accordingly.
    """
    updated_files = []
    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    for dicom_file in dicom_files:
        dicom = read_file(dicom_file, force=force)
        dicom.remove_private_tags()
        dicom_name = os.path.basename(dicom_file)
        bot.debug("Removed private identifiers for %s" % dicom_name)

        if save:
            dicom = save_dicom(
                dicom=dicom,
                dicom_file=dicom_file,
                output_folder=output_folder,
                overwrite=overwrite,
            )

        updated_files.append(dicom)
    return updated_files
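
# Usage sketch; the import path is an assumption about the package layout and
# the file name is hypothetical.
from deid.dicom import remove_private_identifiers

cleaned = remove_private_identifiers("image.dcm", save=False)
print(cleaned[0])  # with save=False, the list holds the updated pydicom Dataset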
Example #6
def _perform_action(field, item, action, value=None):
    '''_perform_action is the base function for performing an action.
    It is equivalent to the dicom module version, except we work with
    dictionary field/value instead of dicom headers.
    If no action is applied, a warning is logged and the item is
    returned unchanged.
    '''
    done = False
    if action not in valid_actions:
        bot.warning('%s is not a valid choice [%s]. Defaulting to BLANK.' %
                    (action, ", ".join(valid_actions)))
        action = "BLANK"

    if field in item and action != "ADD":

        # Blank the value
        if action == "BLANK":
            item[field] = ""
            done = True

        # Code the value with something in the response
        elif action == "REPLACE":
            value = parse_value(item, value)
            if value is not None:
                done = True
                item[field] = value
            else:
                bot.warning("REPLACE failed for %s" % field)

        # Jitter the value (a timestamp) by the number of days in the response
        elif action == "JITTER":
            value = parse_value(item, value)
            if value is not None:
                done = True
                item = jitter_timestamp(field=field, value=value, item=item)
            else:
                bot.warning('JITTER failed for %s' % field)

        # Do nothing. Keep the original
        elif action == "KEEP":
            done = True
            bot.debug('KEEP %s' % field)

        # Remove the field entirely
        elif action == "REMOVE":
            del item[field]
            done = True
        if not done:
            bot.warning("%s not done for %s" % (action, field))

    elif action == "ADD":
        value = parse_value(item, value)
        if value is not None:
            item[field] = value
        else:
            bot.warning('ADD failed for %s' % field)

    return item
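
# Hypothetical sketch of the item-level behavior; _perform_action is internal,
# so having the name in scope here is an assumption for illustration.
item = {"PatientName": "Jane Doe", "StudyDate": "20230101"}
item = _perform_action(field="PatientName", item=item, action="BLANK")
# item["PatientName"] is now "", while StudyDate is left untouched.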
Example #7
def parse_group_action(section, line, config, section_name):
    """parse a group action, either FIELD or SPLIT, which must belong to
       either a fields or values section.

       Parameters
       =========
       section: a valid section name from the deid config file
       line: the line content to parse for the section/action
       config: the growing/current config dictionary
       section_name: optionally, a section name
    """
    if not line.upper().startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if not line.upper().startswith("FIELD") and section == "fields":
        bot.exit("%fields only supports FIELD actions.")

    # We may have to deal with cases of spaces
    bot.debug("%s: adding %s" % (section, line))
    parts = line.split(" ")
    action = parts.pop(0).replace(" ", "")

    # Both require some parts
    if not parts:
        bot.exit("%s action %s requires additional arguments" %
                 (section, action))

    # For both, the second is always a field or field expander
    field = parts.pop(0)

    # Fields supports one or more fields with expanders (no third arguments)
    if section == "fields":
        config[section][section_name].append({
            "action": action,
            "field": field
        })

    # Values supports FIELD or SPLIT
    elif section == "values":

        # If we have a third set of arguments
        if parts:
            value = _remove_comments(parts)
            config[section][section_name].append({
                "action": action,
                "field": field,
                "value": value
            })
        else:
            config[section][section_name].append({
                "action": action,
                "field": field
            })

    return config
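
# Hypothetical sketch: the config shape mirrors what add_section builds for a
# named %values group; parse_group_action is internal, so having it in scope
# here is an assumption for illustration.
config = {"values": {"split_ids": []}}
config = parse_group_action(section="values",
                            line="SPLIT PatientID by='^';minlength=4",
                            config=config,
                            section_name="split_ids")
# config["values"]["split_ids"] now holds the parsed SPLIT action entry.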
Example #8
def get_identifiers(dicom_files,
                    force=True,
                    config=None,
                    expand_sequences=True,
                    skip_fields=None):
    """ extract all identifiers from a dicom image.
        This function returns a lookup by file name, and does not include
        private tags.

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        force: force reading the file (default True)
        config: if None, uses default in provided module folder
        expand_sequences: if True, expand sequences. Otherwise, skips
        skip_fields: if not None, added fields to skip

    """
    if config is None:
        config = "%s/config.json" % here

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))
    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config["skip"]
    if skip_fields is not None:
        if not isinstance(skip_fields, list):
            skip_fields = [skip_fields]
        skip = skip + skip_fields

    for dicom_file in dicom_files:

        if isinstance(dicom_file, Dataset):
            dicom = dicom_file
            dicom_file = dicom.filename
        else:
            dicom = read_file(dicom_file, force=force)

        if dicom_file not in ids:
            ids[dicom_file] = dict()

        ids[dicom_file] = get_fields(dicom,
                                     skip=skip,
                                     expand_sequences=expand_sequences)
    return ids
Example #9
def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
       file and check that it is supported. If not, exit on error. If yes,
       return the format.

       Parameters
       ==========
       line: the line that starts with format.
    """
    fmt = re.sub(r"FORMAT|(\s+)", "", line).lower()
    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt
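
# Hypothetical sketch, assuming parse_format (and the module globals it uses)
# are in scope; "dicom" is the format used throughout the deid examples.
fmt = parse_format("FORMAT dicom")
assert fmt == "dicom"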
Example #10
def add_tag(dicom, field, value, quiet=False):
    '''add tag will add a tag only if it's in the (active) DicomDictionary
    :param dicom: the pydicom.dataset Dataset (pydicom.read_file)
    :param field: the name of the field to add
    :param value: the value to set, if name is a valid tag
    '''
    if quiet is False:
        bot.debug("Attempting ADDITION of %s." % (field))
    dicom = change_tag(dicom, field, value)

    # dicom.data_element("PatientIdentityRemoved")
    # (0012, 0062) Patient Identity Removed            CS: 'Yes'

    return dicom
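
# Usage sketch, assuming add_tag is in scope (its module path is not shown
# above); the file path is hypothetical, and PatientIdentityRemoved is a
# standard DicomDictionary keyword used as the example field.
from pydicom import dcmread

dicom = dcmread("image.dcm")
dicom = add_tag(dicom, field="PatientIdentityRemoved", value="Yes")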
Example #11
def jitter_timestamp(field, value, item):
    '''if present, jitter a timestamp in the dicom
    field "field" by the number of days specified by "value".
    The value can be positive or negative.
    '''
    value = to_int(value)
    original = item.get(field, None)
    if original is not None:
        jittered = get_timestamp(item_date=original,
                                 jitter_days=value,
                                 format="%Y%m%d")
        bot.debug("JITTER %s + (%s): %s" % (original, value, jittered))
        item[field] = jittered
    return item
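
# Hypothetical sketch, assuming jitter_timestamp and its helpers are in scope:
# a %Y%m%d value is shifted by the (string or int) number of days given.
item = {"StudyDate": "20230115"}
item = jitter_timestamp(field="StudyDate", value=-10, item=item)
# item["StudyDate"] is now the original date minus 10 days.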
Example #12
def parse_action(section, line, config, section_name=None):
    '''parse_action will take a line from a deid config file, a config (dictionary), and
    an active section name (e.g. header) and add an entry to the config to perform
    the action.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: optionally, a section name

    '''

    if not line.upper().startswith(actions):
        bot.error("%s is not a valid action line." % line)
        sys.exit(1)

    # We may have to deal with cases of spaces
    parts = line.split(' ')
    action = parts.pop(0).replace(' ', '')

    # What field is the action for?
    if len(parts) < 1:
        bot.error("%s requires a FIELD value, but not found." % (action))
        sys.exit(1)

    field = parts.pop(0)

    # Actions that require a value
    if action in ["ADD", "REPLACE", "JITTER"]:
        if len(parts) == 0:
            bot.error("%s requires a VALUE, but not found" % (action))
            sys.exit(1)
        value = ' '.join(parts[0:])  # get remainder of line
        value = value.split('#')[0]  # remove comments
        bot.debug("Adding %s" % line)
        config[section].append({
            "action": action,
            "field": field,
            "value": value
        })

    # Actions that don't require a value
    elif action in ["BLANK", "KEEP", "REMOVE"]:
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field})

    return config
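
# Hypothetical sketch, assuming parse_action is in scope; the config below is
# the minimal shape that add_section would create for a %header section.
config = {"header": []}
config = parse_action(section="header",
                      line="REPLACE PatientID cookie-monster",
                      config=config)
# config["header"] == [{"action": "REPLACE", "field": "PatientID",
#                       "value": "cookie-monster"}]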
Example #13
def get_identifiers(dicom_files,
                    force=True,
                    config=None,
                    expand_sequences=True,
                    skip_fields=None):
    ''' extract all identifiers from a dicom image.
        This function returns a lookup by file name

        Parameters
        ==========
        dicom_files: the dicom file(s) to extract from
        force: force reading the file (default True)
        config: if None, uses default in provided module folder
        expand_sequences: if True, expand sequences. otherwise, skips
        skip_fields: if not None, added fields to skip

    '''
    if config is None:
        config = "%s/config.json" % here

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % config)
    config = read_json(config, ordered_dict=True)['get']

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]

    bot.debug('Extracting identifiers for %s dicom' % len(dicom_files))

    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config['skip']
    if skip_fields is not None:
        if not isinstance(skip_fields, list):
            skip_fields = [skip_fields]
        skip = skip + skip_fields
 
    for dicom_file in dicom_files:
        dicom = read_file(dicom_file, force=force)

        if dicom_file not in ids:
            ids[dicom_file] = dict()

        ids[dicom_file] = get_fields(dicom,
                                     skip=skip,
                                     expand_sequences=expand_sequences)
    return ids
Example #14
def add_section(config, section, section_name=None):
    '''add section will add a section (and optionally a
       section name) to a config

       Parameters
       ==========
       config: the config (dict) parsed thus far
       section: the section name to add
       section_name: an optional name, added as a level

    '''

    if section is None:
        bot.error(
            'You must define a section (e.g. %header) before any action.')
        sys.exit(1)

    if section == 'filter' and section_name is None:
        bot.error("You must provide a name for a filter section.")
        sys.exit(1)

    if section not in sections:
        bot.error("%s is not a valid section." % section)
        sys.exit(1)

    if section not in config:

        # If a section is named, we have more than one level (dict)
        if section_name is not None:
            config[section] = OrderedDict()
            config[section][section_name] = []
            bot.debug("Adding section %s %s" % (section, section_name))
        else:
            config[section] = []
            bot.debug("Adding section %s" % section)
        return config

    # Section is in config
    if section_name is not None and section_name not in config[section]:
        config[section][section_name] = []

    return config
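
# Hypothetical sketch, assuming add_section is in scope: a plain section gets
# a list, while a named section (e.g. a filter) gets a nested dictionary.
config = add_section({}, section="header")
config = add_section(config, section="filter", section_name="blacklist")
# config == {"header": [], "filter": {"blacklist": []}}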
Example #15
def get_files(contenders, check=True, pattern=None, force=False, tempdir=None):
    """get_files will take a list of single dicom files or directories,
    and return a generator that yields complete paths to all files

    Parameters
    ==========
    contenders: a list of files or directories (contenders!)
    check: boolean to indicate if we should validate dicoms (default True)
    pattern: A pattern to use with fnmatch. If None, * is used
    force: force reading of the files, if some headers invalid.
           Not recommended, as many non-dicom will come through
    tempdir: a temporary directory to extract any .zip archives into

    """
    if not isinstance(contenders, list):
        contenders = [contenders]

    for contender in contenders:
        if os.path.isdir(contender):
            dicom_files = recursive_find(contender, pattern=pattern)
        else:
            dicom_files = [contender]

        for dicom_file in dicom_files:
            dfile, dextension = os.path.splitext(dicom_file)
            # The code currently only assumes a single-file per zip.  This could be
            # expanded to allow for multiple test files within an archive.
            if dextension == ".zip":
                with zipfile.ZipFile(dicom_file, "r") as compressedFile:
                    compressedFile.extractall(tempdir)
                    dicom_file = next(
                        os.path.join(tempdir, f) for f in os.listdir(tempdir)
                        if os.path.isfile(os.path.join(tempdir, f)))

            if dicom_file is not None:
                if check:
                    validated_files = validate_dicoms(dicom_file, force=force)
                else:
                    validated_files = [dicom_file]

                for validated_file in validated_files:
                    bot.debug("Found contender file %s" % (validated_file))
                    yield validated_file
Example #16
    def _get_clean_name(self, output_folder, extension="dcm"):
        """return a full path to an output file, with custom folder and
        extension. If the output folder isn't yet created, make it.

        Parameters
        ==========
        output_folder: the output folder to write to; it will be created if it
        doesn't exist.
        extension: the extension of the file to create a name for; should
        not start with "."
        """
        if output_folder is None:
            output_folder = self.output_folder

        if not os.path.exists(output_folder):
            bot.debug("Creating output folder %s" % output_folder)
            os.makedirs(output_folder)

        basename = re.sub("[.]dicom|[.]dcm", "", os.path.basename(self.dicom_file))
        return "%s/cleaned-%s.%s" % (output_folder, basename, extension)
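
# Hypothetical sketch using deid's DicomCleaner, which exposes this method;
# the constructor arguments, attribute names, and paths below are assumptions
# for illustration only.
from deid.dicom import DicomCleaner

client = DicomCleaner(output_folder="/tmp/out")
client.dicom_file = "/data/image1.dcm"
print(client._get_clean_name("/tmp/out"))         # /tmp/out/cleaned-image1.dcm
print(client._get_clean_name("/tmp/out", "nii"))  # /tmp/out/cleaned-image1.nii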
Example #17
def get_deid(tag=None, exit_on_fail=True, quiet=False, load=False):
    '''get deid is intended to retrieve the full path of a deid file provided with
       the software, based on a tag. For example, under deid/data if a file is called
       "deid.dicom", the tag would be "dicom". 

       Parameters
       ==========
       tag: the text that comes after deid to indicate the tag of the file in deid/data
       exit_on_fail: if None is an acceptable return value, this should be set to False
                     (default is True).
       quiet: if True, do not log an error when the deid file is not found (default False)
       load: also load the deid, if resulting path (from path or tag) is not None

    '''
    # no tag/path means load default
    if tag is None:
        tag = 'dicom'

    # If it's already loaded
    if isinstance(tag, dict):
        bot.debug('deid is already loaded.')
        return tag

    # If it's a path, get full path
    if os.path.exists(tag):
        deid = os.path.abspath(tag)
    else:
        deid = "%s/deid.%s" % (data_base, tag)

    if not os.path.exists(deid):
        if quiet is False:
            bot.error("Cannot find %s" % (deid))
        if exit_on_fail is True:
            sys.exit(1)
        else:
            return None

    if load is True:
        return load_deid(deid)

    return deid
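
# Usage sketch; the import path is an assumption (the module layout may differ
# between versions), and "dicom" is the tag for the bundled deid.dicom recipe.
from deid.data import get_deid

path = get_deid("dicom")               # full path to the packaged deid.dicom
recipe = get_deid("dicom", load=True)  # parsed dictionary, via load_deid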
Example #18
def get_private(dicom):
    '''get private tags from a dicom Dataset, including those
       nested within sequences.
    '''
    datasets = [dicom]
    private_tags = []
    while len(datasets) > 0:
        ds = datasets.pop(0)
        taglist = sorted(ds.keys())
        for tag in taglist:
            with tag_in_exception(tag):
                if tag in ds:
                    try:
                        data_element = ds[tag]
                        if data_element.tag.is_private:
                            bot.debug(data_element.name)
                            private_tags.append(data_element)
                            if tag in ds and data_element.VR == "SQ":
                                sequence = data_element.value
                                for dataset in sequence:
                                    datasets.append(dataset)
                    except IndexError:
                        bot.debug("tag %s key present without value" % tag)
                    except NotImplementedError:
                        bot.debug('tag %s is invalid, skipping' % tag)
    return private_tags
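
# Usage sketch, assuming get_private is in scope alongside the other dicom
# tag helpers; the file path is hypothetical.
from pydicom import dcmread

dicom = dcmread("image.dcm")
for element in get_private(dicom):
    print(element.tag, element.name)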
Example #19
def validate_dicoms(dcm_files, force=False):
    '''validate dicoms will test opening one or more dicom files, and return a list
    of valid files.
    :param dcm_files: one or more dicom files to test'''
    if not isinstance(dcm_files, list):
        dcm_files = [dcm_files]

    valids = []

    bot.debug("Checking %s dicom files for validation." % (len(dcm_files)))
    for dcm_file in dcm_files:

        try:
            with open(dcm_file, 'rb') as filey:
                read_file(filey, force=force)
            valids.append(dcm_file)
        except Exception:
            bot.warning(
                'Cannot read input file {0!s}, skipping.'.format(dcm_file))

    bot.info("Found %s valid dicom files" % (len(valids)))
    return valids
Example #20
def extract_values_list(dicom, actions, fields=None):
    """Given a list of actions for a named group (a list) extract values from
    the dicom based on the list of actions provided. This function
    always returns a list intended to update some lookup to be used
    to further process the dicom.
    """
    values = set()

    # The function can be provided fields to save re-parsing
    if not fields:
        fields = get_fields(dicom)

    for action in actions:

        # Extract some subset of fields based on action
        subset = expand_field_expression(
            field=action["field"], dicom=dicom, contenders=fields
        )

        # Just grab the entire value string for a field, no parsing
        if action["action"] == "FIELD":
            for uid, field in subset.items():
                if field.element.value not in ["", None]:
                    values.add(field.element.value)

        # Split action, can optionally have a "by" and/or minlength parameter
        elif action["action"] == "SPLIT":

            # Default values for split are length 1 and character empty space
            bot.debug("Parsing action %s" % action)
            split_by = " "
            minlength = 1

            if "value" in action:
                for param in action["value"].split(";"):
                    param_name, param_val = param.split("=")
                    param_name = param_name.strip()
                    param_val = param_val.strip()

                    # Set a custom minimum length
                    if param_name == "minlength":
                        minlength = int(param_val)
                        bot.debug("Minimum length set to %s" % minlength)
                    elif param_name == "by":
                        split_by = param_val.strip("'").strip('"')
                        bot.debug("Splitting value set to %s" % split_by)

            for uid, field in subset.items():
                new_values = (str(field.element.value) or "").split(split_by)
                for new_value in new_values:
                    if len(new_value) >= minlength:
                        values.add(new_value)

        else:
            bot.warning(
                "Unrecognized action %s for values list extraction." % action["action"]
            )

    return list(values)
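
# Hypothetical sketch, assuming extract_values_list is in scope; the actions
# list mirrors what parse_group_action produces for a %values group, and the
# file path is hypothetical.
from pydicom import dcmread

dicom = dcmread("image.dcm")
actions = [{"action": "FIELD", "field": "PatientID"},
           {"action": "SPLIT", "field": "PatientName", "value": "by='^';minlength=3"}]
values = extract_values_list(dicom=dicom, actions=actions)
# values is a flat list of strings to be used in a later lookup/cleaning step.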
Example #21
def get_files(contenders, check=True, pattern=None, force=False):
    '''get_files will take a list of single dicom files or directories,
    and return a single list of complete paths to all files
    :param pattern: A pattern to use with fnmatch. If None, * is used
    :param force: force reading of the files, if some headers invalid.
    Not recommended, as many non-dicom will come through
    '''
    if not isinstance(contenders, list):
        contenders = [contenders]

    dcm_files = []
    for contender in contenders:
        if os.path.isdir(contender):
            dicom_dir = recursive_find(contender, pattern=pattern)
            bot.debug("Found %s contender files in %s" %
                      (len(dicom_dir), os.path.basename(contender)))
            dcm_files.extend(dicom_dir)
        else:
            bot.debug("Adding single contender file %s" % (contender))
            dcm_files.append(contender)

    if check:
        dcm_files = validate_dicoms(dcm_files, force=force)
    return dcm_files
Example #22
def parse_config_action(section, line, config, section_name=None):
    """parse_config_action will take a line from a deid config file, a config (dictionary),
    and an active section name (e.g. header) and add an entry to the config to perform
    the action.

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: optionally, a section name

    """
    if not line.upper().startswith(actions):
        bot.exit("%s is not a valid action line." % line)

    # We may have to deal with cases of spaces
    parts = line.split(" ")
    action = parts.pop(0).replace(" ", "")

    # What field is the action for?
    if len(parts) < 1:
        bot.exit("%s requires a FIELD value, but not found." % action)

    field = parts.pop(0)

    # Actions that require a value
    if action in ["ADD", "REPLACE", "JITTER"]:
        if len(parts) == 0:
            bot.exit("%s requires a VALUE, but not found" % action)

        value = _remove_comments(parts)
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field, "value": value})

    # Actions that can optionally have a value
    elif action in ["REMOVE"]:
        bot.debug("%s: adding %s" % (section, line))

        # Case 1: removing without any criteria
        if len(parts) == 0:
            config[section].append({"action": action, "field": field})

        # Case 2: REMOVE can have a func:is_thing to return boolean
        else:
            value = _remove_comments(parts)
            config[section].append({"action": action, "field": field, "value": value})

    # Actions that don't require a value
    elif action in ["BLANK", "KEEP"]:
        bot.debug("%s: adding %s" % (section, line))
        config[section].append({"action": action, "field": field})

    return config
Example #23
def load_deid(path=None):
    '''load_deid will return a loaded in (user) deid configuration file
    that can be used to update a default config.json. If a file path is
    specified, it is loaded directly. If a folder is specified, we look
    for a deid file in the folder. If nothing is specified, we assume
    the user wants to load a deid file in the present working directory.
    If the user wants to have multiple deid files in a directory, this
    can be done with an extension that specifies the module, e.g.:
   
             deid.dicom
             deid.nifti

    Parameters
    ==========
    path: a path to a deid file

    Returns
    =======
    config: a parsed deid (dictionary) with valid sections

    '''
    path = find_deid(path)

    # Read in spec, clean up extra spaces and newlines
    spec = [
        x.strip('\n').strip(' ') for x in read_file(path)
        if x.strip('\n').strip(' ') not in ['']
    ]

    spec = [x for x in spec if x not in ['', None]]
    config = OrderedDict()
    section = None

    while len(spec) > 0:

        # Clean up white trailing/leading space
        line = spec.pop(0).strip()

        # Comment
        if line.startswith("#"):
            continue

        # Starts with Format?
        elif bool(re.match('format', line, re.I)):
            fmt = re.sub(r'FORMAT|(\s+)', '', line).lower()
            if fmt not in formats:
                bot.error("%s is not a valid format." % fmt)
                sys.exit(1)
            # Set format
            config['format'] = fmt
            bot.debug("FORMAT set to %s" % fmt)

        # A new section?
        elif line.startswith('%'):

            # Remove any comments
            line = line.split('#', 1)[0].strip()

            # Is there a section name?
            section_name = None
            parts = line.split(' ')
            if len(parts) > 1:
                section_name = ' '.join(parts[1:])
            section = re.sub(r'[%]|(\s+)', '', parts[0]).lower()
            if section not in sections:
                bot.error("%s is not a valid section." % section)
                sys.exit(1)
            config = add_section(config=config,
                                 section=section,
                                 section_name=section_name)

        # An action (replace, blank, remove, keep, jitter)
        elif line.upper().startswith(actions):

            # Start of a filter group
            if line.upper().startswith('LABEL') and section == "filter":
                members = []
                keep_going = True
                while keep_going is True:
                    next_line = spec[0]
                    if next_line.upper().strip().startswith('LABEL'):
                        keep_going = False
                    elif next_line.upper().strip().startswith("%"):
                        keep_going = False
                    else:
                        new_member = spec.pop(0)
                        members.append(new_member)
                    if len(spec) == 0:
                        keep_going = False

                # Add the filter label to the config
                config = parse_label(config=config,
                                     section=section,
                                     label=line,
                                     section_name=section_name,
                                     members=members)
            # Parse the action
            else:
                config = parse_action(section=section,
                                      section_name=section_name,
                                      line=line,
                                      config=config)
        else:
            bot.debug("%s not recognized to be in valid format, skipping." %
                      line)
    return config
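
# Usage sketch; the import path is an assumption about where load_deid lives,
# and the recipe path is hypothetical.
from deid.config.utils import load_deid

recipe = load_deid("examples/deid.dicom")
print(recipe.get("format"), recipe.get("header", []))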
Example #24
def get_shared_identifiers(dicom_files,
                           force=True,
                           config=None,
                           aggregate=None,
                           expand_sequences=True):
    """

    extract shared identifiers across a set of dicom files, intended for
    cases when a set of images (dicom) is being compressed into one file
    and the file should (still) have some searchable metadata. By default,
    we remove fields that differ between files. To instead collect the
    unique values of a field, include it in the list of aggregate fields
    (aggregate).

    """

    bot.debug("Extracting shared identifiers for %s dicom" %
              (len(dicom_files)))

    if aggregate is None:
        aggregate = []

    if config is None:
        config = "%s/config.json" % (here)

    if not os.path.exists(config):
        bot.error("Cannot find config %s, exiting" % (config))
    config = read_json(config, ordered_dict=True)["get"]

    if not isinstance(dicom_files, list):
        dicom_files = [dicom_files]
    ids = dict()  # identifiers

    # We will skip PixelData
    skip = config["skip"]
    for dicom_file in dicom_files:

        dicom = read_file(dicom_file, force=force)

        # Get list of fields, expanded sequences are flattened
        fields = get_fields(dicom,
                            skip=skip,
                            expand_sequences=expand_sequences)

        for key, val in fields.items():

            # If it's there, only keep if the same
            if key in ids:

                # Items to aggregate are appended, not removed
                if key in aggregate:
                    if val not in ids[key]:
                        ids[key].append(val)
                else:

                    # Keep only if equal between
                    if ids[key] == val:
                        continue
                    else:
                        del ids[key]
                        skip.append(key)
            else:
                if key in aggregate:
                    val = [val]
                ids[key] = val

    # For any aggregates that are one item, unwrap again
    for field in aggregate:
        if field in ids:
            if len(ids[field]) == 1:
                ids[field] = ids[field][0]

    return ids
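
# Usage sketch; the import path is an assumption based on the package layout,
# and the folder is hypothetical. Fields that differ across the series are
# dropped unless listed in aggregate (here InstanceNumber, which varies by slice).
from deid.dicom import get_files, get_shared_identifiers

dicom_files = list(get_files("/data/series1"))
shared = get_shared_identifiers(dicom_files, aggregate=["InstanceNumber"])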