Ejemplo n.º 1
0
def section_parent_page(sect_div):
    """Derive a parent id from the anchor that heads sect_div's parent.

    The id is read from the first ``a`` element found under
    ``sect_div.parent.div.div.div``.  It is split on ``.`` and everything
    from the first component equal to ``'1'`` onwards is dropped; when no
    component equals ``'1'`` the id is returned unchanged.
    """
    heading_anchor = sect_div.parent.div.div.div.find('a')
    full_id = heading_anchor.get('id')
    components = full_id.split('.')
    if '1' not in components:
        return full_id
    return '.'.join(components[:components.index('1')])


def get_refs_from_pairs(pairs):
    """Return the set of locations for every external reference in pairs.

    Each element of ``pairs`` must expose an ``'externalReferences'``
    iterable; every reference in it is converted with
    ``get_location_from_ref`` and duplicates collapse in the returned set.
    """
    return {
        get_location_from_ref(reference)
        for pair in pairs
        for reference in pair['externalReferences']
    }


def get_location_from_ref(ref):
    """Return the reference's short HTML location as a tuple of parts.

    ``ref['sourceUrl']`` is shortened with ``pl.get_short_html_location``
    and the result is split on ``/``.
    """
    short_location = pl.get_short_html_location(ref['sourceUrl'])
    return tuple(short_location.split('/'))


if __name__ == '__main__':
    # Script entry point.  argv[1] and argv[2] are each handed to
    # pl.read_json_to_dict (presumably JSON file paths -- confirm against
    # parse_lib).
    module_attr_pairs = pl.read_json_to_dict(sys.argv[1])
    section_listing = pl.read_json_to_dict(sys.argv[2])
    references = find_reference_html_in_sections(module_attr_pairs,
                                                 section_listing)
    # Emit the computed references through pl.write_pretty_json.
    pl.write_pretty_json(references)
Ejemplo n.º 2
0
    parsed_description = BeautifulSoup(pair['description'], 'html.parser')
    references = get_valid_reference_anchors(parsed_description)
    external_references = list(map(reference_structure_from_anchor,
                                   references))
    for ref in references:
        mark_as_recorded(ref)
    pair['externalReferences'] = [] if len(
        external_references) < 1 else external_references
    pair['description'] = str(parsed_description)
    finalize_descriptions(pair)
    return pair


def finalize_descriptions(pair):
    """Replace ``pair['description']`` in place with its cleaned form.

    The cleaning is delegated to ``pl.clean_html``; nothing is returned.
    """
    raw_description = pair['description']
    pair['description'] = pl.clean_html(raw_description)


def reference_structure_from_anchor(reference):
    """Build the ``sourceUrl``/``title`` record describing an anchor.

    ``sourceUrl`` is the anchor's ``href`` attribute (via ``.get``) and
    ``title`` is whatever ``reference.get_text()`` returns.
    """
    source_url = reference.get('href')
    title = reference.get_text()
    return dict(sourceUrl=source_url, title=title)


def mark_as_recorded(anchor):
    """Neutralise an anchor in place once its reference has been recorded.

    The element is renamed to ``span`` and its ``href`` is set to the empty
    string.  The two writes are independent of each other.
    """
    anchor.name = 'span'
    anchor['href'] = ''


if __name__ == '__main__':
    # Script entry point: load argv[1] with pl.read_json_data, run
    # record_references_inside_pairs over it, and emit the result with
    # pl.write_pretty_json.
    pairs = pl.read_json_data(sys.argv[1])
    updated_pairs = record_references_inside_pairs(pairs)
    pl.write_pretty_json(updated_pairs)
        set([
            rel['ciodId'] for rel in ciod_to_macro
            if rel['moduleType'] == 'Multi-frame'
        ]))
    ciod_specific_attrs += process_fg_attributes(module_to_attr,
                                                 ciods_with_mffg_macros,
                                                 MF_FUNC_GROUP_MODULE_ID)
    ciods_with_cffg_macros = list(
        set([
            rel['ciodId'] for rel in ciod_to_macro
            if rel['moduleType'] == 'Current Frame'
        ]))
    ciod_specific_attrs += process_fg_attributes(module_to_attr,
                                                 ciods_with_cffg_macros,
                                                 CF_FUNC_GROUP_MODULE_ID)
    return ciod_specific_attrs


if __name__ == '__main__':
    # Script entry point.  Four positional arguments are loaded with
    # pl.read_json_data, in this order: module-to-attributes, macros,
    # CIOD-to-macro, macro-to-attributes (names taken from the variables
    # below; presumably JSON file paths -- confirm against parse_lib).
    module_to_attributes = pl.read_json_data(sys.argv[1])
    macros = pl.read_json_data(sys.argv[2])
    ciod_to_macro = pl.read_json_data(sys.argv[3])
    macro_to_attributes = pl.read_json_data(sys.argv[4])
    new_attributes = process_ciod_specific_attributes(module_to_attributes,
                                                      macros, ciod_to_macro,
                                                      macro_to_attributes)
    # The original entries and the newly produced ones are concatenated and
    # ordered by their 'path' key before being written out.
    sorted_modules_to_attributes = sorted(module_to_attributes +
                                          new_attributes,
                                          key=itemgetter('path'))
    pl.write_pretty_json(sorted_modules_to_attributes)
Ejemplo n.º 4
0
def expand_conditional_statement(usage_field_html):
    """Split a usage field into its usage code and conditional statement.

    The HTML is first reduced to stripped text by ``process_usage_html``
    (which raises when that text is empty).  Returns a 2-tuple of the first
    character of that text and the result of
    ``extract_conditional_statement`` on the same text.
    """
    usage_text = process_usage_html(usage_field_html)
    statement = extract_conditional_statement(usage_text)
    # usage_text is never empty here, so index 0 always exists.
    return usage_text[0], statement


def process_usage_html(usage_field_html):
    """Return the whitespace-stripped text of a usage field's HTML.

    The text is obtained from ``pl.text_from_html_string``.

    Raises:
        Exception: if the stripped text is empty.
    """
    stripped_text = pl.text_from_html_string(usage_field_html).strip()
    if not stripped_text:
        raise Exception('Empty module usage field')
    return stripped_text


def extract_conditional_statement(usage_field):
    """Return the text following a leading ``C`` usage marker, or None.

    A ``'C - '`` prefix is removed when present; otherwise a bare leading
    ``'C'`` is removed, provided more text follows it.  The remainder is
    stripped of surrounding whitespace.  Any other input yields ``None``.
    """
    dashed_prefix = 'C - '
    if usage_field.startswith(dashed_prefix):
        return usage_field[len(dashed_prefix):].strip()
    if len(usage_field) > 1 and usage_field[0] == 'C':
        return usage_field[1:].strip()
    return None


if __name__ == '__main__':
    # Script entry point: load argv[1] with pl.read_json_data, turn it into
    # relationships with define_all_relationships, and emit them with
    # pl.write_pretty_json.
    ciod_module_list = pl.read_json_data(sys.argv[1])
    ciod_module_relationships = define_all_relationships(ciod_module_list)
    pl.write_pretty_json(ciod_module_relationships)
Ejemplo n.º 5
0

def get_table_and_tdiv(
        standard: BeautifulSoup) -> Tuple[List[TableDictType], Tag]:
    """Locate the table identified by TABLE_ID and return it in two forms.

    All ``div`` elements with class ``table`` are collected from
    ``standard`` and ``pl.find_tdiv_by_id`` selects the one for TABLE_ID.
    Returns a 2-tuple: the table converted through
    ``attribute_table_to_list`` and ``table_to_dict`` (using COLUMN_TITLES),
    followed by the selected table element itself.
    """
    table_divs = standard.find_all('div', class_='table')
    target_tdiv = pl.find_tdiv_by_id(table_divs, TABLE_ID)
    rows = table_to_dict(attribute_table_to_list(target_tdiv), COLUMN_TITLES)
    return rows, target_tdiv


def generate_ciod_id(name: str) -> str:
    """Return the id for a CIOD name.

    Everything from the first ``'IOD'`` onwards is discarded and the rest
    is stripped.  If that text is a key of IOD_ABBREVIATIONS the mapped
    value is returned, otherwise the text itself.
    """
    before_iod, _, _ = name.partition('IOD')
    lookup_key = before_iod.strip()
    return IOD_ABBREVIATIONS.get(lookup_key, lookup_key)


def table_to_json(table: List[TableDictType],
                  tdiv: Tag) -> List[TableDictType]:
    """Return the rows of table with each ``'ciod'`` value turned into an id.

    Rows are updated in place (``row['ciod']`` is replaced by
    ``generate_ciod_id`` of its previous value) and the same row objects are
    returned in a new list.  ``tdiv`` is accepted but not used.
    """
    def with_ciod_id(entry):
        entry['ciod'] = generate_ciod_id(entry['ciod'])
        return entry

    return [with_ciod_id(entry) for entry in table]


if __name__ == '__main__':
    # Script entry point: argv[1] is parsed with pl.parse_html_file, the
    # table is extracted with get_table_and_tdiv, converted by
    # table_to_json, and emitted with pl.write_pretty_json.
    standard = pl.parse_html_file(sys.argv[1])
    table, tdiv = get_table_and_tdiv(standard)
    parsed_table_data = table_to_json(table, tdiv)
    pl.write_pretty_json(parsed_table_data)
Ejemplo n.º 6
0
                         pl.read_json_data(sys.argv[2]))
    modules = modules_from_tables(module_attr_tables)
    ciods_with_mffg_macros = list(
        set([
            rel['ciodId'] for rel in ciod_to_macro
            if rel['moduleType'] == 'Multi-frame'
        ]))
    ciods_with_cffg_macros = list(
        set([
            rel['ciodId'] for rel in ciod_to_macro
            if rel['moduleType'] == 'Current Frame'
        ]))
    multi_frame_func_group_module = next(
        filter(lambda rel: rel['id'] == MF_FUNC_GROUP_MODULE_ID, modules),
        None)
    assert multi_frame_func_group_module is not None, f'Module ID "{MF_FUNC_GROUP_MODULE_ID}" not found'
    current_frame_func_group_module = next(
        filter(lambda rel: rel['id'] == CF_FUNC_GROUP_MODULE_ID, modules),
        None)
    assert current_frame_func_group_module is not None, f'Module ID "{CF_FUNC_GROUP_MODULE_ID}" not found'
    ciod_specific_mffg_modules = create_ciod_specific_modules(
        ciods_with_mffg_macros, multi_frame_func_group_module,
        MF_FUNC_GROUP_MODULE_ID)
    ciod_specific_cffg_modules = create_ciod_specific_modules(
        ciods_with_cffg_macros, current_frame_func_group_module,
        CF_FUNC_GROUP_MODULE_ID)
    sorted_modules = sorted(modules + ciod_specific_mffg_modules +
                            ciod_specific_cffg_modules,
                            key=itemgetter('id'))
    pl.write_pretty_json(sorted_modules)
# Standard workaround: Remove "Macro" from "Frame VOI LUT With LUT Macro" in Table A.84.3.2-1
# http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.84.3.2.html#table_A.84.3.2-1
def clean_macro_name(text):
    """Return text with every ``' Macro'`` removed and the ends stripped."""
    without_suffix = text.replace(' Macro', '')
    return without_suffix.strip()


def define_all_relationships(ciod_macro_list):
    """Return one relationship record per (CIOD, macro) pairing.

    Each entry of ``ciod_macro_list`` supplies a ``'name'`` and a
    ``'macros'`` iterable; every macro is combined with that name through
    ``define_ciod_macro_relationship``.  Records keep the input order.
    """
    relationships = []
    for entry in ciod_macro_list:
        ciod_name, macro_rows = entry['name'], entry['macros']
        for macro in macro_rows:
            relationships.append(
                define_ciod_macro_relationship(ciod_name, macro))
    return relationships


def define_ciod_macro_relationship(ciod, macro):
    """Build the relationship record linking one CIOD to one macro.

    ``macro['usage']`` is split by ``expand_conditional_statement`` into the
    usage code and conditional statement.  ``ciodId`` is the slug of
    ``ciod``; ``macroId`` is the slug of the macro's text
    (``macro['macro']`` converted from HTML and passed through
    ``clean_macro_name``).
    """
    usage, conditional_statement = expand_conditional_statement(macro['usage'])
    ciod_slug = pl.create_slug(ciod)
    macro_name = clean_macro_name(pl.text_from_html_string(macro['macro']))
    macro_slug = pl.create_slug(macro_name)
    return dict(
        ciodId=ciod_slug,
        macroId=macro_slug,
        usage=usage,
        conditionalStatement=conditional_statement,
    )


if __name__ == '__main__':
    # Script entry point: load argv[1] with pl.read_json_data, build the
    # CIOD-to-macro relationships, and emit them with pl.write_pretty_json.
    ciod_macro_list = pl.read_json_data(sys.argv[1])
    ciod_macro_relationships = define_all_relationships(ciod_macro_list)
    pl.write_pretty_json(ciod_macro_relationships)
Ejemplo n.º 8
0
    try:
        cutoff_index = sections.index('1')
        return '.'.join(sections[0:cutoff_index])
    except ValueError:
        return parent_section_id


def get_refs_from_pairs(pairs):
    """Gather the distinct reference locations found across all pairs.

    For every pair, each item of ``pair['externalReferences']`` is mapped
    through ``get_location_from_ref``; the results are accumulated in a set,
    which is returned.
    """
    locations = set()
    for pair in pairs:
        locations.update(
            get_location_from_ref(reference)
            for reference in pair['externalReferences'])
    return locations


def get_location_from_ref(ref):
    """Return the parts of the reference's short HTML location as a tuple.

    The location comes from ``pl.get_short_html_location`` applied to
    ``ref['sourceUrl']`` and is split on ``/``.
    """
    source_url = ref['sourceUrl']
    location_parts = pl.get_short_html_location(source_url).split('/')
    return tuple(location_parts)


if __name__ == '__main__':
    # Script entry point.  argv[1] and argv[2] are loaded with
    # pl.read_json_data; cast() only informs the type checker and does no
    # runtime conversion.
    module_attr_pairs = cast(List[MetadataTableType],
                             pl.read_json_data(sys.argv[1]))
    macro_attr_pairs = cast(List[MetadataTableType],
                            pl.read_json_data(sys.argv[2]))
    section_listing = pl.read_json_data(sys.argv[3])
    # Module and macro pairs are concatenated and searched together.
    references = find_reference_html_in_sections(
        module_attr_pairs + macro_attr_pairs, section_listing)
    # Rebuild the mapping from its sorted items so the emitted entries
    # appear in key order.
    pl.write_pretty_json({r[0]: r[1] for r in sorted(references.items())})
from typing import cast, List
import sys

from dicom_standard import parse_lib as pl
from dicom_standard.macro_utils import MetadataTableType
from dicom_standard.preprocess_modules_with_attributes import (
    key_tables_by_id,
    expand_all_macros,
    preprocess_attribute_fields,
    expand_hierarchy,
)

if __name__ == '__main__':
    # Script entry point.  argv[1] is loaded with pl.read_json_data; cast()
    # only informs the type checker.
    module_macro_attr_tables = cast(List[MetadataTableType],
                                    pl.read_json_data(sys.argv[1]))
    # The id lookup is built from ALL tables, while only the tables whose
    # 'isMacro' value is truthy are expanded and written out.
    id_to_table = key_tables_by_id(module_macro_attr_tables)
    macro_attr_tables = [
        table for table in module_macro_attr_tables if table['isMacro']
    ]
    # Pipeline: expand_all_macros -> preprocess_attribute_fields ->
    # expand_hierarchy, then emit with pl.write_pretty_json.
    expanded_tables = expand_all_macros(macro_attr_tables, id_to_table)
    preprocessed_tables = preprocess_attribute_fields(expanded_tables)
    tables_with_hierarchy = expand_hierarchy(preprocessed_tables)
    pl.write_pretty_json(tables_with_hierarchy)
Ejemplo n.º 10
0
        for section in all_sections
    }


def enclosing_section_from_id(id_div):
    """Return the ancestor of id_div selected by the prefix of its id.

    The number of ``.parent`` hops depends on how ``id_div['id']`` starts:
    ``sect`` -> 5, ``biblio`` -> 3, ``table`` -> 2, ``note`` -> 6, and 2 for
    anything else.
    """
    # TODO: put example from the standard here
    hops_by_pattern = (
        (r'sect.*', 5),
        (r'biblio.*', 3),
        (r'table.*', 2),
        (r'note.*', 6),
    )
    element_id = id_div['id']
    hops = 2
    for pattern, depth in hops_by_pattern:
        if re.match(pattern, element_id):
            hops = depth
            break
    ancestor = id_div
    for _ in range(hops):
        ancestor = ancestor.parent
    return ancestor


if __name__ == '__main__':
    # Script entry point.  Every command-line argument is treated as a file
    # path, parsed with parse_html_file, and keyed by its base name.
    # TODO: figure out a way to speed up the parsing; since we only need a
    # small portion of the parse tree, we may be able to use:
    # https://docs.python.org/3/library/html.parser.html to avoid building the
    # full parse tree.
    standard = {os.path.basename(f): parse_html_file(f) for f in sys.argv[1:]}
    section_ids = extract_section_ids(standard)
    # Apply normalize_sections to each page's value, keeping the page keys.
    sections = {
        page: normalize_sections(html)
        for page, html in section_ids.items()
    }
    write_pretty_json(sections)
Ejemplo n.º 11
0
'''
Convert the processed macro-attribute JSON data into a
normalized listing of all macros in the DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl

from dicom_standard.process_modules import modules_from_tables

if __name__ == '__main__':
    # Script entry point: load argv[1] with pl.read_json_data, reuse
    # modules_from_tables to derive the macro listing, and emit it with
    # pl.write_pretty_json.
    macro_attr_tables = pl.read_json_data(sys.argv[1])
    macros = modules_from_tables(macro_attr_tables)
    pl.write_pretty_json(macros)
Ejemplo n.º 12
0
    duplicate_paths = [k for k, v in Counter(path_list).items() if v > 1]
    path_to_node = {}
    for node in node_list:
        path = node['path']
        if path in path_to_node:
            # Standard workaround: Catch inconsistency in Table C.36.8-1 where "Content Creator's Name" attribute
            # appears twice in same hierarchy without a conditional (once in Table C.36.8-1 and once in included Table 10.9.2-1)
            # http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html#table_C.36.8-1
            # http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_10.9.2.html#table_10.9.2-1
            if path not in DUPLICATE_PATH_EXCEPTIONS:
                # Add conditional to description only if the duplicates do not have identical descriptions
                if is_duplicate_node(path, node_list):
                    add_conditional_to_description(node)
                    path_to_node[path]['description'] += node['description']
                    path_to_node[path]['externalReferences'].extend(
                        node['externalReferences'])
        else:
            if path in duplicate_paths and path not in DUPLICATE_PATH_EXCEPTIONS:
                # Add conditional to description only if the duplicates do not have identical descriptions
                if is_duplicate_node(path, node_list):
                    add_conditional_to_description(node)
            path_to_node[path] = node
        path_to_node[path].pop('conditional', None)
    return list(path_to_node.values())


if __name__ == "__main__":
    # Script entry point: load argv[1] with pl.read_json_data, merge nodes
    # that share a path via merge_duplicate_nodes, and emit the result with
    # pl.write_pretty_json.
    node_list = pl.read_json_data(sys.argv[1])
    processed_node_list = merge_duplicate_nodes(node_list)
    pl.write_pretty_json(processed_node_list)
Ejemplo n.º 13
0
        for attribute in module['attributes']:
            entries.append({
                'module':
                module['id'],
                'path':
                attribute['id'],
                'tag':
                attribute['tag'],
                'type':
                attribute['type'],
                'linkToStandard':
                get_standard_link(module, attribute),
                'description':
                attribute['description']
            })
    return entries


def get_standard_link(module, attribute):
    """Return the attribute's ``'linkToStandard'``, else the module's.

    The attribute's value wins whenever the key is present on it, even if
    that value is falsy; the module's value is used only when the attribute
    has no such key.
    """
    link_owner = attribute if 'linkToStandard' in attribute else module
    return link_owner['linkToStandard']


if __name__ == "__main__":
    # Script entry point: load argv[1] with pl.read_json_to_dict, flatten it
    # with module_attr_relationship_table, and emit the result with
    # pl.write_pretty_json.
    module_attr_list = pl.read_json_to_dict(sys.argv[1])
    module_attr_relationship_list = module_attr_relationship_table(
        module_attr_list)
    pl.write_pretty_json(module_attr_relationship_list)
Ejemplo n.º 14
0
'''
Takes the extracted CIOD information and processes it to produce a
dictionary of all CIODs in the DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl


def ciods_from_extracted_list(ciod_module_list):
    """Return the CIOD entries with cleaned descriptions and no modules.

    Each entry is modified in place: its ``'description'`` is replaced by
    ``pl.clean_html`` of the old value and its ``'modules'`` key, if any, is
    removed.  The same entry objects are returned in a new list.
    """
    def strip_to_ciod(entry):
        entry['description'] = pl.clean_html(entry['description'])
        entry.pop('modules', None)
        return entry

    return [strip_to_ciod(entry) for entry in ciod_module_list]


if __name__ == '__main__':
    # Script entry point: load argv[1] with pl.read_json_data, reduce it to
    # the CIOD listing, and emit that with pl.write_pretty_json.
    ciod_module_list = pl.read_json_data(sys.argv[1])
    ciods = ciods_from_extracted_list(ciod_module_list)
    pl.write_pretty_json(ciods)
Ejemplo n.º 15
0

def macro_attr_relationship_table(macro_attr_list):
    """Flatten macros and their attributes into one record per attribute.

    Each record carries the owning macro's id as ``macroId``; the
    attribute's ``id`` (stored as ``path``), ``tag``, ``type`` and
    ``description``; a ``linkToStandard`` resolved by ``get_standard_link``;
    and the attribute's ``conditional`` value, or ``None`` when absent.
    Records follow the input order.
    """
    return [
        {
            'macroId': macro['id'],
            'path': attribute['id'],
            'tag': attribute['tag'],
            'type': attribute['type'],
            'linkToStandard': get_standard_link(macro, attribute),
            'description': attribute['description'],
            'conditional': attribute.get('conditional'),
        }
        for macro in macro_attr_list
        for attribute in macro['attributes']
    ]


if __name__ == "__main__":
    # Script entry point: load argv[1] with pl.read_json_data, flatten it
    # with macro_attr_relationship_table, and emit the result with
    # pl.write_pretty_json.
    macro_attr_list = pl.read_json_data(sys.argv[1])
    macro_attr_relationship_list = macro_attr_relationship_table(
        macro_attr_list)
    pl.write_pretty_json(macro_attr_relationship_list)
Ejemplo n.º 16
0
'''
Convert the processed module-attribute JSON data into a
normalized listing of all modules in the DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl


def modules_from_tables(tables):
    """Return a dict of module summaries keyed by module id.

    Each summary holds the table's ``id``, ``name`` and ``linkToStandard``
    plus its ``description`` passed through ``pl.clean_html``.  When two
    tables share an id, the later one replaces the earlier.
    """
    return {
        table['id']: {
            'id': table['id'],
            'name': table['name'],
            'description': pl.clean_html(table['description']),
            'linkToStandard': table['linkToStandard'],
        }
        for table in tables
    }


if __name__ == '__main__':
    # Script entry point: load argv[1] with pl.read_json_to_dict, build the
    # id-keyed module listing, and emit it with pl.write_pretty_json.
    module_attr_tables = pl.read_json_to_dict(sys.argv[1])
    modules = modules_from_tables(module_attr_tables)
    pl.write_pretty_json(modules)