def section_parent_page(sect_div):
    '''
    Return the id of the page holding the section that encloses `sect_div`.

    The id is read from the first `<a>` found under
    `sect_div.parent.div.div.div`. When one of its dot-separated components
    is exactly '1', that component and everything after it are dropped;
    otherwise the id is returned unchanged.
    '''
    anchor = sect_div.parent.div.div.div.find('a')
    parent_section_id = anchor.get('id')
    components = parent_section_id.split('.')
    if '1' not in components:
        return parent_section_id
    return '.'.join(components[:components.index('1')])


def get_refs_from_pairs(pairs):
    '''
    Collect the distinct reference locations cited by `pairs`.

    Every entry of each pair's 'externalReferences' list is converted with
    `get_location_from_ref`; the results are gathered into a set.
    '''
    return {
        get_location_from_ref(reference)
        for pair in pairs
        for reference in pair['externalReferences']
    }


def get_location_from_ref(ref):
    '''
    Return the short HTML location of `ref['sourceUrl']`, split on '/',
    as a tuple.
    '''
    short_location = pl.get_short_html_location(ref['sourceUrl'])
    return tuple(short_location.split('/'))


if __name__ == '__main__':
    # argv[1] and argv[2] are loaded as the module-attribute pairs and the
    # section listing respectively.
    pairs = pl.read_json_to_dict(sys.argv[1])
    listing = pl.read_json_to_dict(sys.argv[2])
    pl.write_pretty_json(find_reference_html_in_sections(pairs, listing))
# Parse the description HTML so that reference anchors can be located and
# rewritten in place.
parsed_description = BeautifulSoup(pair['description'], 'html.parser')
references = get_valid_reference_anchors(parsed_description)
# list(map(...)) is eager: href/title are captured before the anchors are
# blanked by mark_as_recorded below.
external_references = list(map(reference_structure_from_anchor, references))
for ref in references:
    mark_as_recorded(ref)
pair['externalReferences'] = [] if len(
    external_references) < 1 else external_references
# Serialise the modified tree back into the pair, then clean the HTML.
pair['description'] = str(parsed_description)
finalize_descriptions(pair)
return pair


def finalize_descriptions(pair):
    '''
    Replace `pair['description']` with the result of `pl.clean_html`,
    mutating `pair` in place.
    '''
    pair['description'] = pl.clean_html(pair['description'])


def reference_structure_from_anchor(reference):
    '''
    Build the record stored for one reference anchor: its 'href' as
    "sourceUrl" and its text content as "title".
    '''
    return {"sourceUrl": reference.get('href'), "title": reference.get_text()}


def mark_as_recorded(anchor):
    '''
    Neutralise an anchor that has been recorded: blank its 'href' and set
    its `name` to 'span'.
    '''
    anchor['href'] = ''
    # presumably a BeautifulSoup Tag, where assigning `name` renames the
    # element -- TODO confirm
    anchor.name = 'span'


if __name__ == '__main__':
    # argv[1] is the JSON file of pairs to process; the updated pairs are
    # written out with pl.write_pretty_json.
    pairs = pl.read_json_data(sys.argv[1])
    updated_pairs = record_references_inside_pairs(pairs)
    pl.write_pretty_json(updated_pairs)
# Distinct 'ciodId' values of the relationships whose 'moduleType' is
# 'Multi-frame'.
set([
    rel['ciodId'] for rel in ciod_to_macro
    if rel['moduleType'] == 'Multi-frame'
]))
ciod_specific_attrs += process_fg_attributes(module_to_attr,
                                             ciods_with_mffg_macros,
                                             MF_FUNC_GROUP_MODULE_ID)
# Same selection for relationships whose 'moduleType' is 'Current Frame'.
ciods_with_cffg_macros = list(
    set([
        rel['ciodId'] for rel in ciod_to_macro
        if rel['moduleType'] == 'Current Frame'
    ]))
ciod_specific_attrs += process_fg_attributes(module_to_attr,
                                             ciods_with_cffg_macros,
                                             CF_FUNC_GROUP_MODULE_ID)
return ciod_specific_attrs


if __name__ == '__main__':
    # Four JSON inputs are read in order; the roles below are taken from the
    # names of the variables they are loaded into.
    module_to_attributes = pl.read_json_data(sys.argv[1])
    macros = pl.read_json_data(sys.argv[2])
    ciod_to_macro = pl.read_json_data(sys.argv[3])
    macro_to_attributes = pl.read_json_data(sys.argv[4])
    new_attributes = process_ciod_specific_attributes(module_to_attributes,
                                                      macros, ciod_to_macro,
                                                      macro_to_attributes)
    # Append the CIOD-specific entries to the originals and order the
    # combined list by each entry's 'path'.
    sorted_modules_to_attributes = sorted(module_to_attributes + new_attributes,
                                          key=itemgetter('path'))
    pl.write_pretty_json(sorted_modules_to_attributes)
def expand_conditional_statement(usage_field_html):
    '''
    Split a usage cell into its usage code and its conditional statement.

    Returns a `(usage, conditional_statement)` tuple: `usage` is the first
    character of the cleaned usage text, `conditional_statement` is the
    result of `extract_conditional_statement` (which may be None).
    '''
    usage_text = process_usage_html(usage_field_html)
    condition = extract_conditional_statement(usage_text)
    return usage_text[0], condition


def process_usage_html(usage_field_html):
    '''
    Convert the usage cell HTML to text and strip surrounding whitespace.

    Raises `Exception` when nothing is left after stripping.
    '''
    stripped = pl.text_from_html_string(usage_field_html).strip()
    if not stripped:
        raise Exception('Empty module usage field')
    return stripped


def extract_conditional_statement(usage_field):
    '''
    Return the text following a leading 'C' in `usage_field`, or None.

    A 'C - ' prefix is removed as a whole; otherwise only the single 'C' is
    removed. A bare 'C', or any text not starting with 'C', yields None.
    '''
    if not usage_field.startswith('C') or len(usage_field) <= 1:
        return None
    prefix_length = 4 if usage_field.startswith('C - ') else 1
    return usage_field[prefix_length:].strip()


if __name__ == '__main__':
    # argv[1] is the JSON file holding the CIOD module list.
    ciod_module_list = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(define_all_relationships(ciod_module_list))
def get_table_and_tdiv(
        standard: BeautifulSoup) -> Tuple[List[TableDictType], Tag]:
    '''
    Find the table div identified by `TABLE_ID` and parse its rows.

    Returns a tuple of the rows (as dictionaries keyed by `COLUMN_TITLES`)
    and the table div they were read from.
    '''
    table_divs = standard.find_all('div', class_='table')
    tdiv = pl.find_tdiv_by_id(table_divs, TABLE_ID)
    rows = table_to_dict(attribute_table_to_list(tdiv), COLUMN_TITLES)
    return rows, tdiv


def generate_ciod_id(name: str) -> str:
    '''
    Derive a CIOD id from `name`: keep the text before the first 'IOD',
    strip it, and map it through `IOD_ABBREVIATIONS` when an entry exists.
    '''
    before_iod, _, _ = name.partition('IOD')
    cleaned = before_iod.strip()
    return IOD_ABBREVIATIONS.get(cleaned, cleaned)


def table_to_json(table: List[TableDictType],
                  tdiv: Tag) -> List[TableDictType]:
    '''
    Rewrite each row's 'ciod' value with `generate_ciod_id` (in place) and
    return the rows in a new list. `tdiv` is accepted but not used.
    '''
    converted = []
    for entry in table:
        entry['ciod'] = generate_ciod_id(entry['ciod'])
        converted.append(entry)
    return converted


if __name__ == '__main__':
    # argv[1] is the HTML file to parse.
    parsed_standard = pl.parse_html_file(sys.argv[1])
    rows, source_tdiv = get_table_and_tdiv(parsed_standard)
    pl.write_pretty_json(table_to_json(rows, source_tdiv))
pl.read_json_data(sys.argv[2])) modules = modules_from_tables(module_attr_tables) ciods_with_mffg_macros = list( set([ rel['ciodId'] for rel in ciod_to_macro if rel['moduleType'] == 'Multi-frame' ])) ciods_with_cffg_macros = list( set([ rel['ciodId'] for rel in ciod_to_macro if rel['moduleType'] == 'Current Frame' ])) multi_frame_func_group_module = next( filter(lambda rel: rel['id'] == MF_FUNC_GROUP_MODULE_ID, modules), None) assert multi_frame_func_group_module is not None, f'Module ID "{MF_FUNC_GROUP_MODULE_ID}" not found' current_frame_func_group_module = next( filter(lambda rel: rel['id'] == CF_FUNC_GROUP_MODULE_ID, modules), None) assert current_frame_func_group_module is not None, f'Module ID "{CF_FUNC_GROUP_MODULE_ID}" not found' ciod_specific_mffg_modules = create_ciod_specific_modules( ciods_with_mffg_macros, multi_frame_func_group_module, MF_FUNC_GROUP_MODULE_ID) ciod_specific_cffg_modules = create_ciod_specific_modules( ciods_with_cffg_macros, current_frame_func_group_module, CF_FUNC_GROUP_MODULE_ID) sorted_modules = sorted(modules + ciod_specific_mffg_modules + ciod_specific_cffg_modules, key=itemgetter('id')) pl.write_pretty_json(sorted_modules)
# Standard workaround: Table A.84.3.2-1 lists "Frame VOI LUT With LUT Macro";
# the " Macro" text is removed so the name matches the macro's own name.
# http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.84.3.2.html#table_A.84.3.2-1
def clean_macro_name(text):
    '''
    Return `text` with every ' Macro' occurrence removed and surrounding
    whitespace stripped.
    '''
    return text.replace(' Macro', '').strip()


def define_all_relationships(ciod_macro_list):
    '''
    Build one relationship record per (CIOD, macro) pair found in the
    tables of `ciod_macro_list`, in table order.
    '''
    relationships = []
    for table in ciod_macro_list:
        ciod_name = table['name']
        macro_entries = table['macros']
        for macro in macro_entries:
            relationships.append(
                define_ciod_macro_relationship(ciod_name, macro))
    return relationships


def define_ciod_macro_relationship(ciod, macro):
    '''
    Describe how `macro` is used by `ciod`: slugs for both names, the usage
    code and the conditional statement parsed from `macro['usage']`.
    '''
    usage, conditional_statement = expand_conditional_statement(macro['usage'])
    ciod_id = pl.create_slug(ciod)
    macro_text = pl.text_from_html_string(macro['macro'])
    macro_id = pl.create_slug(clean_macro_name(macro_text))
    return {
        "ciodId": ciod_id,
        "macroId": macro_id,
        "usage": usage,
        "conditionalStatement": conditional_statement
    }


if __name__ == '__main__':
    # argv[1] is the JSON file holding the CIOD macro list.
    ciod_macro_list = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(define_all_relationships(ciod_macro_list))
# Cut the id at the first component that is exactly '1'.
try:
    cutoff_index = sections.index('1')
    return '.'.join(sections[0:cutoff_index])
except ValueError:
    # list.index found no '1' component: return the id unchanged.
    return parent_section_id


def get_refs_from_pairs(pairs):
    '''
    Return the set of distinct locations (see `get_location_from_ref`)
    referenced by the 'externalReferences' of every pair in `pairs`.
    '''
    refs_to_record = set()
    for pair in pairs:
        ref_page_id_pairs = map(get_location_from_ref, pair['externalReferences'])
        for ref in ref_page_id_pairs:
            refs_to_record.add(ref)
    return refs_to_record


def get_location_from_ref(ref):
    '''
    Return the short HTML location of `ref['sourceUrl']`, split on '/',
    as a tuple (hashable, so it can be stored in a set).
    '''
    return tuple(pl.get_short_html_location(ref['sourceUrl']).split('/'))


if __name__ == '__main__':
    # argv[1] and argv[2] are both loaded as metadata tables and concatenated
    # before the reference lookup; argv[3] is the section listing.
    module_attr_pairs = cast(List[MetadataTableType], pl.read_json_data(sys.argv[1]))
    macro_attr_pairs = cast(List[MetadataTableType], pl.read_json_data(sys.argv[2]))
    section_listing = pl.read_json_data(sys.argv[3])
    references = find_reference_html_in_sections(
        module_attr_pairs + macro_attr_pairs, section_listing)
    # Rebuild the mapping from its sorted items so the output is emitted in
    # sorted key order.
    pl.write_pretty_json({r[0]: r[1] for r in sorted(references.items())})
from typing import cast, List
import sys

from dicom_standard import parse_lib as pl
from dicom_standard.macro_utils import MetadataTableType
from dicom_standard.preprocess_modules_with_attributes import (
    key_tables_by_id,
    expand_all_macros,
    preprocess_attribute_fields,
    expand_hierarchy,
)

if __name__ == '__main__':
    # argv[1] is the JSON file holding both module and macro attribute tables.
    all_tables = cast(List[MetadataTableType], pl.read_json_data(sys.argv[1]))
    # The id lookup covers every table, while only tables flagged 'isMacro'
    # are expanded and written out.
    tables_by_id = key_tables_by_id(all_tables)
    macros_only = list(filter(lambda table: table['isMacro'], all_tables))
    expanded = expand_all_macros(macros_only, tables_by_id)
    with_hierarchy = expand_hierarchy(preprocess_attribute_fields(expanded))
    pl.write_pretty_json(with_hierarchy)
for section in all_sections
}


def enclosing_section_from_id(id_div):
    '''
    Return the ancestor of `id_div` selected by the prefix of its 'id'.

    The number of `.parent` hops depends on the prefix: 'sect' -> 5,
    'biblio' -> 3, 'table' -> 2, 'note' -> 6, anything else -> 2.
    '''
    # TODO: put example from the standard here
    if re.match(r'sect.*', id_div['id']):
        return id_div.parent.parent.parent.parent.parent
    elif re.match(r'biblio.*', id_div['id']):
        return id_div.parent.parent.parent
    elif re.match(r'table.*', id_div['id']):
        return id_div.parent.parent
    elif re.match(r'note.*', id_div['id']):
        return id_div.parent.parent.parent.parent.parent.parent
    else:
        return id_div.parent.parent


if __name__ == '__main__':
    # TODO: figure out a way to speed up the parsing; since we only need a
    # small portion of the parse tree, we may be able to use:
    # https://docs.python.org/3/library/html.parser.html to avoid building the
    # full parse tree.
    # Every command-line argument is an HTML file; parsed pages are keyed by
    # file basename.
    standard = {os.path.basename(f): parse_html_file(f) for f in sys.argv[1:]}
    section_ids = extract_section_ids(standard)
    sections = {
        page: normalize_sections(html)
        for page, html in section_ids.items()
    }
    write_pretty_json(sections)
'''
Build the normalized listing of all macros in the DICOM Standard from the
processed macro-attribute JSON data.
'''
import sys

from dicom_standard import parse_lib as pl
from dicom_standard.process_modules import modules_from_tables

if __name__ == '__main__':
    # argv[1] is the JSON file of macro-attribute tables; macros share the
    # module summarising logic of modules_from_tables.
    tables = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(modules_from_tables(tables))
duplicate_paths = [k for k, v in Counter(path_list).items() if v > 1] path_to_node = {} for node in node_list: path = node['path'] if path in path_to_node: # Standard workaround: Catch inconsistency in Table C.36.8-1 where "Content Creator's Name" attribute # appears twice in same hierarchy without a conditional (once in Table C.36.8-1 and once in included Table 10.9.2-1) # http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html#table_C.36.8-1 # http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_10.9.2.html#table_10.9.2-1 if path not in DUPLICATE_PATH_EXCEPTIONS: # Add conditional to description only if the duplicates do not have identical descriptions if is_duplicate_node(path, node_list): add_conditional_to_description(node) path_to_node[path]['description'] += node['description'] path_to_node[path]['externalReferences'].extend( node['externalReferences']) else: if path in duplicate_paths and path not in DUPLICATE_PATH_EXCEPTIONS: # Add conditional to description only if the duplicates do not have identical descriptions if is_duplicate_node(path, node_list): add_conditional_to_description(node) path_to_node[path] = node path_to_node[path].pop('conditional', None) return list(path_to_node.values()) if __name__ == "__main__": node_list = pl.read_json_data(sys.argv[1]) processed_node_list = merge_duplicate_nodes(node_list) pl.write_pretty_json(processed_node_list)
for attribute in module['attributes']:
    # One flat entry per attribute, carrying the id of its module.
    entries.append({
        'module': module['id'],
        'path': attribute['id'],
        'tag': attribute['tag'],
        'type': attribute['type'],
        'linkToStandard': get_standard_link(module, attribute),
        'description': attribute['description']
    })
return entries


def get_standard_link(module, attribute):
    '''
    Return the attribute's own 'linkToStandard' when it has one, otherwise
    fall back to the module's 'linkToStandard'.
    '''
    if 'linkToStandard' not in attribute.keys():
        return module['linkToStandard']
    else:
        return attribute['linkToStandard']


if __name__ == "__main__":
    # argv[1] is the JSON file holding the modules with their attributes.
    module_attr_list = pl.read_json_to_dict(sys.argv[1])
    module_attr_relationship_list = module_attr_relationship_table(
        module_attr_list)
    pl.write_pretty_json(module_attr_relationship_list)
'''
Process the extracted CIOD information into a listing of all CIODs in the
DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl


def ciods_from_extracted_list(ciod_module_list):
    '''
    Return the CIOD entries with cleaned descriptions and no 'modules' key.

    The entries of `ciod_module_list` are modified in place; the returned
    list holds those same dictionaries.
    '''
    def _as_ciod(entry):
        entry['description'] = pl.clean_html(entry['description'])
        entry.pop('modules', None)
        return entry

    return [_as_ciod(entry) for entry in ciod_module_list]


if __name__ == '__main__':
    # argv[1] is the JSON file holding the extracted CIOD module list.
    extracted = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(ciods_from_extracted_list(extracted))
def macro_attr_relationship_table(macro_attr_list):
    '''
    Flatten macros and their attributes into one entry per attribute.

    Each entry records the owning macro's id, the attribute's path, tag,
    type, link to the standard, description and (optional) conditional.
    '''
    return [
        {
            'macroId': macro['id'],
            'path': attribute['id'],
            'tag': attribute['tag'],
            'type': attribute['type'],
            'linkToStandard': get_standard_link(macro, attribute),
            'description': attribute['description'],
            'conditional': attribute.get('conditional'),
        }
        for macro in macro_attr_list
        for attribute in macro['attributes']
    ]


if __name__ == "__main__":
    # argv[1] is the JSON file holding the macros with their attributes.
    macros = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(macro_attr_relationship_table(macros))
'''
Build the normalized listing of all modules in the DICOM Standard from the
processed module-attribute JSON data.
'''
import sys

from dicom_standard import parse_lib as pl


def modules_from_tables(tables):
    '''
    Return a dictionary mapping each table's id to a summary holding its
    id, name, cleaned description and link to the standard.

    When several tables share an id, the last one wins.
    '''
    return {
        table['id']: {
            'id': table['id'],
            'name': table['name'],
            'description': pl.clean_html(table['description']),
            'linkToStandard': table['linkToStandard']
        }
        for table in tables
    }


if __name__ == '__main__':
    # argv[1] is the JSON file of module-attribute tables.
    tables = pl.read_json_to_dict(sys.argv[1])
    pl.write_pretty_json(modules_from_tables(tables))