def test_valid_references():
    '''
    Check that every URL keyed in ``standard/references.json`` answers a
    GET request with HTTP status 200.

    All URLs are tried before the test fails, and the failing URLs are
    listed in the assertion message. A URL whose request raises a
    ``requests`` error (connection failure, timeout, ...) is counted as
    failing rather than aborting the loop.
    '''
    references_dict = pl.read_json_data('standard/references.json')
    errors = []
    for url in references_dict:
        try:
            # requests applies no timeout by default; without one a single
            # unresponsive host would block the whole test run.
            status_code = requests.get(url, timeout=30).status_code
        except requests.RequestException:
            errors.append(url)
            continue
        if status_code != 200:
            errors.append(url)
    assert not errors, 'Unreachable reference URLs: ' + ', '.join(errors)
import sys

from dicom_standard import parse_lib as pl


def update_sourceurls(pairs, references):
    '''
    Replace each external reference's ``sourceUrl`` with the key of
    ``references`` that shares the same URL fragment.

    The fragment is the text after the last ``#`` (the whole URL when it
    contains no ``#``). ``pairs`` is modified in place and also returned.
    A ``KeyError`` propagates when a fragment matches no key.
    '''
    url_by_fragment = {}
    for url in references.keys():
        # When several keys share a fragment, the last one seen wins.
        url_by_fragment[url.split('#')[-1]] = url
    for pair in pairs:
        for reference in pair['externalReferences']:
            fragment = reference['sourceUrl'].split('#')[-1]
            reference['sourceUrl'] = url_by_fragment[fragment]
    return pairs


if __name__ == '__main__':
    # argv[1]: JSON file holding the pairs; argv[2]: JSON file holding the references.
    pairs = pl.read_json_data(sys.argv[1])
    references = pl.read_json_data(sys.argv[2])
    pl.write_pretty_json(update_sourceurls(pairs, references))
table['attributes'] = [attr for attr in list(map(preprocess_attribute, table['attributes'])) if attr] return table


def preprocess_attribute(attr):
    '''
    Build a cleaned copy of one attribute record.

    'name' and 'tag' are passed through pl.text_from_html_string, as is
    'type' when the key is present; a missing 'type' becomes the string
    'None'. 'description' is copied unchanged. Returns an empty dict
    when the cleaned tag equals 'See F.5'.
    '''
    cleaned_attribute = {
        'name': pl.text_from_html_string(attr['name']),
        'tag': pl.text_from_html_string(attr['tag']),
        'type': 'None' if 'type' not in attr.keys() else pl.text_from_html_string(attr['type']),
        'description': attr['description']
    }
    # Return empty dict if tag is invalid (exception in Table F.3-3)
    if cleaned_attribute['tag'] == 'See F.5':
        return {}
    return cleaned_attribute


def expand_hierarchy(tables):
    '''Return a list with record_hierarchy_for_module applied to each table.'''
    return [record_hierarchy_for_module(table) for table in tables]


if __name__ == '__main__':
    # Load the tables from the JSON file named by the first CLI argument.
    module_macro_attr_tables = cast(List[MetadataTableType], pl.read_json_data(sys.argv[1]))
    id_to_table = key_tables_by_id(module_macro_attr_tables)
    # Keep only the tables whose 'isMacro' flag is falsy.
    module_attr_tables = [table for table in module_macro_attr_tables if not table['isMacro']]
    expanded_tables = expand_all_macros(module_attr_tables, id_to_table)
    preprocessed_tables = preprocess_attribute_fields(expanded_tables)
    tables_with_hierarchy = expand_hierarchy(preprocessed_tables)
    pl.write_pretty_json(tables_with_hierarchy)
parsed_description = BeautifulSoup(pair['description'], 'html.parser') references = get_valid_reference_anchors(parsed_description) external_references = list(map(reference_structure_from_anchor, references)) for ref in references: mark_as_recorded(ref) pair['externalReferences'] = [] if len( external_references) < 1 else external_references pair['description'] = str(parsed_description) finalize_descriptions(pair) return pair


def finalize_descriptions(pair):
    '''Replace pair['description'] in place with the result of pl.clean_html on it.'''
    pair['description'] = pl.clean_html(pair['description'])


def reference_structure_from_anchor(reference):
    '''Return a dict holding the anchor's 'href' as "sourceUrl" and its text as "title".'''
    return {"sourceUrl": reference.get('href'), "title": reference.get_text()}


def mark_as_recorded(anchor):
    '''Blank the anchor's 'href' and rename the element to 'span' (in place).'''
    anchor['href'] = ''
    anchor.name = 'span'


if __name__ == '__main__':
    # Read the pairs from the JSON file named by the first CLI argument,
    # record their references, and write the result as pretty JSON.
    pairs = pl.read_json_data(sys.argv[1])
    updated_pairs = record_references_inside_pairs(pairs)
    pl.write_pretty_json(updated_pairs)
# Standard workaround: Remove "Macro" from "Frame VOI LUT With LUT Macro" in Table A.84.3.2-1
# http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.84.3.2.html#table_A.84.3.2-1
def clean_macro_name(text):
    '''Remove every " Macro" occurrence from text and trim surrounding whitespace.'''
    return text.replace(' Macro', '').strip()


def define_all_relationships(ciod_macro_list):
    '''
    Return one relationship record per (CIOD, macro) combination.

    Each table contributes its 'name' as the CIOD and one record for
    every entry of its 'macros' list, in input order.
    '''
    relationships = []
    for table in ciod_macro_list:
        ciod_name, macro_rows = table['name'], table['macros']
        relationships += [define_ciod_macro_relationship(ciod_name, row) for row in macro_rows]
    return relationships


def define_ciod_macro_relationship(ciod, macro):
    '''
    Build the relationship record for a single CIOD/macro combination.

    The usage letter and conditional statement come from
    expand_conditional_statement(macro['usage']); both ids are slugs, the
    macro id being derived from the cleaned text of macro['macro'].
    '''
    usage, conditional_statement = expand_conditional_statement(macro['usage'])
    ciod_id = pl.create_slug(ciod)
    macro_name = clean_macro_name(pl.text_from_html_string(macro['macro']))
    return {
        "ciodId": ciod_id,
        "macroId": pl.create_slug(macro_name),
        "usage": usage,
        "conditionalStatement": conditional_statement
    }


if __name__ == '__main__':
    # argv[1]: JSON file holding the CIOD/macro tables.
    ciod_macro_list = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(define_all_relationships(ciod_macro_list))
module.pop('isMacro', None) modules.append(module) return modules


def create_ciod_specific_modules(ciods, module, module_id):
    '''
    Return one deep copy of module per entry in ciods.

    Each copy's 'id' is set to '<ciod>-<module_id>'; the module passed
    in is left unmodified because every entry is a deepcopy.
    '''
    modules = []
    for ciod in ciods:
        ciod_specific_module = deepcopy(module)
        ciod_specific_module['id'] = f'{ciod}-{module_id}'
        modules.append(ciod_specific_module)
    return modules


if __name__ == '__main__': module_attr_tables = pl.read_json_data(sys.argv[1]) ciod_to_macro = cast(List[MetadataTableType], pl.read_json_data(sys.argv[2])) modules = modules_from_tables(module_attr_tables) ciods_with_mffg_macros = list( set([ rel['ciodId'] for rel in ciod_to_macro if rel['moduleType'] == 'Multi-frame' ])) ciods_with_cffg_macros = list( set([ rel['ciodId'] for rel in ciod_to_macro if rel['moduleType'] == 'Current Frame' ])) multi_frame_func_group_module = next( filter(lambda rel: rel['id'] == MF_FUNC_GROUP_MODULE_ID, modules),
def section_parent_page(sect_div):
    '''
    Return the id of the first anchor under sect_div.parent.div.div.div,
    cut off before its first dot-separated component equal to '1'.

    The id is returned unchanged when none of its components is '1'.
    '''
    anchor_id = sect_div.parent.div.div.div.find('a').get('id')
    parts = anchor_id.split('.')
    if '1' not in parts:
        return anchor_id
    return '.'.join(parts[:parts.index('1')])


def get_refs_from_pairs(pairs):
    '''Collect the distinct locations of every external reference in pairs as a set of tuples.'''
    return {
        get_location_from_ref(reference)
        for pair in pairs
        for reference in pair['externalReferences']
    }


def get_location_from_ref(ref):
    '''Split the short HTML location of ref['sourceUrl'] on '/' and return the parts as a tuple.'''
    short_location = pl.get_short_html_location(ref['sourceUrl'])
    return tuple(short_location.split('/'))


if __name__ == '__main__':
    # argv[1]: JSON file of module-attribute pairs; argv[2]: JSON section listing.
    attr_pairs = pl.read_json_data(sys.argv[1])
    sections = pl.read_json_data(sys.argv[2])
    pl.write_pretty_json(find_reference_html_in_sections(attr_pairs, sections))
def test_valid_standard_links():
    '''
    Check that get_invalid_urls reports nothing for the distinct
    'linkToStandard' values found in standard/module_to_attributes.json.
    '''
    relationships = pl.read_json_data('standard/module_to_attributes.json')
    unique_links = {relationship['linkToStandard'] for relationship in relationships}
    invalid_links = get_invalid_urls(unique_links)
    assert not invalid_links
def macros(make_standard):
    '''
    Load and return the contents of standard/macros.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    macros_json = pl.read_json_data('standard/macros.json')
    return macros_json
def ciods(make_standard):
    '''
    Load and return the contents of standard/ciods.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    ciods_json = pl.read_json_data('standard/ciods.json')
    return ciods_json
def attributes(make_standard):
    '''
    Load and return the contents of standard/attributes.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    attributes_json = pl.read_json_data('standard/attributes.json')
    return attributes_json
duplicate_paths = [k for k, v in Counter(path_list).items() if v > 1] path_to_node = {} for node in node_list: path = node['path'] if path in path_to_node: # Standard workaround: Catch inconsistency in Table C.36.8-1 where "Content Creator's Name" attribute # appears twice in same hierarchy without a conditional (once in Table C.36.8-1 and once in included Table 10.9.2-1) # http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html#table_C.36.8-1 # http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_10.9.2.html#table_10.9.2-1 if path not in DUPLICATE_PATH_EXCEPTIONS: # Add conditional to description only if the duplicates do not have identical descriptions if is_duplicate_node(path, node_list): add_conditional_to_description(node) path_to_node[path]['description'] += node['description'] path_to_node[path]['externalReferences'].extend( node['externalReferences']) else: if path in duplicate_paths and path not in DUPLICATE_PATH_EXCEPTIONS: # Add conditional to description only if the duplicates do not have identical descriptions if is_duplicate_node(path, node_list): add_conditional_to_description(node) path_to_node[path] = node path_to_node[path].pop('conditional', None) return list(path_to_node.values())


if __name__ == "__main__":
    # Read the node list from the JSON file named by the first CLI argument,
    # merge nodes that share a path, and write the result as pretty JSON.
    node_list = pl.read_json_data(sys.argv[1])
    processed_node_list = merge_duplicate_nodes(node_list)
    pl.write_pretty_json(processed_node_list)
def macro_attr_relationship_table(macro_attr_list):
    '''
    Flatten macros and their attributes into one record per
    (macro, attribute) combination.

    Each record carries the macro id, the attribute's id (as 'path'),
    tag, type and description, the link chosen by get_standard_link, and
    the attribute's 'conditional' value (None when absent).
    '''
    return [
        {
            'macroId': macro['id'],
            'path': attribute['id'],
            'tag': attribute['tag'],
            'type': attribute['type'],
            'linkToStandard': get_standard_link(macro, attribute),
            'description': attribute['description'],
            'conditional': attribute.get('conditional'),
        }
        for macro in macro_attr_list
        for attribute in macro['attributes']
    ]


if __name__ == "__main__":
    # argv[1]: JSON file holding the macros with their attributes.
    macro_attr_list = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(macro_attr_relationship_table(macro_attr_list))
'moduleId': module['id'], 'path': attribute['id'], 'tag': attribute['tag'], 'type': attribute['type'], 'linkToStandard': get_standard_link(module, attribute), 'description': attribute['description'], 'conditional': attribute.get('conditional'), }) return entries


def get_standard_link(module, attribute):
    '''
    Return the attribute's own 'linkToStandard' when it has one,
    otherwise fall back to the module's 'linkToStandard'.
    '''
    if 'linkToStandard' not in attribute.keys():
        return module['linkToStandard']
    else:
        return attribute['linkToStandard']


if __name__ == "__main__":
    # Read the modules from the JSON file named by the first CLI argument,
    # build the module/attribute relationship table, and write it as pretty JSON.
    module_attr_list = pl.read_json_data(sys.argv[1])
    module_attr_relationship_list = module_attr_relationship_table(
        module_attr_list)
    pl.write_pretty_json(module_attr_relationship_list)
'''
Convert the processed macro-attribute JSON data into a
normalized listing of all macros in the DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl
from dicom_standard.process_modules import modules_from_tables

if __name__ == '__main__':
    # argv[1]: JSON file holding the macro-attribute tables.
    tables_path = sys.argv[1]
    macro_listing = modules_from_tables(pl.read_json_data(tables_path))
    pl.write_pretty_json(macro_listing)
def test_valid_references():
    '''
    Check that get_invalid_urls reports nothing for the URLs that key
    standard/references.json.
    '''
    reference_urls = [*pl.read_json_data('standard/references.json').keys()]
    invalid_urls = get_invalid_urls(reference_urls)
    assert not invalid_urls
def modules(make_standard):
    '''
    Load and return the contents of standard/modules.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    modules_json = pl.read_json_data('standard/modules.json')
    return modules_json
def references(make_standard):
    '''
    Load and return the contents of standard/references.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    references_json = pl.read_json_data('standard/references.json')
    return references_json
def sops(make_standard):
    '''
    Load and return the contents of standard/sops.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    sops_json = pl.read_json_data('standard/sops.json')
    return sops_json
try: cutoff_index = sections.index('1') return '.'.join(sections[0:cutoff_index]) except ValueError: return parent_section_id


def get_refs_from_pairs(pairs):
    '''
    Return the set of distinct locations referenced by pairs.

    Every entry of each pair's 'externalReferences' is converted with
    get_location_from_ref; duplicates collapse because the results are
    stored in a set.
    '''
    refs_to_record = set()
    for pair in pairs:
        ref_page_id_pairs = map(get_location_from_ref, pair['externalReferences'])
        for ref in ref_page_id_pairs:
            refs_to_record.add(ref)
    return refs_to_record


def get_location_from_ref(ref):
    '''Split the short HTML location of ref['sourceUrl'] on '/' and return the parts as a tuple.'''
    return tuple(pl.get_short_html_location(ref['sourceUrl']).split('/'))


if __name__ == '__main__':
    # argv[1] and argv[2]: JSON files of attribute pairs (modules, then macros);
    # argv[3]: JSON section listing.
    module_attr_pairs = cast(List[MetadataTableType], pl.read_json_data(sys.argv[1]))
    macro_attr_pairs = cast(List[MetadataTableType], pl.read_json_data(sys.argv[2]))
    section_listing = pl.read_json_data(sys.argv[3])
    # Both pair lists are concatenated and searched together.
    references = find_reference_html_in_sections(
        module_attr_pairs + macro_attr_pairs, section_listing)
    # Rebuild the mapping from its sorted items so the output order is stable.
    pl.write_pretty_json({r[0]: r[1] for r in sorted(references.items())})
def ciod_fg_macro_relationship(make_standard):
    '''
    Load and return the contents of
    standard/ciod_to_func_group_macros.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    relationships_json = pl.read_json_data('standard/ciod_to_func_group_macros.json')
    return relationships_json
errors = [] for attr in parsed_table_data: attr_name = attr['name'] retired = attr['retired'] == 'Y' if retired and attr['name'] not in retired_attrs: errors.append( f'Attribute "{attr_name}" {attr["tag"]} is retired in Table ' 'E.1-1 but not in Table 6-1.') if not retired and attr['name'] in retired_attrs: errors.append( f'Attribute "{attr_name}" {attr["tag"]} is retired in Table ' '6-1 but not in Table E.1-1.') if errors: errors.insert( 0, 'One or more attributes in tables 6-1 and E.1-1 have inconsistent properties between tables:' ) error_msg = '\n'.join(errors) raise Exception(error_msg)


if __name__ == '__main__':
    # argv[1]: HTML file to parse; argv[2]: JSON file holding the attributes.
    standard = pl.parse_html_file(sys.argv[1])
    attributes = pl.read_json_data(sys.argv[2])
    table = get_conf_profile_table(standard)
    parsed_table_data = table_to_json(table)
    # The integrity check runs before 'retired' is removed below.
    verify_table_integrity(parsed_table_data, cast(AttrTableType, attributes))
    # Drop the 'retired' key from every row before writing the output.
    for attr in parsed_table_data:
        del attr['retired']
    pl.write_pretty_json(parsed_table_data)
def ciod_module_relationship(make_standard):
    '''
    Load and return the contents of standard/ciod_to_modules.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    relationships_json = pl.read_json_data('standard/ciod_to_modules.json')
    return relationships_json
def expand_conditional_statement(usage_field_html):
    '''
    Split an HTML usage field into (usage, conditional_statement).

    usage is the first character of the cleaned field text;
    conditional_statement is whatever extract_conditional_statement
    returns for that text (None when there is no condition).
    '''
    field_text = process_usage_html(usage_field_html)
    return field_text[0], extract_conditional_statement(field_text)


def process_usage_html(usage_field_html):
    '''
    Return the stripped text content of an HTML usage field.

    Raises Exception when the stripped text is empty.
    '''
    stripped_text = pl.text_from_html_string(usage_field_html).strip()
    if not stripped_text:
        raise Exception('Empty module usage field')
    return stripped_text


def extract_conditional_statement(usage_field):
    '''
    Return the condition that follows a leading 'C' in usage_field.

    A 'C - ' prefix is removed entirely; otherwise, for a field longer
    than one character, only the leading 'C' is removed. The remainder
    is stripped. Returns None for a bare 'C' or any other field.
    '''
    if usage_field.startswith('C - '):
        return usage_field[4:].strip()
    if usage_field.startswith('C') and len(usage_field) > 1:
        return usage_field[1:].strip()
    return None


if __name__ == '__main__':
    # argv[1]: JSON file holding the CIOD/module tables.
    ciod_module_list = pl.read_json_data(sys.argv[1])
    pl.write_pretty_json(define_all_relationships(ciod_module_list))
def module_attribute_relationship(make_standard):
    '''
    Load and return the contents of standard/module_to_attributes.json.

    make_standard is not used in the body; presumably it is a
    prerequisite that must run first -- confirm at its definition.
    '''
    relationships_json = pl.read_json_data('standard/module_to_attributes.json')
    return relationships_json
set([ rel['ciodId'] for rel in ciod_to_macro if rel['moduleType'] == 'Multi-frame' ])) ciod_specific_attrs += process_fg_attributes(module_to_attr, ciods_with_mffg_macros, MF_FUNC_GROUP_MODULE_ID) ciods_with_cffg_macros = list( set([ rel['ciodId'] for rel in ciod_to_macro if rel['moduleType'] == 'Current Frame' ])) ciod_specific_attrs += process_fg_attributes(module_to_attr, ciods_with_cffg_macros, CF_FUNC_GROUP_MODULE_ID) return ciod_specific_attrs


if __name__ == '__main__':
    # Four JSON inputs, in order: module-to-attribute records, macros,
    # CIOD-to-macro records, macro-to-attribute records.
    module_to_attributes = pl.read_json_data(sys.argv[1])
    macros = pl.read_json_data(sys.argv[2])
    ciod_to_macro = pl.read_json_data(sys.argv[3])
    macro_to_attributes = pl.read_json_data(sys.argv[4])
    new_attributes = process_ciod_specific_attributes(module_to_attributes, macros, ciod_to_macro, macro_to_attributes)
    # The new records are appended to the existing ones and the combined
    # list is ordered by each record's 'path'.
    sorted_modules_to_attributes = sorted(module_to_attributes + new_attributes, key=itemgetter('path'))
    pl.write_pretty_json(sorted_modules_to_attributes)
parsed_description = BeautifulSoup(pair['description'], 'html.parser') references = get_valid_reference_anchors(parsed_description) external_references = list(map(reference_structure_from_anchor, references)) for ref in references: mark_as_recorded(ref) pair['externalReferences'] = [] if len( external_references) < 1 else external_references pair['description'] = str(parsed_description) finalize_descriptions(pair) return pair


def finalize_descriptions(pair):
    '''Replace pair['description'] in place with the result of pl.clean_html on it.'''
    pair['description'] = pl.clean_html(pair['description'])


def reference_structure_from_anchor(reference):
    '''Return a dict holding the anchor's 'href' as "sourceUrl" and its text as "title".'''
    return {"sourceUrl": reference.get('href'), "title": reference.get_text()}


def mark_as_recorded(anchor):
    '''Blank the anchor's 'href' and rename the element to 'span' (in place).'''
    anchor['href'] = ''
    anchor.name = 'span'


if __name__ == '__main__':
    # Read the module-attribute pairs from the JSON file named by the first
    # CLI argument, record their references, and write the result as pretty JSON.
    module_attr_pairs = pl.read_json_data(sys.argv[1])
    updated_pairs = record_references_inside_pairs(module_attr_pairs)
    pl.write_pretty_json(updated_pairs)