예제 #1
0
    parsed_description = BeautifulSoup(pair['description'], 'html.parser')
    references = get_valid_reference_anchors(parsed_description)
    external_references = list(map(reference_structure_from_anchor,
                                   references))
    for ref in references:
        mark_as_recorded(ref)
    pair['externalReferences'] = [] if len(
        external_references) < 1 else external_references
    pair['description'] = str(parsed_description)
    finalize_descriptions(pair)
    return pair


def finalize_descriptions(pair):
    """Replace the pair's description with its cleaned-up HTML.

    The ``'description'`` entry of ``pair`` is passed through
    ``pl.clean_html`` and the result is stored back under the same key.
    The dict is modified in place; nothing is returned.
    """
    cleaned_description = pl.clean_html(pair['description'])
    pair['description'] = cleaned_description


def reference_structure_from_anchor(reference):
    """Build the external-reference record for one anchor element.

    ``reference`` must provide ``get('href')`` and ``get_text()``
    (presumably a BeautifulSoup anchor tag -- confirm against the caller).

    Returns a dict with the anchor's href under ``"sourceUrl"`` and its
    text content under ``"title"``.
    """
    source_url = reference.get('href')
    title = reference.get_text()
    return {"sourceUrl": source_url, "title": title}


def mark_as_recorded(anchor):
    """Turn an already-recorded anchor into an inert ``span`` in place.

    The element is renamed to ``'span'`` and its ``href`` attribute is
    blanked (set to the empty string, not removed). Nothing is returned.
    """
    anchor.name = 'span'
    anchor['href'] = ''


if __name__ == '__main__':
    # argv[1]: path to the module-attribute pairs JSON file.
    # Load the pairs, record the references found inside them, and hand
    # the updated pairs to pl.write_pretty_json.
    loaded_pairs = pl.read_json_to_dict(sys.argv[1])
    pl.write_pretty_json(record_references_inside_pairs(loaded_pairs))
예제 #2
0
def section_parent_page(sect_div):
    """Return the page id derived from the parent section's anchor id.

    The id is read from the first ``a`` element found under
    ``sect_div.parent.div.div.div``. It is split on ``'.'``; if one of the
    components is exactly ``'1'``, only the components before the first
    such ``'1'`` are kept and re-joined with ``'.'``. Otherwise the id is
    returned unchanged.
    """
    heading_anchor = sect_div.parent.div.div.div.find('a')
    parent_section_id = heading_anchor.get('id')
    id_parts = parent_section_id.split('.')
    if '1' not in id_parts:
        return parent_section_id
    first_one = id_parts.index('1')
    return '.'.join(id_parts[:first_one])


def get_refs_from_pairs(pairs):
    """Collect the distinct reference locations mentioned by all pairs.

    Every entry of each pair's ``'externalReferences'`` list is converted
    with ``get_location_from_ref``; the results are gathered into a set,
    so duplicates collapse to one entry.
    """
    return {
        get_location_from_ref(external_ref)
        for pair in pairs
        for external_ref in pair['externalReferences']
    }


def get_location_from_ref(ref):
    """Return the reference's short HTML location as a tuple of parts.

    The reference's ``'sourceUrl'`` is shortened with
    ``pl.get_short_html_location`` and the result is split on ``'/'``.
    A tuple (rather than a list) is returned so the value is hashable.
    """
    short_location = pl.get_short_html_location(ref['sourceUrl'])
    return tuple(short_location.split('/'))


if __name__ == '__main__':
    # argv[1]: module-attribute pairs JSON; argv[2]: section listing JSON.
    loaded_pairs = pl.read_json_to_dict(sys.argv[1])
    loaded_sections = pl.read_json_to_dict(sys.argv[2])
    pl.write_pretty_json(
        find_reference_html_in_sections(loaded_pairs, loaded_sections))
예제 #3
0
def module_attribute_relationship(make_standard):
    """Load the contents of ``standard/module_to_attributes.json``.

    ``make_standard`` is not used in the body; presumably it is a fixture
    dependency that makes sure the file exists first -- TODO confirm.
    """
    json_path = 'standard/module_to_attributes.json'
    return pl.read_json_to_dict(json_path)
예제 #4
0
        for attribute in module['attributes']:
            entries.append({
                'module':
                module['id'],
                'path':
                attribute['id'],
                'tag':
                attribute['tag'],
                'type':
                attribute['type'],
                'linkToStandard':
                get_standard_link(module, attribute),
                'description':
                attribute['description']
            })
    return entries


def get_standard_link(module, attribute):
    """Pick the standard link for an attribute, falling back to its module.

    Returns ``attribute['linkToStandard']`` when the attribute has that
    key (whatever its value); otherwise returns
    ``module['linkToStandard']``.
    """
    if 'linkToStandard' in attribute:
        return attribute['linkToStandard']
    return module['linkToStandard']


if __name__ == "__main__":
    # argv[1]: path to the module-attribute list JSON file.
    # Flatten it into the relationship table and hand the result to
    # pl.write_pretty_json.
    loaded_modules = pl.read_json_to_dict(sys.argv[1])
    pl.write_pretty_json(module_attr_relationship_table(loaded_modules))
예제 #5
0
def attributes(make_standard):
    """Load the contents of ``standard/attributes.json``.

    ``make_standard`` is not used in the body; presumably it is a fixture
    dependency that makes sure the file exists first -- TODO confirm.
    """
    json_path = 'standard/attributes.json'
    return pl.read_json_to_dict(json_path)
예제 #6
0
def ciod_module_relationship(make_standard):
    """Load the contents of ``standard/ciod_to_modules.json``.

    ``make_standard`` is not used in the body; presumably it is a fixture
    dependency that makes sure the file exists first -- TODO confirm.
    """
    json_path = 'standard/ciod_to_modules.json'
    return pl.read_json_to_dict(json_path)
예제 #7
0
def modules(make_standard):
    """Load the contents of ``standard/modules.json``.

    ``make_standard`` is not used in the body; presumably it is a fixture
    dependency that makes sure the file exists first -- TODO confirm.
    """
    json_path = 'standard/modules.json'
    return pl.read_json_to_dict(json_path)
예제 #8
0
def ciods(make_standard):
    """Load the contents of ``standard/ciods.json``.

    ``make_standard`` is not used in the body; presumably it is a fixture
    dependency that makes sure the file exists first -- TODO confirm.
    """
    json_path = 'standard/ciods.json'
    return pl.read_json_to_dict(json_path)
예제 #9
0
'''
Convert the processed module-attribute JSON data into a
normalized listing of all modules in the DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl


def modules_from_tables(tables):
    """Index the module tables by module id.

    Each entry of ``tables`` contributes one record holding its ``id``,
    ``name``, ``linkToStandard`` and its ``description`` after
    ``pl.clean_html``. If several tables share an id, the last one wins.

    Returns a dict mapping module id to that record.
    """
    return {
        table['id']: {
            'id': table['id'],
            'name': table['name'],
            'description': pl.clean_html(table['description']),
            'linkToStandard': table['linkToStandard'],
        }
        for table in tables
    }


if __name__ == '__main__':
    # argv[1]: path to the module-attribute tables JSON file.
    loaded_tables = pl.read_json_to_dict(sys.argv[1])
    pl.write_pretty_json(modules_from_tables(loaded_tables))
예제 #10
0
'''
Takes the extracted CIOD information and processes it to produce a
dictionary of all CIODs in the DICOM Standard.
'''
import sys

from dicom_standard import parse_lib as pl


def ciods_from_extracted_list(ciod_module_list):
    """Index the extracted CIOD entries by CIOD id.

    Each entry of ``ciod_module_list`` contributes one record holding its
    ``id``, ``linkToStandard``, ``name`` and its ``description`` after
    ``pl.clean_html``. If several entries share an id, the last one wins.

    Returns a dict mapping CIOD id to that record.
    """
    return {
        entry['id']: {
            'id': entry['id'],
            'description': pl.clean_html(entry['description']),
            'linkToStandard': entry['linkToStandard'],
            'name': entry['name'],
        }
        for entry in ciod_module_list
    }


if __name__ == '__main__':
    # argv[1]: path to the extracted CIOD-module list JSON file.
    extracted_ciods = pl.read_json_to_dict(sys.argv[1])
    pl.write_pretty_json(ciods_from_extracted_list(extracted_ciods))
import sys

from dicom_standard import parse_lib as pl


def update_sourceurls(module_attr_pairs, references):
    """Point each external reference at the matching key of ``references``.

    Two URLs match when the text after their last ``'#'`` is equal (for a
    URL without ``'#'``, the whole URL is compared). When a reference
    matches, its ``'sourceUrl'`` is overwritten with the matching key of
    ``references``; references without a match are left untouched.

    Args:
        module_attr_pairs: iterable of dicts, each with an
            ``'externalReferences'`` list of dicts carrying a
            ``'sourceUrl'`` string.
        references: mapping whose keys are the replacement source URLs.

    Returns:
        The same ``module_attr_pairs`` object, modified in place.
    """
    # Index the replacement URLs by fragment once, instead of rescanning
    # every key for every reference. setdefault keeps the first URL seen
    # for a fragment, matching a first-match-wins linear scan.
    url_by_fragment = {}
    for source_url in references:
        url_by_fragment.setdefault(source_url.split('#')[-1], source_url)

    for pair in module_attr_pairs:
        for ref in pair['externalReferences']:
            pair_fragment = ref['sourceUrl'].split('#')[-1]
            if pair_fragment in url_by_fragment:
                ref['sourceUrl'] = url_by_fragment[pair_fragment]
    return module_attr_pairs


if __name__ == '__main__':
    # argv[1]: module-attribute pairs JSON; argv[2]: references JSON.
    loaded_pairs = pl.read_json_to_dict(sys.argv[1])
    loaded_references = pl.read_json_to_dict(sys.argv[2])
    pl.write_pretty_json(update_sourceurls(loaded_pairs, loaded_references))