def compile_structmap(**kwargs):
    """Generate METS file section and structural map based on
    created/imported administrative metadata and descriptive metadata.

    The keyword arguments are normalised with ``_attribute_values``, a
    structmap creation event is recorded, and the resulting fileSec and
    structMap documents are written to ``filesec.xml`` and
    ``structmap.xml`` in the workspace.

    :kwargs: Given arguments:
             workspace: Workspace directory (output files are written here)
             structmap_type: Type of structmap; 'EAD3-logical' selects the
                             EAD3 based structmap creation
             root_type: Type of root div
             dmdsec_loc: Location of structured descriptive metadata (not
                         read here directly, forwarded in the attribute
                         dict)
             file_ids: Dict of file paths and IDs; for non-EAD3 structmaps
                       it is replaced by the dict returned by
                       create_filesec
             stdout: True to print output to stdout
    """
    attributes = _attribute_values(kwargs)

    # Create an event documenting the structmap creation
    _create_event(
        workspace=attributes["workspace"],
        structmap_type=attributes["structmap_type"],
        root_type=attributes["root_type"]
    )

    # Get reference list only after the structmap creation event
    attributes = get_reference_lists(**attributes)

    if attributes["structmap_type"] == 'EAD3-logical':
        # If structured descriptive metadata for structMap divs is used,
        # the fileSec element is built differently: an empty fileGrp is
        # handed to create_ead3_structmap, which populates it.
        filegrp = mets.filegrp()
        filesec_element = mets.filesec(child_elements=[filegrp])
        filesec = mets.mets(child_elements=[filesec_element])
        structmap = create_ead3_structmap(filegrp, attributes)
    else:
        (filesec, file_ids) = create_filesec(**attributes)
        # Add file path and ID dict to attributes
        attributes['file_ids'] = file_ids
        structmap = create_structmap(filesec.getroot(), **attributes)

    if attributes["stdout"]:
        print(xml_utils.serialize(filesec).decode("utf-8"))
        print(xml_utils.serialize(structmap).decode("utf-8"))

    output_sm_file = os.path.join(attributes["workspace"], 'structmap.xml')
    output_fs_file = os.path.join(attributes["workspace"], 'filesec.xml')

    # Both output files live in the same directory, so it needs to be
    # created only once. An empty directory name means the current
    # working directory; os.makedirs('') would raise, so skip it.
    output_dir = os.path.dirname(output_sm_file)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_sm_file, 'wb+') as outfile:
        outfile.write(xml_utils.serialize(structmap))

    with open(output_fs_file, 'wb+') as outfile:
        outfile.write(xml_utils.serialize(filesec))

    print("compile_structmap created files: %s %s" % (output_sm_file,
                                                      output_fs_file))
def create_filesec(workspace, filelist):
    """Build a METS element tree whose root wraps a fileSec element.

    :workspace: Passed on to create_filegrp
    :filelist: Passed on to create_filegrp, which fills the fileGrp
    :returns: ElementTree with a mets root element containing the fileSec
    """
    file_group = mets.filegrp()
    file_section = mets.filesec(child_elements=[file_group])

    # The fileGrp is already attached to the fileSec; fill it in place.
    create_filegrp(workspace, file_group, filelist)

    root = mets.mets(child_elements=[file_section])
    ET.cleanup_namespaces(root)
    return ET.ElementTree(root)
def main(arguments=None):
    """The main method for compile_structmap.

    Parses the arguments, builds the structMap and fileSec documents and
    writes them to ``structmap.xml`` and ``filesec.xml`` in the workspace.

    :arguments: Arguments handed to parse_arguments (None by default)
    :returns: 0 once both files have been written
    """
    args = parse_arguments(arguments)

    structmap = mets.structmap(type_attr=args.type_attr)
    mets_structmap = mets.mets(child_elements=[structmap])
    filegrp = mets.filegrp()
    filesec = mets.filesec(child_elements=[filegrp])
    mets_filesec = mets.mets(child_elements=[filesec])

    _, dmdsec_id = ids_for_files(args.workspace, None, 'dmdsec.xml',
                                 dash_count=0)

    if args.dmdsec_struct == 'ead3':
        container_div = mets.div(type_attr='logical')
        structmap.append(container_div)
        create_ead3_structmap(args.dmdsec_loc, args.workspace,
                              container_div, filegrp, dmdsec_id)
    else:
        amdids = get_links_event_agent(args.workspace, None)
        container_div = mets.div(type_attr='directory', dmdid=dmdsec_id,
                                 admid=amdids)
        structmap.append(container_div)
        divs = div_structure(args.workspace)
        create_structmap(args.workspace, divs, container_div, filegrp)

    if args.stdout:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print(h.serialize(mets_filesec))
        print(h.serialize(mets_structmap))

    output_sm_file = os.path.join(args.workspace, 'structmap.xml')
    output_fs_file = os.path.join(args.workspace, 'filesec.xml')

    # Both output files live in the same directory, so it needs to be
    # created only once. An empty directory name means the current
    # working directory; os.makedirs('') would raise, so skip it.
    output_dir = os.path.dirname(output_sm_file)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_sm_file, 'w+') as outfile:
        outfile.write(h.serialize(mets_structmap))

    with open(output_fs_file, 'w+') as outfile:
        outfile.write(h.serialize(mets_filesec))

    print("compile_structmap created files: %s %s" % (output_sm_file,
                                                      output_fs_file))

    return 0
def compile_structmap(workspace="./workspace/", structmap_type=None,
                      root_type=None, dmdsec_loc=None, stdout=False):
    """Generate METS file section and structural map based on
    created/imported administrative metadata and descriptive metadata.

    The resulting documents are written to ``structmap.xml`` and
    ``filesec.xml`` in the workspace.

    :workspace: Workspace directory; the object list is read from it and
                the output files are written into it
    :structmap_type: Type of structmap; 'EAD3-logical' selects the EAD3
                     based structmap creation
    :root_type: Type of root div, passed to create_structmap
    :dmdsec_loc: Passed to create_ead3_structmap (used only for
                 'EAD3-logical' structmaps)
    :stdout: True to also print both documents to stdout
    """
    filelist = get_objectlist(workspace)

    if structmap_type == 'EAD3-logical':
        # If structured descriptive metadata for structMap divs is used,
        # the fileSec element is built differently: an empty fileGrp is
        # handed to create_ead3_structmap, which populates it.
        filegrp = mets.filegrp()
        filesec_element = mets.filesec(child_elements=[filegrp])
        filesec = mets.mets(child_elements=[filesec_element])
        structmap = create_ead3_structmap(dmdsec_loc, workspace, filegrp,
                                          filelist, structmap_type)
    else:
        filesec = create_filesec(workspace, filelist)
        structmap = create_structmap(workspace, filesec.getroot(),
                                     filelist, structmap_type, root_type)

    if stdout:
        print(xml_utils.serialize(filesec).decode("utf-8"))
        print(xml_utils.serialize(structmap).decode("utf-8"))

    output_sm_file = os.path.join(workspace, 'structmap.xml')
    output_fs_file = os.path.join(workspace, 'filesec.xml')

    # Both output files live in the same directory, so it needs to be
    # created only once. An empty directory name means the current
    # working directory; os.makedirs('') would raise, so skip it.
    output_dir = os.path.dirname(output_sm_file)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_sm_file, 'wb+') as outfile:
        outfile.write(xml_utils.serialize(structmap))

    with open(output_fs_file, 'wb+') as outfile:
        outfile.write(xml_utils.serialize(filesec))

    print("compile_structmap created files: %s %s" % (output_sm_file,
                                                      output_fs_file))
def test_get_fileid():
    """Check that get_fileid resolves a path to the right file ID.

    A fileGrp holding three file elements is built, each with its own
    identifier and href, and the lookup for the second file's path is
    expected to return that file's identifier.
    """
    file_elements = []
    for index in range(3):
        # The href carries a '+' where the looked-up path has a space.
        element = mets.file_elem(
            file_id='identifier%s' % index,
            admid_elements=['foo', 'bar'],
            loctype='foo',
            xlink_href='file://path/to/file+name%s' % index,
            xlink_type='foo'
        )
        file_elements.append(element)

    file_group = mets.filegrp(child_elements=file_elements)

    found_id = compile_structmap.get_fileid(file_group,
                                            'path/to/file name1')
    assert found_id == 'identifier1'
def test_add_fptrs_div_ead(testpath, run_cli, hrefs, length, child_elem,
                           order):
    """Check that add_fptrs_div_ead adds the expected fptrs and divs.

    The returned div is inspected for the tag of its first child, the
    number of children, the number of fptr elements, and the presence of
    the ORDER attribute, according to the given test case values.
    """
    create_test_data(testpath, run_cli, order=order)

    root_div = ET.fromstring(
        '<mets:div xmlns:mets="http://www.loc.gov/METS/"></mets:div>'
    )

    # The same reference data serves as both the full and the object list.
    md_refs = read_md_references(testpath,
                                 "import-object-md-references.jsonl")
    attrs = {
        "all_amd_refs": md_refs,
        "object_refs": md_refs,
        "workspace": testpath,
        "filelist": [
            'tests/data/structured/Publication files/publication.txt',
            'tests/data/structured/Software files/koodi.java'
        ],
    }

    c_div = compile_structmap.add_fptrs_div_ead(
        root_div, hrefs, mets.filegrp(), attrs
    )
    children = c_div.xpath('./*')

    # Child elements are either new divs or fptrs
    assert children[0].tag == '{http://www.loc.gov/METS/}%s' % child_elem

    # Number of child elements should equal the number of valid hrefs
    assert len(children) == length

    # Number of fptr elements should equal the number of valid hrefs
    fptrs = c_div.findall('.//{http://www.loc.gov/METS/}fptr')
    assert len(fptrs) == length

    # If file properties exist, they are written to the divs
    if not order:
        assert 'ORDER' not in c_div.attrib
    elif length == 1:
        assert 'ORDER' in c_div.attrib
    else:
        first_child = children[0]
        assert 'ORDER' in first_child.attrib
        assert first_child.get('TYPE') == 'dao'
def create_filesec(**attributes):
    """
    Build a METS element tree holding the fileSec element.

    The given attributes are normalised with ``_attribute_values`` and
    completed with ``get_reference_lists`` before use.

    :attributes: Attribute values as a dict; the keys read here are
                 all_amd_refs: Passed to add_file_to_filesec
                 object_refs: Passed to add_file_to_filesec
                 filelist: Iterable of file paths, one file entry each

    :returns: A tuple of METS XML Element tree including file section
              element and a dict mapping each file path to the
              identifier returned by add_file_to_filesec
    """
    attributes = get_reference_lists(**_attribute_values(attributes))

    file_group = mets.filegrp()
    file_section = mets.filesec(child_elements=[file_group])

    # Each call adds the file to the fileGrp and yields its identifier.
    ids_by_path = {
        path: add_file_to_filesec(attributes["all_amd_refs"],
                                  attributes["object_refs"],
                                  path,
                                  file_group)
        for path in attributes["filelist"]
    }

    root = mets.mets(child_elements=[file_section])
    ET.cleanup_namespaces(root)
    return (ET.ElementTree(root), ids_by_path)