def compile_structmap(**kwargs):
    """Build the METS file section and structural map for a workspace.

    Both documents are based on the administrative and descriptive
    metadata already created in or imported to the workspace. They are
    written to ``filesec.xml`` and ``structmap.xml`` in the workspace
    directory.

    :kwargs: Given arguments:
             workspace: Workspace directory
             structmap_type: Type of structmap
             root_type: Type of root div
             dmdsec_loc: Location of structured descriptive metadata
             file_ids: Dict to be populated with file paths and IDs
             stdout: True to print output to stdout
    """
    attributes = _attribute_values(kwargs)

    # Document the structmap creation with an event first; the reference
    # lists are collected only after that event has been created.
    _create_event(
        workspace=attributes["workspace"],
        structmap_type=attributes["structmap_type"],
        root_type=attributes["root_type"]
    )
    attributes = get_reference_lists(**attributes)

    if attributes["structmap_type"] != 'EAD3-logical':
        filesec, file_ids = create_filesec(**attributes)
        # Make the file path / ID mapping available to create_structmap
        attributes['file_ids'] = file_ids
        structmap = create_structmap(filesec.getroot(), **attributes)
    else:
        # With structured descriptive metadata the fileSec starts out as
        # an empty fileGrp, which create_ead3_structmap populates.
        filegrp = mets.filegrp()
        filesec = mets.mets(
            child_elements=[mets.filesec(child_elements=[filegrp])])
        structmap = create_ead3_structmap(filegrp, attributes)

    if attributes["stdout"]:
        for document in (filesec, structmap):
            print(xml_utils.serialize(document).decode("utf-8"))

    sm_path = os.path.join(attributes["workspace"], 'structmap.xml')
    fs_path = os.path.join(attributes["workspace"], 'filesec.xml')

    for path in (sm_path, fs_path):
        target_dir = os.path.dirname(path)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    for path, document in ((sm_path, structmap), (fs_path, filesec)):
        with open(path, 'wb+') as outfile:
            outfile.write(xml_utils.serialize(document))

    print("compile_structmap created files: %s %s" % (sm_path, fs_path))
def main(arguments=None):
    """The main method for import_description.

    Wrap the descriptive metadata found in ``args.dmdsec_location`` into a
    METS dmdSec element and write the resulting METS document to the
    workspace.

    :arguments: Arguments handed over to ``parse_arguments``
    :returns: 0 after the file has been written
    :raises TypeError: If the namespace of the first metadata element is
                       not listed in ``METS_MDTYPES``
    """
    args = parse_arguments(arguments)

    if args.dmdsec_target:
        url_t_path = encode_path(args.dmdsec_target, suffix='-dmdsec.xml')
    else:
        url_t_path = 'dmdsec.xml'

    with open(args.dmdsec_location, 'r') as content_file:
        content = content_file.read()

    _mets = mets.mets()
    tree = lxml.etree.fromstring(content)

    # With desc_root == 'remove' the root element is dropped and only its
    # children are wrapped; otherwise the whole tree is wrapped as is.
    if args.desc_root == 'remove':
        childs = tree.findall('*')
    else:
        childs = [tree]

    xmldata_e = mets.xmldata(child_elements=childs)

    # The metadata type is resolved from the namespace of the first element
    ns = h.get_namespace(childs[0])
    if ns not in METS_MDTYPES:
        raise TypeError("Invalid namespace: %s" % ns)

    mdtype_info = METS_MDTYPES[ns]
    mdt = mdtype_info['mdtype']
    mdo = mdtype_info['othermdtype'] if 'othermdtype' in mdtype_info else None
    mdv = mdtype_info['version']

    mdwrap_e = mets.mdwrap(mdtype=mdt, othermdtype=mdo, mdtypeversion=mdv,
                           child_elements=[xmldata_e])
    dmdsec_e = mets.dmdsec(encode_id(url_t_path), child_elements=[mdwrap_e])
    _mets.append(dmdsec_e)

    # print() with a single argument behaves the same on Python 2 and 3
    if args.stdout:
        print(h.serialize(_mets))

    output_file = os.path.join(args.workspace, url_t_path)
    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # The serialized document is written in binary mode so that it is
    # stored unmodified.
    with open(output_file, 'wb+') as outfile:
        outfile.write(h.serialize(_mets))

    print("import_description created file: %s" % output_file)

    return 0
def compile_mets(**kwargs):
    """Merge partial METS documents in workspace directory into one METS
    document and write it to ``mets.xml`` in the workspace.

    :kwargs: Given arguments:
             mets_profile: METS profile (mandatory)
             organization_name: Creator name (mandatory)
             contractid: Contract ID (mandatory)
             objid: Unique identifier for the package
             contentid: Identifier of the content
             create_date: Package creation date
             last_moddate: Last modification date
             workspace: Workspace path
             base_path: Base path of the digital objects
             record_status: Record status
             label: Short description about the package
             clean: True for cleaning the workspace from temporary files
             copy_files: True copies the digital objects from base_path to
                         workspace
             stdout: True prints the output to stdout
             packagingservice: Packaging service specific parameter
    """
    attributes = _attribute_values(kwargs, True)
    mets_document = create_mets(**attributes)

    # Serialize once and reuse the result for both stdout and the file
    serialized = xml_utils.serialize(mets_document.getroot())

    if attributes["stdout"]:
        # Decode before printing so that the XML text is shown instead of
        # a bytes literal, as compile_structmap does.
        print(serialized.decode("utf-8"))

    output_file = os.path.join(attributes["workspace"], 'mets.xml')
    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'wb+') as outfile:
        outfile.write(serialized)

    print("compile_mets created file: %s" % output_file)

    if attributes["copy_files"]:
        copy_objects(attributes["workspace"], attributes["base_path"])
        print("compile_mets copied objects from %s to "
              "workspace" % attributes["base_path"])

    if attributes["clean"]:
        clean_metsparts(attributes["workspace"])
        print("compile_mets cleaned work files from workspace.")
def test_serialize():
    """Serializing a parsed element yields the expected bytes, including
    the XML declaration."""
    element = ET.fromstring('<a:x xmlns:a="b"><a:y/></a:x>')

    # NOTE(review): the whitespace inside the expected bytes is kept exactly
    # as found in this file; confirm it matches the serializer's indentation.
    expected = (b'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
                b'<a:x xmlns:a="b">\n <a:y/>\n</a:x>\n')

    assert u.serialize(element) == expected
def compile_structmap(workspace="./workspace/", structmap_type=None,
                      root_type=None, dmdsec_loc=None, stdout=False):
    """Build the METS file section and structural map for a workspace.

    Both documents are based on the administrative and descriptive
    metadata already created in or imported to the workspace, and are
    written to ``filesec.xml`` and ``structmap.xml`` in ``workspace``.

    :workspace: Workspace directory
    :structmap_type: Type of structmap; 'EAD3-logical' selects the EAD3
                     based structural map
    :root_type: Type of root div
    :dmdsec_loc: Location of structured descriptive metadata
    :stdout: True to print both documents to stdout
    """
    filelist = get_objectlist(workspace)

    if structmap_type != 'EAD3-logical':
        filesec = create_filesec(workspace, filelist)
        structmap = create_structmap(workspace, filesec.getroot(), filelist,
                                     structmap_type, root_type)
    else:
        # With structured descriptive metadata the fileSec starts out as
        # an empty fileGrp, which create_ead3_structmap populates.
        filegrp = mets.filegrp()
        filesec = mets.mets(
            child_elements=[mets.filesec(child_elements=[filegrp])])
        structmap = create_ead3_structmap(dmdsec_loc, workspace, filegrp,
                                          filelist, structmap_type)

    if stdout:
        for document in (filesec, structmap):
            print(xml_utils.serialize(document).decode("utf-8"))

    sm_path = os.path.join(workspace, 'structmap.xml')
    fs_path = os.path.join(workspace, 'filesec.xml')

    for path in (sm_path, fs_path):
        target_dir = os.path.dirname(path)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    for path, document in ((sm_path, structmap), (fs_path, filesec)):
        with open(path, 'wb+') as outfile:
            outfile.write(xml_utils.serialize(document))

    print("compile_structmap created files: %s %s" % (sm_path, fs_path))
def main(arguments=None):
    """The main method for argparser.

    Create a PREMIS object for every collected file, wrap it into a METS
    techMD section and write one ``*-techmd.xml`` METS document per file
    to the workspace.

    :arguments: Arguments handed over to ``parse_arguments``
    :returns: 0 after all files have been processed
    """
    args = parse_arguments(arguments)

    # Loop files and create premis objects
    files = collect_filepaths(dirs=args.files, base=args.base_path)
    for filename in files:
        if args.base_path != '':
            filerel = os.path.relpath(filename, args.base_path)
        else:
            filerel = filename

        # xmldata is handed to create_premis_object; the return value of
        # that call is not used here.
        xmldata = mets.xmldata()
        create_premis_object(
            xmldata, filename, args.skip_inspection,
            args.format_name, args.format_version,
            args.digest_algorithm, args.message_digest,
            args.date_created, args.charset)

        # The same encoded name serves as the source of the techMD ID and
        # as the output file name, so it is computed only once.
        techmd_name = encode_path(filerel, suffix="-techmd.xml")

        mdwrap = mets.mdwrap('PREMIS:OBJECT', '2.3', child_elements=[xmldata])
        techmd = mets.techmd(encode_id(techmd_name), child_elements=[mdwrap])
        amdsec = mets.amdsec(child_elements=[techmd])
        _mets = mets.mets(child_elements=[amdsec])

        # print() with a single argument behaves the same on Python 2 and 3
        if args.stdout:
            print(h.serialize(_mets))

        if not os.path.exists(args.workspace):
            os.makedirs(args.workspace)

        # The serialized document is written in binary mode so that it is
        # stored unmodified.
        with open(os.path.join(args.workspace, techmd_name),
                  'wb+') as outfile:
            outfile.write(h.serialize(_mets))

        print("Wrote METS technical metadata to file %s" % outfile.name)

    return 0
def main(arguments=None):
    """The main method for compile_structmap.

    Create the METS structMap and fileSec documents and write them to
    ``structmap.xml`` and ``filesec.xml`` in the workspace.

    :arguments: Arguments handed over to ``parse_arguments``
    :returns: 0 after both files have been written
    """
    args = parse_arguments(arguments)

    structmap = mets.structmap(type_attr=args.type_attr)
    mets_structmap = mets.mets(child_elements=[structmap])

    filegrp = mets.filegrp()
    filesec = mets.filesec(child_elements=[filegrp])
    mets_filesec = mets.mets(child_elements=[filesec])

    _, dmdsec_id = ids_for_files(args.workspace, None, 'dmdsec.xml',
                                 dash_count=0)

    if args.dmdsec_struct == 'ead3':
        container_div = mets.div(type_attr='logical')
        structmap.append(container_div)
        create_ead3_structmap(args.dmdsec_loc, args.workspace,
                              container_div, filegrp, dmdsec_id)
    else:
        amdids = get_links_event_agent(args.workspace, None)
        container_div = mets.div(type_attr='directory', dmdid=dmdsec_id,
                                 admid=amdids)
        structmap.append(container_div)
        divs = div_structure(args.workspace)
        create_structmap(args.workspace, divs, container_div, filegrp)

    # print() with a single argument behaves the same on Python 2 and 3
    if args.stdout:
        print(h.serialize(mets_filesec))
        print(h.serialize(mets_structmap))

    output_sm_file = os.path.join(args.workspace, 'structmap.xml')
    output_fs_file = os.path.join(args.workspace, 'filesec.xml')

    # Both files share the same directory, so the second pass is normally
    # a no-op; it is kept so each target is checked as before.
    for output_file in (output_sm_file, output_fs_file):
        output_dir = os.path.dirname(output_file)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    # The serialized documents are written in binary mode so that they are
    # stored unmodified.
    with open(output_sm_file, 'wb+') as outfile:
        outfile.write(h.serialize(mets_structmap))

    with open(output_fs_file, 'wb+') as outfile:
        outfile.write(h.serialize(mets_filesec))

    print("compile_structmap created files: %s %s" % (output_sm_file,
                                                      output_fs_file))

    return 0
def test_construct_catalog_xml(tmpdir, rewrite_rules, next_catalogs):
    """Check that a constructed catalog survives a write/read round trip
    and contains exactly the given rewrite rules and next catalogs."""
    catalog_file = tmpdir.mkdir('test').join('foo.xml')
    base_dir = tmpdir.mkdir('base_catalog')

    catalog = construct_catalog_xml(base_path=base_dir.strpath,
                                    rewrite_rules=rewrite_rules,
                                    next_catalogs=next_catalogs)

    # Round trip the catalog through a file
    with open(catalog_file.strpath, 'wb') as handle:
        handle.write(serialize(catalog))
    with open(catalog_file.strpath, 'rb') as handle:
        tree = ET.fromstring(handle.read())

    # The base attribute must point to the base directory
    for key, value in tree.attrib.items():
        if key.endswith('base'):
            assert value.rstrip('/') == base_dir.strpath

    assert len(tree) == len(rewrite_rules) + len(next_catalogs)

    # Turn the keys and values of the input dict into text (unless the
    # input is falsey) so that they can be compared with the parsed output
    decoded_rules = None
    if rewrite_rules:
        decoded_rules = {ensure_text(start): ensure_text(prefix)
                         for start, prefix in rewrite_rules.items()}

    for element in tree:
        if 'rewriteURI' in element.tag:
            start_string = element.attrib['uriStartString']
            assert element.attrib['rewritePrefix'] == \
                decoded_rules[start_string]
            # Drop the rule to mark it as evaluated
            del decoded_rules[start_string]
        if 'nextCatalog' in element.tag:
            catalog_name = element.attrib['catalog']
            assert catalog_name in next_catalogs
            # Drop the catalog to mark it as evaluated
            next_catalogs.remove(catalog_name)

    # Everything given as input must have been consumed by now
    assert not decoded_rules
    assert not next_catalogs
def run(self):
    """Write a METS document containing the logical structural map.

    The logical structural map is based on dataset metadata retrieved
    from Metax. Each file category becomes a div that points to the
    files of that category.

    :returns: ``None``
    """
    # The dmdsec id is read from the previously generated physical
    # structmap
    structmap_path = os.path.join(self.sip_creation_path, 'structmap.xml')
    physical_root = ET.parse(structmap_path).getroot()
    dmdsec_id = physical_root[0][0].attrib['DMDID']

    # Provenance ids are linked to the wrapper div
    provenance_ids = self.get_provenance_ids()

    # Empty logical structmap inside a METS root
    logical_structmap = mets.structmap(type_attr='Fairdata-logical')
    mets_structmap = mets.mets(child_elements=[logical_structmap])

    # One div per category, each with a file pointer for every file
    categories = self.find_file_categories()
    wrapper_div = mets.div(type_attr='logical',
                           dmdid=[dmdsec_id],
                           admid=provenance_ids)
    for category, filenames in categories.items():
        category_div = mets.div(type_attr=category)
        for filename in filenames:
            encoded_name = encode_path(filename, safe='/')
            category_div.append(mets.fptr(self.get_fileid(encoded_name)))
        wrapper_div.append(category_div)
    logical_structmap.append(wrapper_div)

    with self.output().open('wb') as output:
        output.write(h.serialize(mets_structmap))
def main(arguments=None):
    """The main method for premis_event.

    Write a PREMIS event as a METS document to the workspace. If an agent
    name is given, a PREMIS agent document is written first and the event
    is linked to that agent.

    :arguments: Arguments handed over to ``parse_arguments``
    :returns: 0 after the file(s) have been written
    """
    args = parse_arguments(arguments)

    def _write_document(document, output_file):
        """Print (if requested) and store one serialized METS document."""
        # print() with a single argument behaves the same on Python 2 and 3
        if args.stdout:
            print(h.serialize(document))

        output_dir = os.path.dirname(output_file)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # The serialized document is written in binary mode so that it is
        # stored unmodified.
        with open(output_file, 'wb+') as outfile:
            outfile.write(h.serialize(document))

        print("premis_event created file: %s" % output_file)

    # Agent and event file names share the same stem: the event type,
    # prefixed with the event target when one is given.
    if args.event_target:
        name_stem = '%s-%s' % (args.event_target, args.event_type)
    else:
        name_stem = '%s' % (args.event_type)

    linking_agent_identifier = None
    if args.agent_name:
        _mets = mets.mets()
        amdsec = mets.amdsec()
        _mets.append(amdsec)

        # The encoded file name is also the source of the agent ID
        agent_file = encode_path('%s-agent.xml' % name_stem)
        linking_agent_identifier = create_premis_agent(
            amdsec, encode_id(agent_file), args.agent_name, args.agent_type)

        _write_document(_mets, os.path.join(args.workspace, agent_file))

    # Create event
    _mets = mets.mets()
    amdsec = mets.amdsec()
    _mets.append(amdsec)

    # The encoded file name is also the source of the event ID
    event_file = encode_path('%s-event.xml' % name_stem)
    create_premis_event(amdsec, args.event_type, args.event_datetime,
                        args.event_detail, args.event_outcome,
                        args.event_outcome_detail, linking_agent_identifier,
                        encode_id(event_file))

    _write_document(_mets, os.path.join(args.workspace, event_file))

    return 0
def main(arguments=None):
    """The main method for compile_mets.

    Create the METS root and header, merge the partial METS documents
    found in the workspace into it and write the result to ``mets.xml``
    in the workspace.

    :arguments: Arguments handed over to ``parse_arguments``
    :returns: 0 after the METS document has been written
    """
    args = parse_arguments(arguments)

    # Create mets header
    _mets = mets.mets(METS_PROFILE[args.mets_profile], objid=args.objid,
                      label=args.label, namespaces=NAMESPACES)
    _mets = mets_extend(_mets, METS_CATALOG, METS_SPECIFICATION,
                        args.contentid, args.contractid)

    # Create list of additional agent elements if packagingservice is
    # defined
    _agents = [mets.agent(args.organization_name)]
    if args.packagingservice:
        _agents.append(
            mets.agent(args.organization_name, agent_role='ARCHIVIST'))
        _agents.append(
            mets.agent(args.packagingservice, agent_type='OTHER',
                       agent_role='CREATOR', othertype='SOFTWARE'))

    _metshdr = mets.metshdr(args.create_date, args.last_moddate,
                            args.record_status, _agents)
    _mets.append(_metshdr)

    # Collect the first child of the root of each partial METS document in
    # the workspace. scandir yields entries in arbitrary order, so they
    # are sorted by name to make the merged document reproducible.
    part_suffixes = ('-techmd.xml', '-agent.xml', '-event.xml',
                     'dmdsec.xml', 'structmap.xml', 'filesec.xml',
                     'rightsmd.xml', '-othermd.xml')
    entries = sorted(scandir(args.workspace), key=lambda entry: entry.name)
    elements = [
        lxml.etree.parse(entry.path).getroot()[0]
        for entry in entries
        if entry.name.endswith(part_suffixes) and entry.is_file()
    ]

    elements = mets.merge_elements('{%s}amdSec' % NAMESPACES['mets'],
                                   elements)
    elements.sort(key=mets.order)

    for element in elements:
        _mets.append(element)

    # print() with a single argument behaves the same on Python 2 and 3
    if args.stdout:
        print(h.serialize(_mets))

    output_file = os.path.join(args.workspace, 'mets.xml')
    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # The serialized document is written in binary mode so that it is
    # stored unmodified.
    with open(output_file, 'wb+') as outfile:
        outfile.write(h.serialize(_mets))

    print("compile_mets created file: %s" % output_file)

    if args.copy_files:
        copy_files(args.workspace, args.base_path)
        print("compile_mets copied objects from %s to workspace" %
              args.base_path)

    if args.clean:
        clean_metsparts(args.workspace)
        print("compile_mets cleaned work files from workspace")

    return 0
def compile_mets(mets_profile, organization_name, contractid, objid=None,
                 label=None, contentid=None, create_date=None,
                 last_moddate=None, record_status="submission",
                 workspace="./workspace", clean=False, copy_files=False,
                 base_path=".", stdout=False, packagingservice=None):
    """Merge partial METS documents in workspace directory into one METS
    document and write it to ``mets.xml`` in the workspace.

    :mets_profile: METS profile
    :organization_name: Creator name
    :contractid: Contract ID; stored with the prefix ``urn:uuid:``
    :objid: Unique identifier for the package; a random UUID if not given
    :label: Short description about the package
    :contentid: Identifier of the content
    :create_date: Package creation date; current UTC time if not given
    :last_moddate: Last modification date
    :record_status: Record status
    :workspace: Workspace path
    :clean: True for cleaning the workspace from temporary files
    :copy_files: True copies the digital objects from base_path to
                 workspace
    :base_path: Base path of the digital objects
    :stdout: True prints the output to stdout
    :packagingservice: Packaging service specific parameter
    """
    contract = "urn:uuid:%s" % contractid
    if not objid:
        objid = six.text_type(uuid.uuid4())
    if not create_date:
        create_date = datetime.datetime.utcnow().isoformat()

    mets_document = create_mets(
        workspace,
        mets_attributes={
            'PROFILE': mets_profile,
            'OBJID': objid,
            'LABEL': label,
            "CONTENTID": contentid,
            "CONTRACTID": contract
        },
        metshdr_attributes={
            "CREATEDATE": create_date,
            "LASTMODDATE": last_moddate,
            "RECORDSTATUS": record_status
        },
        organization=organization_name,
        packagingservice=packagingservice)

    # Serialize once and reuse the result for both stdout and the file
    serialized = xml_utils.serialize(mets_document.getroot())

    if stdout:
        # Decode before printing so that the XML text is shown instead of
        # a bytes literal.
        print(serialized.decode("utf-8"))

    output_file = os.path.join(workspace, 'mets.xml')
    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'wb+') as outfile:
        outfile.write(serialized)

    print("compile_mets created file: %s" % output_file)

    if copy_files:
        copy_objects(workspace, base_path)
        print("compile_mets copied objects from %s to workspace" % base_path)

    if clean:
        clean_metsparts(workspace)
        print("compile_mets cleaned work files from workspace")
def run(self):
    """Create the file section and the physical structural map.

    The premis event metadata references of the input tasks are merged
    into one reference file first. The METS fileSec element is then
    created and written to the first output target, and the physical
    structure map built from it is written to the second output target.

    :returns: ``None``
    """
    # Collect the metadata ids referenced for '.' by each input task
    md_ids = []
    for input_target in ('create_provenance_information',
                         'create_descriptive_metadata',
                         'create_technical_metadata'):
        task_references = read_md_references(
            self.workspace, self.input()[input_target].path)
        md_ids.extend(task_references['.']['md_ids'])

    # Store the merged ids as a single directory level reference
    references_path = os.path.join(self.sip_creation_path,
                                   'premis-event-md-references.jsonl')
    with open(references_path, 'w') as references:
        references.write(json.dumps({
            ".": {
                "path_type": "directory",
                "streams": {},
                "md_ids": md_ids
            }
        }))

    # Reference lists and supplementary file information needed below
    all_amd_refs, all_dmd_refs, object_refs, filelist, file_properties = \
        get_reference_lists(workspace=self.sip_creation_path)
    supplementary_files, supplementary_types = iter_supplementary(
        file_properties=file_properties)

    # File section
    filesec, file_ids = compile_structmap.create_filesec(
        all_amd_refs=all_amd_refs,
        object_refs=object_refs,
        file_properties=file_properties,
        supplementary_files=supplementary_files,
        supplementary_types=supplementary_types)
    filesec_target, structmap_target = self.output()[0], self.output()[1]
    with filesec_target.open('wb') as filesecxml:
        filesecxml.write(serialize(filesec))

    # Physical structural map
    structmap = compile_structmap.create_structmap(
        filesec=filesec,
        structmap_type='Fairdata-physical',
        file_ids=file_ids,
        all_amd_refs=all_amd_refs,
        all_dmd_refs=all_dmd_refs,
        filelist=filelist,
        supplementary_files=supplementary_files,
        supplementary_types=supplementary_types,
        file_properties=file_properties,
        workspace=self.sip_creation_path)
    with structmap_target.open('wb') as structmapxml:
        structmap.write(structmapxml,
                        pretty_print=True,
                        xml_declaration=True,
                        encoding='UTF-8')