def get_rml_images(rml, role):
    """Map the RML ``image`` children of *rml* to a ``resources`` list.

    Every element returned by ``rml.findall`` for the prefixed ``rml:image``
    tag has its text read with ``xmletree.get_element_text``; that text is
    handed to ``metajson_service.create_resource_remote`` as the first
    argument, followed by ``None`` and *role*.

    Returns a dict holding the non-None resources under the ``"resources"``
    key, or an empty dict when no resource could be built.
    """
    image_elements = rml.findall(xmletree.prefixtag("rml", "image"))
    if image_elements is None:
        return {}

    collected = []
    for image_element in image_elements:
        if image_element is None:
            continue
        image_url = xmletree.get_element_text(image_element)
        remote = metajson_service.create_resource_remote(image_url, None, role)
        # The service may hand back None; such entries are left out.
        if remote is not None:
            collected.append(remote)

    return {"resources": collected} if collected else {}
def _didl_item_types_and_modified(item):
    """Return ``(item_types, date_modified)`` read from the Descriptors of *item*.

    ``item_types`` lists the text of every ``rdf:type`` found in the
    ``didl:Statement`` children of each ``didl:Descriptor``. ``date_modified``
    is the text of the last ``dcterms:modified`` encountered, or None.
    """
    item_types = []
    date_modified = None
    for descriptor in item.findall(xmletree.prefixtag("didl", "Descriptor")) or ():
        for statement in descriptor.findall(xmletree.prefixtag("didl", "Statement")) or ():
            rdf_type = statement.find(xmletree.prefixtag("rdf", "type"))
            if rdf_type is not None:
                item_types.append(rdf_type.text)
            dcterms_modified = statement.find(xmletree.prefixtag("dcterms", "modified"))
            if dcterms_modified is not None:
                date_modified = dcterms_modified.text
    return item_types, date_modified


def _didl_first_resource(item):
    """Return the ``didl:Resource`` of the first ``didl:Component`` of *item*, or None."""
    component = item.find(xmletree.prefixtag("didl", "Component"))
    if component is None:
        return None
    return component.find(xmletree.prefixtag("didl", "Resource"))


def _didl_object_file_to_resource(item, item_types, item_date_modified):
    """Build the remote resource describing an ``objectFile`` item.

    Returns whatever ``metajson_service.create_resource_remote`` returns.
    """
    if "info:eu-repo/semantics/publishedVersion" in item_types:
        relation_version = "publishedVersion"
    elif "info:eu-repo/semantics/authorVersion" in item_types:
        relation_version = "authorVersion"
    else:
        relation_version = None

    url = None
    format_mimetype = None
    didl_resource = _didl_first_resource(item)
    if didl_resource is not None:
        url = didl_resource.get("ref")
        format_mimetype = didl_resource.get("mimeType")

    date_last_accessed = None
    relation_type = "publication"
    access_rights = "openAccess"
    rec_state = "published"
    rec_created_date = None
    # A falsy modification date (None or "") is passed on as None.
    rec_modified_date = item_date_modified or None

    # NOTE(review): the call is made even when no Component/Resource was found
    # (url and format_mimetype are then None) -- this follows the None
    # pre-initialisation of those values in the original; confirm it is intended.
    return metajson_service.create_resource_remote(
        url,
        date_last_accessed,
        relation_type,
        relation_version,
        access_rights,
        rec_state,
        format_mimetype,
        rec_created_date,
        rec_modified_date,
    )


def didl_xmletree_to_metajson(root_item, source, rec_id_prefix):
    """Convert the ``didl:Item`` children of *root_item* to a MetaJSON document.

    Each item is classified by the ``rdf:type`` values of its descriptors:

    * ``info:eu-repo/semantics/descriptiveMetadata``: the embedded
      ``mods:mods`` element is converted with
      ``mods_crosswalk.mods_xmletree_to_metajson(mods, source, rec_id_prefix)``
      and becomes the document; the item's ``dcterms:modified`` text, when
      present, is stored under ``"rec_modified_date"``.
    * ``info:eu-repo/semantics/objectFile`` (only when the item is not also
      descriptive metadata): a remote resource is built from the ``ref`` and
      ``mimeType`` attributes of the item's ``didl:Resource``.

    Returns the document, with the collected resources stored under
    ``"resources"`` when both the document and the resource list are
    non-empty. Returns None when no MODS record was converted.
    """
    document = None
    resources = []

    for item in root_item.findall(xmletree.prefixtag("didl", "Item")) or ():
        item_types, item_date_modified = _didl_item_types_and_modified(item)

        if "info:eu-repo/semantics/descriptiveMetadata" in item_types:
            didl_resource = _didl_first_resource(item)
            if didl_resource is None:
                continue
            mods = didl_resource.find(xmletree.prefixtag("mods", "mods"))
            if mods is None:
                continue
            document = mods_crosswalk.mods_xmletree_to_metajson(mods, source, rec_id_prefix)
            # Guard against a crosswalk that yields no document before
            # subscripting it.
            if document is not None and item_date_modified:
                document["rec_modified_date"] = item_date_modified

        elif "info:eu-repo/semantics/objectFile" in item_types:
            resource = _didl_object_file_to_resource(item, item_types, item_date_modified)
            # Same None check get_rml_images applies to the service result,
            # so the resources list never carries None entries.
            if resource is not None:
                resources.append(resource)

    if document and resources:
        document["resources"] = resources
    return document