def __init__(self, record_element, strip_ns=True):
    """Build a record from an OAI-PMH element carrying arXiv metadata.

    Populates ``self.header``, ``self.deleted`` and ``self.metadata``.
    ``self.metadata`` is always a dict; it stays empty when the record is
    flagged as deleted or has no ``arXiv`` metadata element.

    Per-tag handling of the children of the ``arXiv`` element:

    * ``authors``  -> result of ``self.parse_authors(element)``
    * ``created`` / ``updated`` -> ``datetime`` parsed with ``%Y-%m-%d``
    * ``title``    -> text with surrounding whitespace stripped
    * ``id``       -> stored under the key ``"identifier"``
    * anything else -> raw element text

    :param record_element: XML element of the record, passed to the
        parent constructor.
    :param strip_ns: forwarded unchanged to the parent constructor.
    """
    super(Record, self).__init__(record_element, strip_ns=strip_ns)
    self.header = Header(
        self.xml.find('.//' + self._oai_namespace + 'header'))
    self.deleted = self.header.deleted
    self.metadata = {}

    arxiv_ns = "{http://arxiv.org/OAI/arXiv/}"
    tree = self.xml.find(
        ".//" + self._oai_namespace + "metadata/" + arxiv_ns + "arXiv")

    if self.deleted is False and tree is not None:
        # Iterate the element directly: getchildren() is deprecated.
        for element in tree:
            # remove namespace from tag
            tag = element.tag.replace(arxiv_ns, "")
            text = element.text
            if tag == "authors":
                text = self.parse_authors(element)
            elif tag in ("created", "updated"):
                # An empty element has text None; keep None rather than
                # letting strptime raise TypeError.
                if text is not None:
                    text = datetime.strptime(text, "%Y-%m-%d")
            elif tag == "title":
                # Same guard: None has no strip().
                if text is not None:
                    text = text.strip()
            elif tag == "id":
                # rename
                tag = "identifier"
            self.metadata[tag] = text
        # NOTE(review): the source this was reconstructed from had lost its
        # indentation; "mdate" is assumed to be set once per non-deleted
        # record, after the loop -- confirm against the original nesting.
        self.metadata["mdate"] = datetime.strptime(
            self.header.datestamp, "%Y-%m-%d")
def __init__(self, record_element, strip_ns=True):
    """Build a record whose metadata is converted by ``dim_xml_to_dict``.

    Sets ``self.header`` from the record's ``header`` element. When the
    header is not flagged as deleted, the first child of the ``metadata``
    element is converted with ``dim_xml_to_dict`` and stored in
    ``self.metadata``; for deleted records this constructor does not
    assign ``self.metadata`` itself.

    :param record_element: XML element of the record, passed to the
        parent constructor.
    :param strip_ns: forwarded unchanged to the parent constructor.
    """
    super(LltRecord, self).__init__(record_element, strip_ns=strip_ns)
    self.header = Header(
        self.xml.find('.//' + self._oai_namespace + 'header'))
    if not self.header.deleted:
        metadata_element = self.xml.find(
            './/' + self._oai_namespace + 'metadata')
        # Index the element instead of calling the deprecated
        # getchildren(); both give the first child element.
        tree = metadata_element[0]
        self.metadata = dim_xml_to_dict(tree)
def __init__(self, record_element, strip_ns=False):
    """Build a record whose metadata is converted by ``self.xml_to_dict``.

    Sets ``self.header`` and mirrors its ``deleted`` flag on
    ``self.deleted``. When the record is not deleted, the first child of
    the ``metadata`` element is converted with ``self.xml_to_dict`` and
    stored in ``self.metadata``; for deleted records this constructor
    does not assign ``self.metadata`` itself.

    :param record_element: XML element of the record, passed to the
        parent constructor.
    :param strip_ns: forwarded unchanged to the parent constructor
        (defaults to ``False`` here).
    """
    super(FRDRRecord, self).__init__(record_element, strip_ns=strip_ns)
    self.header = Header(
        self.xml.find('.//' + self._oai_namespace + 'header'))
    self.deleted = self.header.deleted
    if not self.deleted:
        metadata_element = self.xml.find(
            './/' + self._oai_namespace + 'metadata')
        # Index the element instead of calling the deprecated
        # getchildren(); both give the first child element.
        self.metadata = self.xml_to_dict(metadata_element[0])
def get_oai_header_data(header: "Header" = None, xml: "_Element" = None):
    """Return the datestamp, deleted flag and identifier of an OAI header.

    Exactly one of ``header`` and ``xml`` must be supplied. When ``xml``
    is given, the header is built from the ``header`` element found
    beneath it, using the namespace returned by ``get_namespace(xml)``.

    Presence is tested with ``is None`` rather than truthiness: an XML
    element without children evaluates as false, so a truthiness test
    could treat a supplied element as missing.

    :param header: an already parsed header object.
    :param xml: an XML element containing a ``header`` element.
    :returns: tuple ``(datestamp, deleted, oai_identifier)``.
    :raises ValueError: if neither or both arguments are supplied.
    """
    if header is None and xml is None:  # pragma: no cover
        raise ValueError("Must provide header or xml")
    if header is not None and xml is not None:  # pragma: no cover
        raise ValueError("You must provide only header or xml")
    if xml is not None:
        header = Header(xml.find('.//' + get_namespace(xml) + 'header'))
    datestamp = header.datestamp
    oai_identifier = header.identifier
    deleted = header.deleted
    return datestamp, deleted, oai_identifier
def test_get_oai_header_data(load_entry_points, app, db, record_xml):
    """Check the tuple extracted from a parsed header.

    The header is built from the first item of the ``record_xml`` fixture
    and must yield the expected (datestamp, deleted, identifier) tuple.
    """
    expected = (
        '2017-09-11T08:12:53Z',
        False,
        'oai:dspace.cuni.cz:20.500.11956/2623',
    )
    parsed_header = Header(record_xml[0])
    assert get_oai_header_data(parsed_header) == expected