Esempio n. 1
0
    def __init__(self, record_element, strip_ns=True):
        """Build a record and extract its arXiv metadata.

        The header is parsed first. When the record is not flagged as deleted
        and contains an ``arXiv`` metadata element, every direct child of that
        element becomes one entry of ``self.metadata``, keyed by the child's
        tag with the arXiv namespace removed (``id`` is stored under
        ``identifier``). ``self.metadata`` stays an empty dict otherwise.

        :param record_element: XML element of the record, handed to the
            parent initializer.
        :param strip_ns: Forwarded unchanged to the parent initializer.
        """
        super(Record, self).__init__(record_element, strip_ns=strip_ns)
        self.header = Header(
            self.xml.find('.//' + self._oai_namespace + 'header'))
        self.deleted = self.header.deleted
        self.metadata = {}

        arxiv_ns = "{http://arxiv.org/OAI/arXiv/}"
        tree = self.xml.find(
            ".//" + self._oai_namespace + "metadata/" + arxiv_ns + "arXiv")
        if self.deleted is False and tree is not None:
            # Iterate the element itself: getchildren() is deprecated in lxml
            # and was removed from xml.etree.ElementTree in Python 3.9.
            for element in tree:
                # remove namespace from tag
                tag = element.tag.replace(arxiv_ns, "")
                text = element.text
                if tag == "authors":
                    text = self.parse_authors(element)
                elif tag in ("created", "updated"):
                    # An empty element has no text; keep None instead of
                    # letting strptime raise and abort the whole record.
                    if text is not None:
                        text = datetime.strptime(text, "%Y-%m-%d")
                elif tag == "title":
                    # Same guard: an empty <title/> yields text of None.
                    if text is not None:
                        text = text.strip()
                elif tag == "id":
                    # rename
                    tag = "identifier"

                self.metadata[tag] = text
            self.metadata["mdate"] = datetime.strptime(
                self.header.datestamp, "%Y-%m-%d")
Esempio n. 2
0
File: utils.py Progetto: llcit/llt
 def __init__(self, record_element, strip_ns=True):
     """Build a record and convert its metadata payload to a dict.

     The header is parsed from the record XML. For records that are not
     flagged as deleted, the first child of the ``metadata`` element is
     passed to ``dim_xml_to_dict`` and the result stored in
     ``self.metadata``. This method does not assign ``self.metadata`` for
     deleted records.

     :param record_element: XML element of the record, handed to the
         parent initializer.
     :param strip_ns: Forwarded unchanged to the parent initializer.
     """
     super(LltRecord, self).__init__(record_element, strip_ns=strip_ns)
     self.header = Header(
         self.xml.find('.//' + self._oai_namespace + 'header'))
     if not self.header.deleted:
         metadata_element = self.xml.find(
             './/' + self._oai_namespace + 'metadata')
         # Index the element directly rather than calling the deprecated
         # getchildren(); an empty element still raises IndexError.
         self.metadata = dim_xml_to_dict(metadata_element[0])
Esempio n. 3
0
 def __init__(self, record_element, strip_ns=False):
     """Build a record and convert its metadata payload to a dict.

     The header is parsed from the record XML and its ``deleted`` flag is
     mirrored on the record. For records that are not deleted, the first
     child of the ``metadata`` element is passed to ``self.xml_to_dict``
     and the result stored in ``self.metadata``. This method does not
     assign ``self.metadata`` for deleted records.

     :param record_element: XML element of the record, handed to the
         parent initializer.
     :param strip_ns: Forwarded unchanged to the parent initializer.
     """
     super(FRDRRecord, self).__init__(record_element, strip_ns=strip_ns)
     self.header = Header(
         self.xml.find('.//' + self._oai_namespace + 'header'))
     self.deleted = self.header.deleted
     if not self.deleted:
         metadata_element = self.xml.find(
             './/' + self._oai_namespace + 'metadata')
         # Index the element directly rather than calling the deprecated
         # getchildren(); an empty element still raises IndexError.
         self.metadata = self.xml_to_dict(metadata_element[0])
Esempio n. 4
0
def get_oai_header_data(header: Header = None, xml: _Element = None):
    """Return the datestamp, deleted flag and identifier of an OAI header.

    Exactly one of ``header`` and ``xml`` must be supplied. When ``xml`` is
    given, the header is built from the ``header`` element found beneath it,
    using the namespace reported by ``get_namespace(xml)``.

    :param header: An already parsed header object.
    :param xml: XML element in which to look up the ``header`` element.
    :returns: Tuple ``(datestamp, deleted, oai_identifier)``.
    :raises Exception: If neither or both arguments are supplied.
    """
    # Compare against None explicitly. Truth-testing an XML element reports
    # whether it has children (and is deprecated), so a valid argument
    # could be mistaken for a missing one.
    if header is None and xml is None:  # pragma: no cover
        raise Exception("Must provide header or xml")
    if header is not None and xml is not None:  # pragma: no cover
        raise Exception("You must provide only header or xml")
    if xml is not None:
        header = Header(xml.find('.//' + get_namespace(xml) + 'header'))
    datestamp = header.datestamp
    oai_identifier = header.identifier
    deleted = header.deleted
    return datestamp, deleted, oai_identifier
Esempio n. 5
0
def test_get_oai_header_data(load_entry_points, app, db, record_xml):
    """Check the tuple extracted from a ready-made ``Header`` object.

    The header is built from the first item of the ``record_xml`` fixture.
    """
    expected = (
        '2017-09-11T08:12:53Z',
        False,
        'oai:dspace.cuni.cz:20.500.11956/2623',
    )
    oai_header = Header(record_xml[0])
    assert get_oai_header_data(oai_header) == expected