def _parse_xml_document(self):
    """Parse ``self.xml_document`` into ``self.dom``.

    Nothing happens when ``self.xml_document`` is ``None``.  On success the
    parsed root element is stored in ``self.dom`` and ``self.parsed`` is set
    to ``True``.  Any failure while parsing is logged, together with the
    first 300 characters of the document, and is *not* re-raised; in that
    case ``self.parsed`` and ``self.dom`` are left untouched.
    """
    if self.xml_document is None:
        return
    try:
        s_l.info("Attempting to parse the Statement XML document")
        self.dom = etree.fromstring(self.xml_document)
        self.parsed = True
    except Exception as e:
        # Best-effort parse: report the problem but never propagate it.
        s_l.error("Failed to parse document - %s", e)
        s_l.error("XML document begins:\n %s", self.xml_document[:300])
def __init__(self, **kw):
    """Create a basic `Entry` document from the bootstrap XML.

    The XML held in ``self.bootstrap`` is parsed into ``self.entry``
    (presumably this is where the generator element comes from - confirm
    against the bootstrap definition).  When the caller does not supply an
    ``updated`` value, ``datetime.now().isoformat()`` is used as the
    timestamp.  Every keyword parameter is then passed on to
    ``self.add_fields`` and so added to the entry bootstrap document.

    It's currently not possible to add a namespace and use it within the
    init call.
    """
    self.entry = etree.fromstring(self.bootstrap)
    # Only stamp the entry when the caller has not chosen a timestamp.
    if 'updated' not in kw:
        kw['updated'] = datetime.now().isoformat()
    self.add_fields(**kw)
def __init__(self, **kw):
    """Create a basic `Entry` document from the bootstrap XML.

    Sets up ``self.nsmap`` (the ``dcterms`` and ``atom`` namespace prefixes)
    and parses the XML held in ``self.bootstrap`` into ``self.entry``.  When
    the caller does not supply an ``updated`` value,
    ``datetime.now().isoformat()`` is used as the timestamp.  Every keyword
    parameter is then passed on to ``self.add_fields`` and so added to the
    entry bootstrap document.

    It's currently not possible to add a namespace and use it within the
    init call.
    """
    # create a namespace map which we'll use in all of the elements
    self.nsmap = {
        "dcterms": "http://purl.org/dc/terms/",
        "atom": "http://www.w3.org/2005/Atom",
    }
    self.entry = etree.fromstring(self.bootstrap)
    # Only stamp the entry when the caller has not chosen a timestamp.
    if 'updated' not in kw:
        kw['updated'] = datetime.now().isoformat()
    self.add_fields(**kw)
def load_document(self, xml_response):
    """Load a service document from its raw XML text.

    Stores the raw text in ``self.raw_response``, parses it into
    ``self.service_dom``, marks the document as parsed, records the result
    of ``self.validate()`` in ``self.valid`` and finally calls
    ``self._enumerate_workspaces()``.

    Error handling is intentionally absent here (the original try/except
    was disabled): any exception raised while parsing, validating or
    enumerating propagates to the caller.
    """
    if self.sd_uri:
        sd_l.debug("Attempting to load service document for %s", self.sd_uri)
    else:
        sd_l.debug("Attempting to load service document")
    self.raw_response = xml_response
    self.service_dom = etree.fromstring(xml_response)
    self.parsed = True
    self.valid = self.validate()
    sd_l.info(
        "Initial SWORD2 validation checks on service document - Valid document? %s",
        self.valid)
    self._enumerate_workspaces()
def __init__(self, atomEntryXml=None, **kw):
    """Create a basic `Entry` document.

    ``atomEntryXml`` -- optional XML text to parse instead of the bootstrap
    document; when it is omitted or empty, ``self.bootstrap`` is parsed.

    Sets up ``self.nsmap`` (the ``dcterms`` and ``atom`` namespace prefixes)
    and stores the parsed document in ``self.entry``.  When the caller does
    not supply an ``updated`` value, ``datetime.now().isoformat()`` is used
    as the timestamp.  Every remaining keyword parameter is passed on to
    ``self.add_fields`` and so added to the entry document.

    It's currently not possible to add a namespace and use it within the
    init call.
    """
    # create a namespace map which we'll use in all of the elements
    self.nsmap = {
        "dcterms": "http://purl.org/dc/terms/",
        "atom": "http://www.w3.org/2005/Atom",
    }
    # Fall back to the bootstrap document when no entry XML was supplied.
    self.entry = etree.fromstring(atomEntryXml or self.bootstrap)
    # Only stamp the entry when the caller has not chosen a timestamp.
    if 'updated' not in kw:
        kw['updated'] = datetime.now().isoformat()
    self.add_fields(**kw)
def __init__(self, xml_document):
    """Initialise the feed wrapper and try to parse ``xml_document``.

    ``xml_document`` -- the raw Feed XML text.

    The navigation attributes (``first``, ``next``, ``previous``, ``last``)
    start as ``None`` and ``categories`` / ``entries`` start empty.  On a
    successful parse the root element is stored in ``self.feed`` and
    ``self.parsed`` becomes ``True``.  A parse failure is logged and not
    re-raised; ``self.parsed`` stays ``False`` and ``self.feed`` is not set.
    """
    self.xml_document = xml_document
    self.parsed = False
    self.first = None
    self.next = None
    self.previous = None
    self.last = None
    self.categories = []
    self.entries = []
    try:
        coll_l.info("Attempting to parse the Feed XML document")
        self.feed = etree.fromstring(xml_document)
        self.parsed = True
    except Exception as e:
        coll_l.error("Failed to parse document - %s", e)
        # A missing (None) document must not make the error report itself
        # blow up, so fall back to an empty excerpt.
        coll_l.error("XML document begins:\n %s", (xml_document or "")[:300])
def load_document(self, xml_response):
    """Parse, validate and index a service document given as XML text.

    The steps, in order: keep the raw text in ``self.raw_response``, parse
    it into ``self.service_dom``, set ``self.parsed``, store the outcome of
    ``self.validate()`` in ``self.valid`` and call
    ``self._enumerate_workspaces()``.

    No exceptions are caught here (the surrounding try/except had been
    switched off), so parsing, validation or enumeration errors reach the
    caller unchanged.
    """
    if self.sd_uri:
        sd_l.debug("Attempting to load service document for %s", self.sd_uri)
    else:
        sd_l.debug("Attempting to load service document")

    self.raw_response = xml_response
    self.service_dom = etree.fromstring(xml_response)
    self.parsed = True

    self.valid = self.validate()
    sd_l.info(
        "Initial SWORD2 validation checks on service document - Valid document? %s",
        self.valid)
    self._enumerate_workspaces()
def load_document(self, xml_response):
    """Load a service document from its raw XML text, logging any failure.

    Stores the raw text in ``self.raw_response``, parses it into
    ``self.service_dom``, marks the document as parsed, records the result
    of ``self.validate()`` in ``self.valid`` and calls
    ``self._enumerate_workspaces()``.

    Every exception raised along the way is caught, logged and swallowed;
    callers can inspect ``self.parsed`` / ``self.valid`` to see how far
    loading got.
    """
    try:
        if self.sd_uri:
            sd_l.debug("Attempting to load service document for %s", self.sd_uri)
        else:
            sd_l.debug("Attempting to load service document")
        self.raw_response = xml_response
        self.service_dom = etree.fromstring(xml_response)
        self.parsed = True
        self.valid = self.validate()
        sd_l.info(
            "Initial SWORD2 validation checks on service document - Valid document? %s",
            self.valid)
        self._enumerate_workspaces()
    except Exception as e:
        # Due to variability of underlying etree implementations, catching all
        # exceptions...
        sd_l.error(
            "Could not parse the Service Document response from the server - %s", e)
        sd_l.debug("Received the following raw response:")
        # Log the argument itself: ``self.raw_response`` may not have been
        # assigned yet if the failure happened before that point.
        sd_l.debug(xml_response)
def load_document(self, xml_response):
    """Parse, validate and index a service document; never raises.

    In order: keep the raw text in ``self.raw_response``, parse it into
    ``self.service_dom``, set ``self.parsed``, store the outcome of
    ``self.validate()`` in ``self.valid`` and call
    ``self._enumerate_workspaces()``.

    Any exception from these steps is logged and suppressed, so the state
    of ``self.parsed`` / ``self.valid`` is the only indication of success.
    """
    try:
        if self.sd_uri:
            sd_l.debug("Attempting to load service document for %s", self.sd_uri)
        else:
            sd_l.debug("Attempting to load service document")

        self.raw_response = xml_response
        self.service_dom = etree.fromstring(xml_response)
        self.parsed = True

        self.valid = self.validate()
        sd_l.info(
            "Initial SWORD2 validation checks on service document - Valid document? %s",
            self.valid)
        self._enumerate_workspaces()
    except Exception as e:
        # Due to variability of underlying etree implementations, catching all
        # exceptions...
        sd_l.error(
            "Could not parse the Service Document response from the server - %s", e)
        sd_l.debug("Received the following raw response:")
        # Use the parameter rather than ``self.raw_response``, which is not
        # guaranteed to exist when the error occurred early.
        sd_l.debug(xml_response)
def __init__(self, xml_deposit_receipt=None, dom=None, response_headers=None, location=None, code=0):
    """
    `Deposit_Receipt` - provides convenience methods for extracting information from the Deposit Receipts sent back by the
    SWORD2-compliant server for many transactions.

    #BETASWORD2URL
    See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

    Transactions carried out by `sword2.Connection` will return a `Deposit_Receipt` object, if a deposit receipt document is sent back by the server.

    Parameters:

    `xml_deposit_receipt`   --  XML text of the deposit receipt; parsed into `self.dom` when given
    `dom`                   --  NOTE(review): accepted, but not consumed by the code visible in this initialiser - confirm
    `response_headers`      --  the HTTP response headers; a fresh empty `dict` is used when omitted
    `location`              --  value of the HTTP "Location" header, also used as the default for `self.edit`
    `code`                  --  stored as `self.code` (presumably the HTTP status code - confirm)

    Usage:

    >>> from sword2 import Deposit_Receipt

    .... get the XML text for a Deposit Receipt in the variable `doc`

    # Parse the response:
    >>> dr = Deposit_Receipt(xml_deposit_receipt = doc)

    # Check that the response is parsable (valid XML) and is SWORD2-compliant
    >>> assert dr.parsed == True
    >>> assert dr.valid == True

    NOTE(review): the code visible here only initialises `valid` to `False`; validation presumably happens elsewhere - confirm.

    Available attributes:

    Atom convenience attribs -- corresponds to (type of object that is held)
    `self.title`            --  <atom:title>   (`str`)
    `self.id`               --  <id>           (`str`)
    `self.updated`          --  <updated>      (`str`)
    `self.summary`          --  <atom:summary> (`str`)
    `self.categories`       --  <category>     (`list` of `sword2.Category`)

    IRI/URIs
    `self.edit`             --  The Edit-IRI        (`str`)
                                <link rel="edit">
    `self.edit_media`       --  The Edit-Media-IRI  (`str`)
                                <link rel="edit-media">
    `self.edit_media_feed`  --  The Edit-Media-IRI [Atom Feed]  (`str`)
                                <link rel="edit-media" type="application/atom+xml;type=feed">
    `self.alternate`        --  A link which, according to the spec,                    (`str`)
                                "points to the splash page of the item on the server"
    `self.se_iri`           --  The SWORD2 Edit IRI (SE-IRI), defined by                (`str`)
                                <link rel="http://purl.org/net/sword/terms/add">
                                which MAY be the same as the Edit-IRI

    `self.cont_iri`         --  The Content-IRI     (`str`)
                                eg `src` from <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
    `self.content`          --  All Content-IRIs    (`dict` with the src or Content-IRI as the key, with a `dict` of the other attributes as its value

    `self.links`            --  All links elements in a `dict`, with the 'rel' value being used as its key. The values of this are `list`s
                                with a `dict` of attributes for each item, corresponding to the information in a single <link> element.

                                SWORD2 links for "http://purl.org/net/sword/terms/originalDeposit" and "http://purl.org/net/sword/terms/derivedResource"
                                are to be found in `self.links`

                                eg
                                >>> dr.links.get("http://purl.org/net/sword/terms/derivedResource")
                                {'href': "....", 'type':'application/pdf'}

    General metadata:

    `self.metadata`         --  Simple metadata access.
                                A `dict` where the keys are equivalent to the prefixed element names, with an underscore(_) replacing the colon (:)
                                eg "<dcterms:title>" in the deposit receipt would be accessible in this attribute, under
                                the key of 'dcterms_title'

                                eg
                                >>> dr.metadata.get("dcterms_title")
                                "The Origin of Species"

                                >>> dr.metadata.get("dcterms_madeupelement")
                                `None`

    `self.packaging`        --  sword:packaging elements declaring the formats that the Media Resource can be retrieved in   (`list` of `str`)

    `self.response_headers` --  The HTTP response headers that accompanied this receipt

    `self.location`         --  The location, if given (from HTTP Header: "Location: ....")
    """
    self.dom = None     # this will be populated below
    self.parsed = False
    self.valid = False
    # A ``{}`` default in the signature would be shared by every instance,
    # so a new dict is created per call instead.
    self.response_headers = {} if response_headers is None else response_headers
    self.location = location
    self.code = code
    self.metadata = {}
    self.links = {}
    self.edit = location    # default to the location, which should always be the same as the edit-iri
    self.edit_media = None
    self.edit_media_feed = None
    self.alternate = None
    self.se_iri = None
    self.atom_statement_iri = None
    self.ore_statement_iri = None
    # Atom convenience attribs
    self.title = None
    self.id = None
    self.updated = None
    self.summary = None

    self.packaging = []
    self.categories = []
    self.content = {}
    self.cont_iri = None

    # first construct or set the dom
    if xml_deposit_receipt:
        try:
            self.dom = etree.fromstring(xml_deposit_receipt)
            self.parsed = True
        except Exception:
            d_l.error("Was not able to parse the deposit receipt as XML.")
            # Without a DOM there is nothing further to extract.
            return
def __init__(self, xml_deposit_receipt=None, dom=None, response_headers=None, location=None, code=0):
    """
    `Deposit_Receipt` - provides convenience methods for extracting information from the Deposit Receipts sent back by the
    SWORD2-compliant server for many transactions.

    #BETASWORD2URL
    See Section 10. Deposit Receipt: http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#depositreceipt

    Transactions carried out by `sword2.Connection` will return a `Deposit_Receipt` object, if a deposit receipt document is sent back by the server.

    Parameters:

    `xml_deposit_receipt`   --  XML text (byte string or unicode string) of the deposit receipt; parsed into `self.dom` when given
    `dom`                   --  NOTE(review): accepted, but not consumed by the code visible in this initialiser - confirm
    `response_headers`      --  the HTTP response headers; a fresh empty `dict` is used when omitted
    `location`              --  value of the HTTP "Location" header, also used as the default for `self.edit`
    `code`                  --  stored as `self.code` (presumably the HTTP status code - confirm)

    Usage:

    >>> from sword2 import Deposit_Receipt

    .... get the XML text for a Deposit Receipt in the variable `doc`

    # Parse the response:
    >>> dr = Deposit_Receipt(xml_deposit_receipt = doc)

    # Check that the response is parsable (valid XML) and is SWORD2-compliant
    >>> assert dr.parsed == True
    >>> assert dr.valid == True

    NOTE(review): the code visible here only initialises `valid` to `False`; validation presumably happens elsewhere - confirm.

    Available attributes:

    Atom convenience attribs -- corresponds to (type of object that is held)
    `self.title`            --  <atom:title>   (`str`)
    `self.id`               --  <id>           (`str`)
    `self.updated`          --  <updated>      (`str`)
    `self.summary`          --  <atom:summary> (`str`)
    `self.categories`       --  <category>     (`list` of `sword2.Category`)

    IRI/URIs
    `self.edit`             --  The Edit-IRI        (`str`)
                                <link rel="edit">
    `self.edit_media`       --  The Edit-Media-IRI  (`str`)
                                <link rel="edit-media">
    `self.edit_media_feed`  --  The Edit-Media-IRI [Atom Feed]  (`str`)
                                <link rel="edit-media" type="application/atom+xml;type=feed">
    `self.alternate`        --  A link which, according to the spec,                    (`str`)
                                "points to the splash page of the item on the server"
    `self.se_iri`           --  The SWORD2 Edit IRI (SE-IRI), defined by                (`str`)
                                <link rel="http://purl.org/net/sword/terms/add">
                                which MAY be the same as the Edit-IRI

    `self.cont_iri`         --  The Content-IRI     (`str`)
                                eg `src` from <content type="application/zip" src="http://swordapp.org/cont-IRI/43/my_deposit"/>
    `self.content`          --  All Content-IRIs    (`dict` with the src or Content-IRI as the key, with a `dict` of the other attributes as its value

    `self.links`            --  All links elements in a `dict`, with the 'rel' value being used as its key. The values of this are `list`s
                                with a `dict` of attributes for each item, corresponding to the information in a single <link> element.

                                SWORD2 links for "http://purl.org/net/sword/terms/originalDeposit" and "http://purl.org/net/sword/terms/derivedResource"
                                are to be found in `self.links`

                                eg
                                >>> dr.links.get("http://purl.org/net/sword/terms/derivedResource")
                                {'href': "....", 'type':'application/pdf'}

    General metadata:

    `self.metadata`         --  Simple metadata access.
                                A `dict` where the keys are equivalent to the prefixed element names, with an underscore(_) replacing the colon (:)
                                eg "<dcterms:title>" in the deposit receipt would be accessible in this attribute, under
                                the key of 'dcterms_title'

                                eg
                                >>> dr.metadata.get("dcterms_title")
                                "The Origin of Species"

                                >>> dr.metadata.get("dcterms_madeupelement")
                                `None`

    `self.packaging`        --  sword:packaging elements declaring the formats that the Media Resource can be retrieved in   (`list` of `str`)

    `self.response_headers` --  The HTTP response headers that accompanied this receipt

    `self.location`         --  The location, if given (from HTTP Header: "Location: ....")
    """
    self.dom = None     # this will be populated below
    self.parsed = False
    self.valid = False
    # A ``{}`` default in the signature would be shared by every instance,
    # so a new dict is created per call instead.
    self.response_headers = {} if response_headers is None else response_headers
    self.location = location
    self.code = code
    self.metadata = {}
    self.links = {}
    self.edit = location    # default to the location, which should always be the same as the edit-iri
    self.edit_media = None
    self.edit_media_feed = None
    self.alternate = None
    self.se_iri = None
    self.atom_statement_iri = None
    self.ore_statement_iri = None
    # Atom convenience attribs
    self.title = None
    self.id = None
    self.updated = None
    self.summary = None

    self.packaging = []
    self.categories = []
    self.content = {}
    self.cont_iri = None

    # first construct or set the dom
    if xml_deposit_receipt:
        try:
            # Hand the parser UTF-8 bytes so that it doesn't matter whether
            # the document has an encoding declared or not.  ``bytes`` is
            # the native ``str`` on Python 2, so only text (unicode) input
            # is encoded; byte strings are passed through unchanged.
            raw_xml = xml_deposit_receipt
            if not isinstance(raw_xml, bytes):
                raw_xml = raw_xml.encode("utf-8")
            self.dom = etree.fromstring(raw_xml)
            self.parsed = True
        except Exception:
            d_l.error("Was not able to parse the deposit receipt as XML.")
            # Without a DOM there is nothing further to extract.
            return