def _getHumanStartPage(self, lxmlNode):
    """Return the normalised humanStartPage ``didl:Item`` as an XML string.

    The humanStartPage item is looked up with three XPath variants, tried
    in order; the first one that yields a result wins:

    1. ``rdf:type/@rdf:resource`` (no log message),
    2. ``rdf:type/@resource`` (logs ``LOGGER9`` when it matches),
    3. ``dip:ObjectType/text()`` (logs ``LOGGER10`` when it matches).

    :param lxmlNode: node/tree offering ``xpath(expr, namespaces=...)``.
    :return: the serialised ``<didl:Item>`` fragment, or ``""`` (after
        logging ``LOGGER11``) when no humanStartPage item exists.
    :raises ValidateException: when the item's Resource has no ``@ref`` or
        ``comm.isURL`` rejects it.
    """
    # (xpath, log message emitted when this fallback variant matched)
    lookups = (
        ('//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/humanStartPage"]', None),
        ('//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/humanStartPage"]', LOGGER9),
        ('//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/humanStartPage"]', LOGGER10),
    )
    didl_hsp_item = []
    for expression, fallback_msg in lookups:
        didl_hsp_item = lxmlNode.xpath(expression, namespaces=self._nsMap)
        if didl_hsp_item:
            if fallback_msg is not None:
                self.do.logMsg(self._uploadid, fallback_msg, prefix=STR_DIDL)
            break

    if not didl_hsp_item:
        self.do.logMsg(self._uploadid, LOGGER11, prefix=STR_DIDL)
        return ""

    hsp_item = didl_hsp_item[0]
    uriref = hsp_item.xpath(
        'self::didl:Item/didl:Component/didl:Resource/@ref',
        namespaces=self._nsMap)
    mimetype = hsp_item.xpath(
        'self::didl:Item/didl:Component/didl:Resource/@mimeType',
        namespaces=self._nsMap)

    if not mimetype:
        self.do.logMsg(self._uploadid, LOGGER13, prefix=STR_DIDL)
        # A missing mimeType is only logged, so it must not abort the
        # record: without a fallback the template below raised IndexError
        # on mimetype[0].
        # NOTE(review): "text/html" is assumed to be the right default for
        # a human start page -- confirm against the LOGGER13 message.
        mimetype = ["text/html"]
    elif not comm.isMimeType(mimetype[0]):
        self.do.logMsg(self._uploadid, LOGGER12 + mimetype[0], prefix=STR_DIDL)

    if not uriref or not comm.isURL(uriref[0]):
        raise ValidateException(
            formatExceptionLine(EXCEPTION11, prefix=STR_DIDL))

    return """<didl:Item> <didl:Descriptor> <didl:Statement mimeType="application/xml"> <rdf:type rdf:resource="info:eu-repo/semantics/humanStartPage"/> </didl:Statement> </didl:Descriptor> <didl:Component> <didl:Resource ref="%s" mimeType="%s"/> </didl:Component> </didl:Item>""" % (
        escapeXml(comm.urlQuote(uriref[0].strip())),
        escapeXml(mimetype[0]))
def _getObjectfiles(self, lxmlNode):
    """Return all normalised objectFile ``didl:Item`` elements as one string.

    The objectFile items are looked up with three XPath variants, tried in
    order; the first one that yields a result wins:

    1. ``rdf:type/@rdf:resource`` (no log message),
    2. ``rdf:type/@resource`` (logs ``LOGGER6`` when it matches),
    3. ``dip:ObjectType/text()`` (logs ``LOGGER7`` when it matches).

    For every item found a fresh ``<didl:Item>`` is built containing, in
    this order: the objectFile type descriptor, the optional identifier,
    the mandatory access rights, the optional modification date, the
    optional description, the optional published-version type and the
    mandatory ``didl:Component`` with its resources.

    ``descr_templ``, ``accessRights`` and ``pubVersions`` are not defined in
    this method; they are presumably module-level names (a ``%s`` template
    and two ``{search key: normalised value}`` mappings).

    :param lxmlNode: node/tree offering ``xpath(expr, namespaces=...)``.
    :return: concatenated XML fragments, or ``''`` when no objectFile item
        exists.
    :raises ValidateException: when access rights are missing
        (``EXCEPTION8``) or not recognised (``EXCEPTION12``), when a
        resource ``@ref`` is rejected by ``comm.isURL`` (``EXCEPTION9``),
        or when an item ends up without any resource (``EXCEPTION10``).
    """
    # (xpath, log message emitted when this fallback variant matched)
    lookups = (
        ('//didl:DIDL/didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/objectFile"]', None),
        ('//didl:DIDL/didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/objectFile"]', LOGGER6),
        ('//didl:DIDL/didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/objectFile"]', LOGGER7),
    )
    objectfiles = []
    for expression, fallback_msg in lookups:
        objectfiles = lxmlNode.xpath(expression, namespaces=self._nsMap)
        if objectfiles:
            if fallback_msg is not None:
                self.do.logMsg(self._uploadid, fallback_msg, prefix=STR_DIDL)
            break

    parts = []
    for objectfile in objectfiles:
        # 1: Define the correct objectFile descriptor.
        parts.append('<didl:Item><didl:Descriptor><didl:Statement mimeType="application/xml"><rdf:type rdf:resource="info:eu-repo/semantics/objectFile"/></didl:Statement></didl:Descriptor>')

        # 2: Check for an Identifier (effectively mandatory, although
        #    often not implemented by repositories).
        pi = objectfile.xpath(
            'self::didl:Item/didl:Descriptor/didl:Statement/dii:Identifier/text()',
            namespaces=self._nsMap)
        if pi:
            parts.append(descr_templ % (
                '<dii:Identifier>' + escapeXml(pi[0].strip()) + '</dii:Identifier>'))

        # 3: Check for valid AccessRights (mandatory).
        arights = objectfile.xpath(
            'self::didl:Item/didl:Descriptor/didl:Statement/dcterms:accessRights/text()',
            namespaces=self._nsMap)
        if not arights:
            raise ValidateException(
                formatExceptionLine(EXCEPTION8, prefix=STR_DIDL))
        rights_text = arights[0].strip().lower()
        for key, value in accessRights.items():
            if key in rights_text:
                parts.append(descr_templ % (
                    '<dcterms:accessRights>' + value + '</dcterms:accessRights>'))
                break
        else:
            # None of the known keys occurs in the supplied value.
            raise ValidateException(
                formatExceptionLine(arights[0] + EXCEPTION12, prefix=STR_DIDL))

        # 4: Check for a valid modification date (effectively mandatory,
        #    although often not implemented by repositories).
        modified = objectfile.xpath(
            'self::didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
            namespaces=self._nsMap)
        if modified and comm.isISO8601(modified[0]):
            parts.append(descr_templ % (
                '<dcterms:modified>' + modified[0].strip() + '</dcterms:modified>'))

        # 5: Check for a 'file' description.
        descr = objectfile.xpath(
            'self::didl:Item/didl:Descriptor/didl:Statement/dc:description/text()',
            namespaces=self._nsMap)
        if descr:
            parts.append(descr_templ % (
                '<dc:description>' + escapeXml(descr[0].strip()) + '</dc:description>'))

        # 6: SKIPPED -- embargo (dcterms:available) and dateSubmitted
        #    (dcterms:dateSubmitted, with deprecated dcterms:issued as
        #    fallback) are not part of EduStandaard and are not copied.

        # 7: Check for published version (author/publisher). Both may be
        #    available: the first matching key is used.
        # NOTE(review): only the first rdf:type/@rdf:resource of the item
        # is inspected; if that happens to be the objectFile type itself,
        # a version declared in a later Descriptor is never seen --
        # confirm whether that is intended.
        pubVersion = objectfile.xpath(
            'self::didl:Item/didl:Descriptor/didl:Statement/rdf:type/@rdf:resource',
            namespaces=self._nsMap)
        if pubVersion:
            version_text = pubVersion[0].strip().lower()
            for key, value in pubVersions.items():
                if key in version_text:
                    parts.append(descr_templ % (
                        '<rdf:type rdf:resource="' + value + '"/>'))
                    break

        # 8: Check for MANDATORY resources and mimetypes.
        didl_resources = objectfile.xpath(
            'self::didl:Item/didl:Component/didl:Resource[@mimeType and @ref]',
            namespaces=self._nsMap)
        resources = []
        for resource in didl_resources:
            mimeType = resource.xpath('self::didl:Resource/@mimeType',
                                      namespaces=self._nsMap)
            uri = resource.xpath('self::didl:Resource/@ref',
                                 namespaces=self._nsMap)
            # We need both mimeType and URI (mimeType is required by the
            # DIDL schema, @ref is not).
            if not mimeType or not uri:
                continue
            if not comm.isMimeType(mimeType[0]):
                # An unexpected mimeType is only logged, not fatal.
                self.do.logMsg(self._uploadid, LOGGER8 + mimeType[0],
                               prefix=STR_DIDL)
            if not comm.isURL(uri[0].strip()):
                raise ValidateException(
                    formatExceptionLine(EXCEPTION9 + uri[0], prefix=STR_DIDL))
            resources.append(
                """<didl:Resource mimeType="%s" ref="%s"/>""" % (
                    escapeXml(mimeType[0].strip()),
                    escapeXml(comm.urlQuote(uri[0].strip()))))

        if not resources:
            raise ValidateException(
                formatExceptionLine(EXCEPTION10, prefix=STR_DIDL))
        parts.append(
            """<didl:Component> %s </didl:Component>""" % ''.join(resources))
        parts.append('</didl:Item>')

    return ''.join(parts)
def _getTopItem(self, lxmlNode):
    """Return the opening part of the normalised top-level ``didl:Item``.

    The fragment carries the persistent identifier, the most recent
    modification date found in the document, and the Component/Resource
    pointing at the identifier's location. It opens ``<didl:Item>`` but
    does not close it; presumably the caller appends the nested items and
    the closing tag.

    :param lxmlNode: node/tree offering ``xpath(expr, namespaces=...)``.
    :return: XML fragment as a string.
    :raises ValidateException: when the identifier is missing
        (``EXCEPTION1``) or rejected by ``comm.isURNNBN`` (``EXCEPTION0``),
        when the top-level modification date is missing (``EXCEPTION3``)
        or rejected by ``comm.isISO8601`` (``EXCEPTION2``), or when no
        resource location is found (``EXCEPTION4``) or it is rejected by
        ``comm.isURL`` (``EXCEPTION5``).
    """
    # Defaults for the wrapper values.
    modified, mimetype = '', "application/xml"

    # 1: Get persistentIdentifier.
    pidlist = lxmlNode.xpath(
        '//didl:DIDL/didl:Item/didl:Descriptor/didl:Statement/dii:Identifier/text()',
        namespaces=self._nsMap)
    if not pidlist:
        raise ValidateException(
            formatExceptionLine(EXCEPTION1, prefix=STR_DIDL))
    pid = pidlist[0].strip()
    if not comm.isURNNBN(pid):
        raise ValidateException(
            formatExceptionLine(EXCEPTION0 + pid, prefix=STR_DIDL))

    # 2: Get top-level modification date; it must be present and valid,
    #    otherwise raise.
    tl_modified = lxmlNode.xpath(
        '//didl:DIDL/didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
        namespaces=self._nsMap)
    if not tl_modified:
        raise ValidateException(
            formatExceptionLine(EXCEPTION3, prefix=STR_DIDL))
    if not comm.isISO8601(tl_modified[0]):
        raise ValidateException(
            formatExceptionLine(EXCEPTION2 + tl_modified[0], prefix=STR_DIDL))

    # Get all modification dates and pick the most recent one for the
    # top-level item.
    all_modified = lxmlNode.xpath(
        '//didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
        namespaces=self._nsMap)
    if all_modified:
        # Key: "<date> <time>" rendering of the parsed date (compared as
        # text); value: the original, stripped text.
        datedict = {}
        for date in all_modified:
            stripped = date.strip()
            if comm.isISO8601(stripped):
                pd = parseDate(stripped)
                datedict["%s %s" % (str(pd.date()), str(pd.time()))] = stripped
        if datedict:
            modified = datedict[max(datedict)]
        if tl_modified[0].strip() != modified:
            # The top-level date is not the most recent date in the record.
            self.do.logMsg(self._uploadid, LOGGER1, prefix=STR_DIDL)

    # 3: Get PidResourceMimetype (an unexpected value is only logged).
    mimetypelist = lxmlNode.xpath(
        '//didl:DIDL/didl:Item/didl:Component/didl:Resource/@mimeType',
        namespaces=self._nsMap)
    if mimetypelist:
        mimetype = mimetypelist[0].strip()
        if not comm.isMimeType(mimetype):
            self.do.logMsg(self._uploadid, LOGGER2 + mimetype, prefix=STR_DIDL)

    # 4: Get PidResourceLocation. The XPaths are passed as separate
    #    candidates; presumably _findAndBindFirst uses the first one that
    #    yields a value -- confirm against its definition.
    #    A comma was missing after the text() candidate, so Python glued it
    #    to the next literal and both candidates were effectively lost.
    pidlocation = self._findAndBindFirst(
        lxmlNode,
        '%s',
        '//didl:DIDL/didl:Item/didl:Component/didl:Resource/@ref',
        '//didl:DIDL/didl:Item/didl:Component/didl:Resource/text()',
        '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  # DIDL 3.0
        '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  # DIDL 3.0, without @rdf:resource
        '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  # fallback DIDL 2.3.1
        '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref',  # fallback DIDL 3.0
        '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref',  # fallback DIDL 3.0, without @rdf:resource
        '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref'  # fallback DIDL 2.3.1
    ).strip()

    if pidlocation == '':
        raise ValidateException(
            formatExceptionLine(EXCEPTION4, prefix=STR_DIDL))
    if not comm.isURL(pidlocation):
        raise ValidateException(
            formatExceptionLine(EXCEPTION5 + pidlocation, prefix=STR_DIDL))

    # The ref is XML-escaped after quoting, as the other Resource/@ref
    # writers in this file do, so characters such as '&' cannot break the
    # attribute value.
    return """<didl:Item> <didl:Descriptor><didl:Statement mimeType="application/xml"><dii:Identifier>%s</dii:Identifier></didl:Statement></didl:Descriptor> <didl:Descriptor><didl:Statement mimeType="application/xml"><dcterms:modified>%s</dcterms:modified></didl:Statement></didl:Descriptor> <didl:Component><didl:Resource mimeType="%s" ref="%s"/></didl:Component>""" % (
        escapeXml(pid),
        modified,
        escapeXml(mimetype),
        escapeXml(comm.urlQuote(pidlocation)))