Esempio n. 1
0
    def _getHumanStartPage(self, lxmlNode):
        """Build the normalized humanStartPage ``didl:Item`` fragment.

        The humanStartPage item is looked up with three xpath variants, in
        order: ``rdf:type/@rdf:resource``, ``rdf:type/@resource`` (attribute
        without the rdf prefix) and ``dip:ObjectType/text()``. A hit on the
        second or third variant is logged (LOGGER9 / LOGGER10).

        :param lxmlNode: lxml node (tree) holding the DIDL document.
        :return: XML string with the normalized item, or ``""`` when no
            humanStartPage item is present (LOGGER11 is logged in that case).
        :raises ValidateException: when the item has no ``@ref`` on its
            Resource, or the (stripped) ``@ref`` is not accepted by
            ``comm.isURL``.
        """
        # (xpath, message logged when this variant is the one that matched)
        candidates = (
            ('//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/humanStartPage"]',
             None),
            ('//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/humanStartPage"]',
             LOGGER9),
            ('//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/humanStartPage"]',
             LOGGER10),
        )
        for item_xpath, deviation_msg in candidates:
            didl_hsp_item = lxmlNode.xpath(item_xpath,
                                           namespaces=self._nsMap)
            if len(didl_hsp_item) > 0:
                if deviation_msg is not None:
                    self.do.logMsg(self._uploadid,
                                   deviation_msg,
                                   prefix=STR_DIDL)
                break
        else:
            # None of the variants matched: there is no humanStartPage.
            self.do.logMsg(self._uploadid, LOGGER11, prefix=STR_DIDL)
            return ""

        hsp_item = didl_hsp_item[0]
        uriref = hsp_item.xpath(
            'self::didl:Item/didl:Component/didl:Resource/@ref',
            namespaces=self._nsMap)
        mimetype = hsp_item.xpath(
            'self::didl:Item/didl:Component/didl:Resource/@mimeType',
            namespaces=self._nsMap)

        if len(mimetype) > 0:
            mime = mimetype[0]
            if not comm.isMimeType(mime):
                self.do.logMsg(self._uploadid,
                               LOGGER12 + mime,
                               prefix=STR_DIDL)
        else:
            self.do.logMsg(self._uploadid, LOGGER13, prefix=STR_DIDL)
            # The original went on to index mimetype[0] in the return
            # expression and crashed with IndexError on this path.
            # NOTE(review): "text/html" is assumed to be the right default
            # for a human start page -- confirm against the DIDL profile.
            mime = "text/html"

        # Validate the stripped value, i.e. the value that is actually
        # emitted, like _getObjectfiles and _getTopItem do.
        if len(uriref) == 0 or not comm.isURL(uriref[0].strip()):
            raise ValidateException(
                formatExceptionLine(EXCEPTION11, prefix=STR_DIDL))
        url = uriref[0].strip()

        return """<didl:Item>
                    <didl:Descriptor>
                        <didl:Statement mimeType="application/xml">
                            <rdf:type rdf:resource="info:eu-repo/semantics/humanStartPage"/>
                        </didl:Statement>
                    </didl:Descriptor>
                    <didl:Component>
                        <didl:Resource ref="%s" mimeType="%s"/>
                    </didl:Component>
                </didl:Item>""" % (escapeXml(comm.urlQuote(url)),
                                   escapeXml(mime))
Esempio n. 2
0
    def _getObjectfiles(self, lxmlNode):
        """Build the normalized ``didl:Item`` fragments for all objectFiles.

        objectFile items are looked up with three xpath variants, in order:
        ``rdf:type/@rdf:resource``, ``rdf:type/@resource`` and
        ``dip:ObjectType/text()``; a hit on the second or third variant is
        logged (LOGGER6 / LOGGER7). For every item found, the identifier,
        access rights, modification date, description, version type and
        resources are copied into a fresh item.

        :param lxmlNode: lxml node (tree) holding the DIDL document.
        :return: concatenated XML string of all normalized objectFile items;
            ``''`` when the document has none.
        :raises ValidateException: when an objectFile has no accessRights,
            an accessRights value matching no key of ``accessRights``, a
            resource whose ``@ref`` is not accepted by ``comm.isURL``, or no
            usable resource at all.
        """
        # (xpath, message logged when this variant is the one that matched)
        candidates = (
            ('//didl:DIDL/didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/objectFile"]',
             None),
            ('//didl:DIDL/didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/objectFile"]',
             LOGGER6),
            ('//didl:DIDL/didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/objectFile"]',
             LOGGER7),
        )
        objectfiles = []
        for item_xpath, deviation_msg in candidates:
            objectfiles = lxmlNode.xpath(item_xpath, namespaces=self._nsMap)
            if len(objectfiles) > 0:
                if deviation_msg is not None:
                    self.do.logMsg(self._uploadid,
                                   deviation_msg,
                                   prefix=STR_DIDL)
                break

        parts = []
        for objectfile in objectfiles:
            # 1: Open the item with the canonical objectFile descriptor.
            parts.append(
                '<didl:Item><didl:Descriptor><didl:Statement mimeType="application/xml"><rdf:type rdf:resource="info:eu-repo/semantics/objectFile"/></didl:Statement></didl:Descriptor>'
            )

            # 2: Identifier (effectively mandatory, though often not
            #    implemented by suppliers): copied when present.
            pi = objectfile.xpath(
                'self::didl:Item/didl:Descriptor/didl:Statement/dii:Identifier/text()',
                namespaces=self._nsMap)
            if len(pi) > 0:
                parts.append(descr_templ %
                             ('<dii:Identifier>' + escapeXml(pi[0].strip()) +
                              '</dii:Identifier>'))

            # 3: AccessRights are mandatory and must contain a known key.
            arights = objectfile.xpath(
                'self::didl:Item/didl:Descriptor/didl:Statement/dcterms:accessRights/text()',
                namespaces=self._nsMap)
            if len(arights) == 0:
                raise ValidateException(
                    formatExceptionLine(EXCEPTION8, prefix=STR_DIDL))
            supplied_rights = arights[0].strip().lower()
            for key, value in accessRights.items():
                if key in supplied_rights:
                    parts.append(descr_templ %
                                 ('<dcterms:accessRights>' + value +
                                  '</dcterms:accessRights>'))
                    break
            else:
                # No known access-rights key occurs in the supplied value.
                raise ValidateException(
                    formatExceptionLine(arights[0] + EXCEPTION12,
                                        prefix=STR_DIDL))

            # 4: Modification date (effectively mandatory, though often not
            #    implemented): copied only when it is valid ISO 8601.
            modified = objectfile.xpath(
                'self::didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
                namespaces=self._nsMap)
            if len(modified) > 0 and comm.isISO8601(modified[0]):
                parts.append(descr_templ %
                             ('<dcterms:modified>' + modified[0].strip() +
                              '</dcterms:modified>'))

            # 5: Optional 'file' description.
            descr = objectfile.xpath(
                'self::didl:Item/didl:Descriptor/didl:Statement/dc:description/text()',
                namespaces=self._nsMap)
            if len(descr) > 0:
                parts.append(descr_templ %
                             ('<dc:description>' +
                              escapeXml(descr[0].strip()) +
                              '</dc:description>'))

            # 6: dcterms:available (embargo), dcterms:dateSubmitted and the
            #    deprecated dcterms:issued are deliberately not copied: they
            #    are not part of EduStandaard.

            # 7: Published version (author/publisher). Both may be present:
            #    the first rdf:type that matches a known version key wins.
            #    The xpath below also returns the objectFile rdf:type the
            #    item was selected by, so every value has to be inspected.
            #    The original only looked at the first value and therefore
            #    missed a version type listed after the objectFile type.
            rdf_types = objectfile.xpath(
                'self::didl:Item/didl:Descriptor/didl:Statement/rdf:type/@rdf:resource',
                namespaces=self._nsMap)
            version_uri = None
            for rdf_type in rdf_types:
                supplied_type = rdf_type.strip().lower()
                for key, value in pubVersions.items():
                    if key in supplied_type:
                        version_uri = value
                        break
                if version_uri is not None:
                    break
            if version_uri is not None:
                parts.append(descr_templ %
                             ('<rdf:type rdf:resource="' + version_uri +
                              '"/>'))

            # 8: MANDATORY resources; only Resources that carry both
            #    @mimeType and @ref are selected.
            didl_resources = objectfile.xpath(
                'self::didl:Item/didl:Component/didl:Resource[@mimeType and @ref]',
                namespaces=self._nsMap)
            resources = []
            for resource in didl_resources:
                mimeType = resource.xpath('self::didl:Resource/@mimeType',
                                          namespaces=self._nsMap)
                uri = resource.xpath('self::didl:Resource/@ref',
                                     namespaces=self._nsMap)
                # Both mimeType and URI are needed (mimeType is required by
                # the DIDL schema, @ref is not).
                if len(mimeType) == 0 or len(uri) == 0:
                    continue
                if not comm.isMimeType(mimeType[0]):
                    # An unrecognized mimetype is only logged, not rejected.
                    self.do.logMsg(self._uploadid,
                                   LOGGER8 + mimeType[0],
                                   prefix=STR_DIDL)
                if not comm.isURL(uri[0].strip()):
                    raise ValidateException(
                        formatExceptionLine(EXCEPTION9 + uri[0],
                                            prefix=STR_DIDL))
                resources.append(
                    """<didl:Resource mimeType="%s" ref="%s"/>""" %
                    (escapeXml(mimeType[0].strip()),
                     escapeXml(comm.urlQuote(uri[0].strip()))))

            if len(resources) == 0:
                raise ValidateException(
                    formatExceptionLine(EXCEPTION10, prefix=STR_DIDL))
            parts.append(
                '<didl:Component>\n                %s\n            </didl:Component>'
                % ''.join(resources))
            parts.append('</didl:Item>')
        return ''.join(parts)
Esempio n. 3
0
    def _getTopItem(self, lxmlNode):
        """Build the opening part of the normalized top-level ``didl:Item``.

        Collects the persistent identifier, the most recent modification
        date found in any item, and the mimetype and location of the
        top-level resource.

        :param lxmlNode: lxml node (tree) holding the DIDL document.
        :return: XML string starting with ``<didl:Item>`` and holding the
            identifier, modified and component elements. The item element
            is NOT closed here.
        :raises ValidateException: when the identifier is missing or not
            accepted by ``comm.isURNNBN``, the top-level modified date is
            missing or not accepted by ``comm.isISO8601``, or the resource
            location is missing or not accepted by ``comm.isURL``.
        """
        # Default used when the top-level Resource carries no @mimeType.
        mimetype = "application/xml"

        # 1: Get persistentIdentifier.
        pidlist = lxmlNode.xpath(
            '//didl:DIDL/didl:Item/didl:Descriptor/didl:Statement/dii:Identifier/text()',
            namespaces=self._nsMap)
        if len(pidlist) == 0:
            raise ValidateException(
                formatExceptionLine(EXCEPTION1, prefix=STR_DIDL))
        pid = pidlist[0].strip()
        if not comm.isURNNBN(pid):
            raise ValidateException(
                formatExceptionLine(EXCEPTION0 + pid, prefix=STR_DIDL))

        # 2: Get top-level modification date; it must be present and valid.
        tl_modified = lxmlNode.xpath(
            '//didl:DIDL/didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
            namespaces=self._nsMap)
        if len(tl_modified) == 0:
            raise ValidateException(
                formatExceptionLine(EXCEPTION3, prefix=STR_DIDL))
        if not comm.isISO8601(tl_modified[0]):
            raise ValidateException(
                formatExceptionLine(EXCEPTION2 + tl_modified[0],
                                    prefix=STR_DIDL))

        # Collect the modified dates of ALL items, keyed by a sortable
        # "<date> <time>" string, and pick the most recent one.
        all_modified = lxmlNode.xpath(
            '//didl:Item/didl:Descriptor/didl:Statement/dcterms:modified/text()',
            namespaces=self._nsMap)
        datedict = {}
        for date in all_modified:
            supplied = date.strip()
            if comm.isISO8601(supplied):
                pd = parseDate(supplied)
                datedict["%s %s" %
                         (str(pd.date()), str(pd.time()))] = supplied
        modified = datedict[max(datedict)] if datedict else ''

        # The top-level date differs from the most recent one: only logged.
        if not tl_modified[0].strip() == modified:
            self.do.logMsg(self._uploadid, LOGGER1, prefix=STR_DIDL)

        # 3: Get PidResourceMimetype.
        mimetypelist = lxmlNode.xpath(
            '//didl:DIDL/didl:Item/didl:Component/didl:Resource/@mimeType',
            namespaces=self._nsMap)
        if len(mimetypelist) > 0:
            mimetype = mimetypelist[0].strip()
            if not comm.isMimeType(mimetype):
                self.do.logMsg(self._uploadid,
                               LOGGER2 + mimetype,
                               prefix=STR_DIDL)

        # 4: Get PidResourceLocation from the first xpath that yields one.
        #    Each xpath must be its own argument: the original lacked the
        #    comma after the text() variant, so that literal was glued to
        #    the next one and both fallbacks were lost.
        pidlocation = self._findAndBindFirst(
            lxmlNode,
            '%s',
            '//didl:DIDL/didl:Item/didl:Component/didl:Resource/@ref',
            '//didl:DIDL/didl:Item/didl:Component/didl:Resource/text()',
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  #DIDL 3.0
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  #DIDL 3.0, without @rdf:resource
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/humanStartPage"]/didl:Component/didl:Resource/@ref',  #fallback DIDL 2.3.1
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@rdf:resource="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref',  #fallback DIDL 3.0
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/rdf:type/@resource="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref',  #fallback DIDL 3.0, without @rdf:resource
            '//didl:Item/didl:Item[didl:Descriptor/didl:Statement/dip:ObjectType/text()="info:eu-repo/semantics/objectFile"]/didl:Component/didl:Resource/@ref'  #fallback DIDL 2.3.1
        ).strip()

        if pidlocation == '':
            raise ValidateException(
                formatExceptionLine(EXCEPTION4, prefix=STR_DIDL))
        if not comm.isURL(pidlocation):
            raise ValidateException(
                formatExceptionLine(EXCEPTION5 + pidlocation, prefix=STR_DIDL))

        # The quoted URL is XML-escaped as well, as in _getHumanStartPage
        # and _getObjectfiles, so that the ref attribute stays well-formed.
        return """<didl:Item>
        <didl:Descriptor><didl:Statement mimeType="application/xml"><dii:Identifier>%s</dii:Identifier></didl:Statement></didl:Descriptor>
        <didl:Descriptor><didl:Statement mimeType="application/xml"><dcterms:modified>%s</dcterms:modified></didl:Statement></didl:Descriptor>
        <didl:Component><didl:Resource mimeType="%s" ref="%s"/></didl:Component>""" % (
            escapeXml(pid), modified, escapeXml(mimetype),
            escapeXml(comm.urlQuote(pidlocation)))