Esempio n. 1
0
 def testTagToCurie(self):
     """Check that tagToCurie turns a '{uri}name' tag into its 'prefix:name' CURIE."""
     # assertEqual is used instead of the assertEquals alias, which was
     # deprecated in Python 3.2 and removed in Python 3.12.
     self.assertEqual('dc:title', namespaces.tagToCurie('{http://purl.org/dc/elements/1.1/}title'))
     # An unregistered namespace URI is expected to raise KeyError; a tag
     # without any '{uri}' part is expected to raise ValueError.
     self.assertRaises(KeyError, namespaces.tagToCurie, '{unknown}tag')
     self.assertRaises(ValueError, namespaces.tagToCurie, 'no-uri-in-tag')
     # Round trip: expanding a CURIE and converting it back yields the CURIE.
     self.assertEqual('srw:records', namespaces.tagToCurie(namespaces.expandNsTag('srw:records')))
     # A prefix added through copyUpdate is usable on the returned object.
     ns2 = namespaces.copyUpdate({'new':'uri:new'})
     self.assertEqual('new:tag', ns2.tagToCurie('{uri:new}tag'))
Esempio n. 2
0
 def testTagToCurie(self):
     """Check that tagToCurie turns a '{uri}name' tag into its 'prefix:name' CURIE."""
     dcTitleTag = '{http://purl.org/dc/elements/1.1/}title'
     self.assertEqual('dc:title', namespaces.tagToCurie(dcTitleTag))

     # Unregistered namespace URI.
     with self.assertRaises(KeyError):
         namespaces.tagToCurie('{unknown}tag')
     # Tag without any '{uri}' part.
     with self.assertRaises(ValueError):
         namespaces.tagToCurie('no-uri-in-tag')

     # Round trip: expand a CURIE to a tag and convert it back.
     expandedRecordsTag = namespaces.expandNsTag('srw:records')
     self.assertEqual('srw:records', namespaces.tagToCurie(expandedRecordsTag))

     # A prefix added through copyUpdate is usable on the returned object.
     extended = namespaces.copyUpdate({'new': 'uri:new'})
     self.assertEqual('new:tag', extended.tagToCurie('{uri:new}tag'))
Esempio n. 3
0
class FieldHier(Observable):
    """Observable that forwards the values of every x:fieldHier element as an addField call."""

    # Shared namespaces extended with the 'x' prefix used in the XPath expressions below.
    ns = namespaces.copyUpdate({'x': 'http://meresco.org/namespace/example'})

    def add(self, lxmlNode, **kwargs):
        """Send one addField per /x:document/x:fieldHier element found in *lxmlNode*.

        Each call passes the name UNTOKENIZED_PREFIX + 'fieldHier' and, as
        value, the list of text nodes of the element's x:value children.
        Extra keyword arguments are accepted and ignored.

        The unreachable ``yield`` makes this a generator that yields nothing.
        """
        for fieldNode in self.ns.xpath(lxmlNode, '/x:document/x:fieldHier'):
            self.do.addField(
                name=UNTOKENIZED_PREFIX + 'fieldHier',
                value=self.ns.xpath(fieldNode, 'x:value/text()'),
            )
        return
        yield
Esempio n. 4
0
            raise SruException(UNSUPPORTED_OPERATION, operation)
        if 'stylesheet' in arguments:
            raise SruException(UNSUPPORTED_PARAMETER, 'stylesheet')


class SoapException(Exception):
    """Exception holding a SOAP fault code and fault string, renderable as a SOAP:Fault element."""

    def __init__(self, faultCode, faultString):
        """Remember both fault parts; neither is validated here."""
        self._faultCode, self._faultString = faultCode, faultString

    def asSoap(self):
        """Return the fault as a <SOAP:Fault> XML string, with both values passed through xmlEscape."""
        escapedParts = tuple(
            xmlEscape(part) for part in (self._faultCode, self._faultString))
        return """<SOAP:Fault><faultcode>%s</faultcode><faultstring>%s</faultstring></SOAP:Fault>""" % escapedParts


# Copy of the base `_namespaces` with the SOAP envelope namespace added under 'soap'.
namespaces = _namespaces.copyUpdate({
    'soap': "http://schemas.xmlsoap.org/soap/envelope/",
})
# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst


def localname(tag):
    """Return, as a str, the part of *tag* after its last '}' (the whole tag if there is none)."""
    return str(tag.rpartition('}')[-1])


# Opening part of a SOAP envelope; the SOAP prefix is bound to the 'soap' entry of `namespaces`.
SOAP_HEADER = """<SOAP:Envelope xmlns:SOAP="%(soap)s"><SOAP:Body>""" % namespaces
# Closing counterpart of SOAP_HEADER.
SOAP_FOOTER = """</SOAP:Body></SOAP:Envelope>"""

# Full envelope template: header and footer around a single %s placeholder for the body.
SOAP = "%s".join((SOAP_HEADER, SOAP_FOOTER))
#
## end license ##

from seecr.test import IntegrationTestCase
from seecr.test.utils import getRequest, sleepWheel, htmlXPath
from meresco.xml import xpathFirst, xpath, namespaces
from lxml import etree

# TODO: create a UnitTestCase for, among others, writeDelete / unDelete
# TODO: investigate the options for SRU throttling.

# meresco.xml namespaces extended with the extra prefixes used by these tests.
testNamespaces = namespaces.copyUpdate(dict(
    oaibrand='http://www.openarchives.org/OAI/2.0/branding/',
    prs='http://www.onderzoekinformatie.nl/nod/prs',
    proj='http://www.onderzoekinformatie.nl/nod/act',
    org='http://www.onderzoekinformatie.nl/nod/org',
    long='http://www.knaw.nl/narcis/1.0/long/',
    short='http://www.knaw.nl/narcis/1.0/short/',
    mods='http://www.loc.gov/mods/v3',
    didl='urn:mpeg:mpeg21:2002:02-DIDL-NS',
    norm='http://dans.knaw.nl/narcis/normalized',
))

class SruSlaveTest(IntegrationTestCase):

    def testSruQuery(self):
        response = self.doSruQuery(query='*', recordSchema='knaw_short')
        # print "doSruQuery(query='*'):", etree.tostring(response)
        self.assertEqual('13', xpathFirst(response, '//srw:numberOfRecords/text()'))
        self.assertEqual(set([
            'Example Program 1',
            'Example Program 2',
            'RAIN: Pan-European gridded data sets of extreme weather probability of occurrence under present and future climate',
Esempio n. 6
0
from meresco.dans.logger import Logger  # Normalisation Logger.
from meresco.seecr.oai import OaiAddDeleteRecordWithPrefixesAndSetSpecs, OaiAddRecord
from meresco.dans.xlsserver import XlsServer

# String identifiers for the normalised and combined NL-DIDL variants
# (their use is not visible in this part of the file).
NL_DIDL_NORMALISED_PREFIX = 'nl_didl_norm'
NL_DIDL_COMBINED_PREFIX = 'nl_didl_combined'

# Shared `namespaces` extended with the additional prefixes listed below.
NAMESPACEMAP = namespaces.copyUpdate(dict(
    dip='urn:mpeg:mpeg21:2005:01-DIP-NS',
    gal='info:eu-repo/grantAgreement',
    hbo='info:eu-repo/xmlns/hboMODSextension',
    wmp='http://www.surfgroepen.nl/werkgroepmetadataplus',
    gmhnorm='http://gh.kb-dans.nl/normalised/v0.9/',
    gmhcombined='http://gh.kb-dans.nl/combined/v0.9/',
    meta='http://meresco.org/namespace/harvester/meta',
    oai='http://www.openarchives.org/OAI/2.0/',
))

# Absolute path of the directory that contains this module.
myPath = dirname(abspath(__file__))
# dynamicHtmlPath = join(myPath, 'controlpanel', 'html', 'dynamic')
# staticHtmlPath = join(myPath, 'controlpanel', 'html', 'static')


def createDownloadHelix(reactor, periodicDownload, oaiDownload,
Esempio n. 7
0
# You should have received a copy of the GNU General Public License
# along with "Digitale Collectie ErfGeo Enrichment"; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
## end license ##

from functools import partial

from meresco.xml import namespaces as _namespaces
from meresco.xml.utils import createElement, createSubElement


# meresco.xml namespaces extended with the prefixes used by this package.
namespaces = _namespaces.copyUpdate({
    'dcoa': "http://data.digitalecollectie.nl/ns/oa#",
    'geo': "http://www.w3.org/2003/01/geo/wgs84_pos#",
    'geos': "http://www.opengis.net/ont/geosparql#",
    'hg': "http://schema.histograph.io/#",
    'oa': "http://www.w3.org/ns/oa#",
    'vcard': "http://www.w3.org/2006/vcard/ns#",
})

def uriFromTag(tag):
    """Return namespaces.expandNsUri applied to namespaces.prefixedTag(tag)."""
    prefixed = namespaces.prefixedTag(tag)
    return namespaces.expandNsUri(prefixed)
# Make uriFromTag reachable as an attribute of the namespaces object as well.
namespaces.uriFromTag = uriFromTag

# Module-level shortcuts to the helpers of the extended namespaces object.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst
expandNsUri, expandNsTag = namespaces.expandNsUri, namespaces.expandNsTag
curieToUri, uriToCurie = namespaces.curieToUri, namespaces.uriToCurie
curieToTag = namespaces.curieToTag
Esempio n. 8
0
from time import sleep
from urllib import urlencode

from meresco.core import Observable
from meresco.components import lxmltostring, RetrieveToGetDataAdapter
from meresco.components.http.utils import CRLF
from meresco.sequentialstore import MultiSequentialStorage
from meresco.xml import namespaces

from meresco.oai import OaiPmh, OaiJazz, OaiBranding, SuspendRegister
from weightless.core import be, compose


# Rebind the imported `namespaces` to a copy extended with the OAI-related prefixes below.
namespaces = namespaces.copyUpdate(dict(
    toolkit='http://oai.dlib.vt.edu/OAI/metadata/toolkit',
    branding='http://www.openarchives.org/OAI/2.0/branding/',
    identifier='http://www.openarchives.org/OAI/2.0/oai-identifier',
))
# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst

BATCHSIZE = 10
# Host name of the machine, looked up once when the module is loaded.
HOSTNAME = gethostname()


class _OaiPmhTest(SeecrTestCase):
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
Esempio n. 9
0
File: tirza.py Progetto: komax/tirza
#!/usr/bin/env python
from os import listdir
from os.path import join
from lxml.etree import parse
from collections import defaultdict
from lucene import initVM
initVM()

from meresco.xml import namespaces

# Rebind the imported `namespaces` to a copy that also knows the 'oa' prefix.
namespaces = namespaces.copyUpdate({'oa': 'http://www.w3.org/ns/oa#'})
# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst


def open_writer(path):
    """Return a Lucene IndexWriter for the FSDirectory opened at *path*.

    The writer is configured with a StandardAnalyzer, both created for
    Version.LUCENE_43. The Java classes are imported inside the function.
    """
    from java.io import File
    # NOTE(review): WhitespaceAnalyzer is imported but not used in this function.
    from org.apache.lucene.analysis.core import WhitespaceAnalyzer
    from org.apache.lucene.analysis.standard import StandardAnalyzer
    from org.apache.lucene.index import IndexWriter, IndexWriterConfig
    from org.apache.lucene.store import FSDirectory
    from org.apache.lucene.util import Version

    index_dir = FSDirectory.open(File(path))
    writer_config = IndexWriterConfig(
        Version.LUCENE_43, StandardAnalyzer(Version.LUCENE_43))
    return IndexWriter(index_dir, writer_config)

def open_searcher(writer):
    from org.apache.lucene.search import IndexSearcher
    reader = writer.getReader()
Esempio n. 10
0
# (at your option) any later version.
#
# "Meresco Harvester" is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with "Meresco Harvester"; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
## end license ##

from meresco.xml import namespaces as _namespaces

# meresco.xml namespaces extended with the prefixes below.
# Note that 'sahara' and 'status' are bound to the same URI.
namespaces = _namespaces.copyUpdate({
    'dc': "http://purl.org/dc/elements/1.1/",
    'oai_dc': "http://www.openarchives.org/OAI/2.0/oai_dc/",
    'oai': "http://www.openarchives.org/OAI/2.0/",
    'srw': "http://www.loc.gov/zing/srw/",
    'diag': "http://www.loc.gov/zing/srw/diagnostic/",
    'ucp': "info:lc/xmlns/update-v1",
    'sahara': "http://sahara.cq2.org/xsd/saharaget.xsd",
    'status': "http://sahara.cq2.org/xsd/saharaget.xsd",
})

# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst
Esempio n. 11
0
## end license ##

from seecr.test import SeecrTestCase

from meresco.xml import namespaces
from meresco.xml.subtreestreebuilder import SubTreesTreeBuilder, SimpleSaxFileParser

from lxml.etree import parse, XMLParser, tostring

from math import ceil
from StringIO import StringIO


# Rebind the imported `namespaces` to a copy extended with the test-only prefixes below.
namespaces = namespaces.copyUpdate(dict(
    def_='u:ri/default#',
    newdef_='u:ri/newdefault#',
    other_='u:ri/other#',
    pre_='u:ri/prefixed#',
))
# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst


# lxml / libxml implementation detail: data passed to feed() can stay buffered or otherwise unprocessed until close() is called on the feed-parsing interface.
# This will result in 0...n (start|comment|data|pi|end)-calls and
# then a close-call on the TreeBuilder-interface.
#
# Therefore, getSubtrees() *must* be called after a close() on the XMLParser.

class SubTreesTreeBuilderTest(SeecrTestCase):
    def testParseAndProcessSimpleFile(self):
        builder = SubTreesTreeBuilder(elementPath=['records', 'record'])
Esempio n. 12
0
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# "Metastreams Harvester" is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with "Metastreams Harvester"; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
## end license ##

from meresco.xml import namespaces as _namespaces

# meresco.xml namespaces extended with the prefixes below.
# Note that 'sahara' and 'status' are bound to the same URI.
namespaces = _namespaces.copyUpdate({
    'dc': "http://purl.org/dc/elements/1.1/",
    'oai_dc': "http://www.openarchives.org/OAI/2.0/oai_dc/",
    'oai': "http://www.openarchives.org/OAI/2.0/",
    'srw': 'http://www.loc.gov/zing/srw/',
    'diag': 'http://www.loc.gov/zing/srw/diagnostic/',
    'ucp': "info:lc/xmlns/update-v1",
    'sahara': "http://sahara.cq2.org/xsd/saharaget.xsd",
    'status': "http://sahara.cq2.org/xsd/saharaget.xsd",
})

# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst
Esempio n. 13
0
    def _srwSpecificValidation(self, operation, arguments):
        if operation != 'searchRetrieve':
            raise SruException(UNSUPPORTED_OPERATION, operation)
        if 'stylesheet' in arguments:
            raise SruException(UNSUPPORTED_PARAMETER, 'stylesheet')

class SoapException(Exception):
    """Exception holding a SOAP fault code and fault string, renderable as a SOAP:Fault element."""

    def __init__(self, faultCode, faultString):
        """Remember both fault parts; neither is validated here."""
        self._faultCode = faultCode
        self._faultString = faultString

    def asSoap(self):
        """Return the fault as a <SOAP:Fault> XML string, with both values passed through xmlEscape."""
        faultTemplate = """<SOAP:Fault><faultcode>%s</faultcode><faultstring>%s</faultstring></SOAP:Fault>"""
        escapedCode = xmlEscape(self._faultCode)
        escapedString = xmlEscape(self._faultString)
        return faultTemplate % (escapedCode, escapedString)

# Copy of the base `_namespaces` with the SOAP envelope namespace added under 'soap'.
namespaces = _namespaces.copyUpdate({
    'soap': "http://schemas.xmlsoap.org/soap/envelope/",
})
# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst

def localname(tag):
    """Return, as a str, the part of *tag* after its last '}' (the whole tag if there is none)."""
    # Splitting once from the right keeps everything after the final '}'.
    pieces = tag.rsplit('}', 1)
    return str(pieces[-1])


# Opening part of a SOAP envelope; the SOAP prefix is bound to the 'soap' entry of `namespaces`.
SOAP_HEADER = """<SOAP:Envelope xmlns:SOAP="%(soap)s"><SOAP:Body>""" % namespaces
# Closing counterpart of SOAP_HEADER.
SOAP_FOOTER = """</SOAP:Body></SOAP:Envelope>"""

# Full envelope template: header and footer around a single %s placeholder for the body.
SOAP = "%s".join((SOAP_HEADER, SOAP_FOOTER))

Esempio n. 14
0
from meresco.servers.gateway.gatewayserver import NORMALISED_DOC_NAME

from meresco.components.http.utils import ContentTypePlainText, okPlainText, ContentTypeJson

# from meresco.dans.loggerrss import LoggerRSS
# from meresco.dans.logger import Logger # Normalisation Logger.

# NL_DIDL_NORMALISED_PREFIX = 'nl_didl_norm'
# NL_DIDL_COMBINED_PREFIX = 'nl_didl_combined'

# Shared `namespaces` extended with the additional prefixes listed below.
NAMESPACEMAP = namespaces.copyUpdate(dict(
    dip='urn:mpeg:mpeg21:2005:01-DIP-NS',
    gal="info:eu-repo/grantAgreement",
    hbo="info:eu-repo/xmlns/hboMODSextension",
    wmp="http://www.surfgroepen.nl/werkgroepmetadataplus",
    norm='http://dans.knaw.nl/narcis/normalized',
))


def createDownloadHelix(reactor, periodicDownload, oaiDownload,
                        dbStorageComponent):
    return \
    (periodicDownload, # Scheduled connection to a remote (response / request)...
        (XmlParseLxml(fromKwarg="data", toKwarg="lxmlNode", parseOptions=dict(huge_tree=True, remove_blank_text=True)), # Convert from plain text to lxml-object.
            (oaiDownload, # Implementation/Protocol of a PeriodicDownload...
                (UpdateAdapterFromOaiDownloadProcessor(), # Maakt van een SRU update/delete bericht (lxmlNode) een relevante message: 'delete' of 'add' message.
                    # (FilterMessages(['delete']), # Filtert delete messages
from copy import deepcopy
from weightless.core import NoneOfTheObserversRespond, DeclineMessage
from meresco.core import Observable
from meresco.components import lxmltostring, Converter
from meresco.dans.nameidentifier import Orcid, Dai, Isni, Rid, NameIdentifierFactory
from meresco.xml import namespaces


# meresco.xml namespaces extended with the prefixes below.
# See: https://github.com/seecr/meresco-xml/blob/master/meresco/xml/namespaces.py
namespacesmap = namespaces.copyUpdate(dict(
    dip='urn:mpeg:mpeg21:2005:01-DIP-NS',
    dii='urn:mpeg:mpeg21:2002:01-DII-NS',
    dai='info:eu-repo/dai',
    gal='info:eu-repo/grantAgreement',
    wmp='http://www.surfgroepen.nl/werkgroepmetadataplus',
    prs='http://www.onderzoekinformatie.nl/nod/prs',
    proj='http://www.onderzoekinformatie.nl/nod/act',
    org='http://www.onderzoekinformatie.nl/nod/org',
    long='http://www.knaw.nl/narcis/1.0/long/',
    short='http://www.knaw.nl/narcis/1.0/short/',
    mods='http://www.loc.gov/mods/v3',
    didl='urn:mpeg:mpeg21:2002:02-DIDL-NS',
    norm='http://dans.knaw.nl/narcis/normalized',
))


class ShortConverter(Converter):


    def __init__(self, fromKwarg, toKwarg=None, name=None, truncate_chars=300):
        Converter.__init__(self, name=name, fromKwarg=fromKwarg, toKwarg=toKwarg)
        self._truncate_chars = truncate_chars
    Format.DATACITE: xmlDatacite
}
# Method names keyed by Item constant; presumably resolved by name elsewhere in this file.
methods = {
    Item.GENRE: '_getGenre',
    Item.ACCESS_RIGHTS: '_getAccessRights',
}

# Parsed, empty <test/> element.
testEmpty = etree.fromstring('<test/>')
# NOTE(review): on Python 2 this name shadows the builtin `long`; it is kept
# because code outside this view may refer to it.
long = NormaliseOaiRecord(UiaConverter)

# Shared `namespaces` extended with the prefixes below.
# See: https://github.com/seecr/meresco-xml/blob/master/meresco/xml/namespaces.py
namespacesmap = namespaces.copyUpdate(dict(
    dip='urn:mpeg:mpeg21:2005:01-DIP-NS',
    dii='urn:mpeg:mpeg21:2002:01-DII-NS',
    xlink='http://www.w3.org/1999/xlink',
    dai='info:eu-repo/dai',
    gal='info:eu-repo/grantAgreement',
    wmp='http://www.surfgroepen.nl/werkgroepmetadataplus',
    prs='http://www.onderzoekinformatie.nl/nod/prs',
    proj='http://www.onderzoekinformatie.nl/nod/act',
    org='http://www.onderzoekinformatie.nl/nod/org',
    long='http://www.knaw.nl/narcis/1.0/long/',
    short='http://www.knaw.nl/narcis/1.0/short/',
    mods='http://www.loc.gov/mods/v3',
    didl='urn:mpeg:mpeg21:2002:02-DIDL-NS',
    norm='http://dans.knaw.nl/narcis/normalized',
    datacite='http://datacite.org/schema/kernel-4',
))


class LongConverterTest(unittest.TestCase):
    def _reset(self, xmlBase):
        self.xml = copy.deepcopy(xmlBase)
        self.test = copy.deepcopy(testEmpty)
Esempio n. 17
0
from meresco.xml import xpathFirst, xpath, namespaces
from lxml import etree

# TODO: create a UnitTestCase for, among others, writeDelete / unDelete
# TODO: investigate the options for SRU throttling.

# meresco.xml namespaces extended with the extra prefixes used by these tests.
testNamespaces = namespaces.copyUpdate(dict(
    oaibrand='http://www.openarchives.org/OAI/2.0/branding/',
    prs='http://www.onderzoekinformatie.nl/nod/prs',
    proj='http://www.onderzoekinformatie.nl/nod/act',
    org='http://www.onderzoekinformatie.nl/nod/org',
    long='http://www.knaw.nl/narcis/1.0/long/',
    short='http://www.knaw.nl/narcis/1.0/short/',
    mods='http://www.loc.gov/mods/v3',
    didl='urn:mpeg:mpeg21:2002:02-DIDL-NS',
    norm='http://dans.knaw.nl/narcis/normalized',
))


class SruSlaveTest(IntegrationTestCase):
    def testSruQuery(self):
        response = self.doSruQuery(query='*',
                                   recordSchema='knaw_short',
Esempio n. 18
0
from meresco.core import Observable
from meresco.components import lxmltostring, Converter
from meresco.dans.metadataformats import MetadataFormat
from meresco.xml import namespaces

import time

# Namespace URIs wrapped in braces, ready to be prepended to a local name.
HVSTR_NS = '{http://meresco.org/namespace/harvester/meta}'
DOCUMENT_NS = '{http://meresco.org/namespace/harvester/document}'


# meresco.xml namespaces extended with the prefixes below.
namespaceMap = namespaces.copyUpdate(dict(
    prs='http://www.onderzoekinformatie.nl/nod/prs',
    ond='http://www.onderzoekinformatie.nl/nod/act',
    org='http://www.onderzoekinformatie.nl/nod/org',
    long='http://www.knaw.nl/narcis/1.0/long/',
    short='http://www.knaw.nl/narcis/1.0/short/',
    mods='http://www.loc.gov/mods/v3',
    didl='urn:mpeg:mpeg21:2002:02-DIDL-NS',
    norm='http://dans.knaw.nl/narcis/normalized',
))


MODS_VERSION = '3.6'

MODS_NAMESPACE = "http://www.loc.gov/mods/v3"
# The MODS namespace URI wrapped in braces, ready to be prepended to a local name.
MODS = "{%s}" % MODS_NAMESPACE
# Prefix-to-URI map; the None key carries the MODS namespace (no prefix).
NSMAP = {
    None: MODS_NAMESPACE,
    'xlink': 'http://www.w3.org/1999/xlink',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}
Esempio n. 19
0
from time import sleep
from urllib.parse import urlencode

from meresco.core import Observable
from meresco.components import lxmltostring, RetrieveToGetDataAdapter
from meresco.components.http.utils import parseResponse, CRLF
from meresco.sequentialstore import MultiSequentialStorage
from meresco.xml import namespaces

from meresco.oai import OaiPmh, OaiJazz, OaiBranding, SuspendRegister
from weightless.core import be, compose, asBytes


# Rebind the imported `namespaces` to a copy extended with the OAI-related prefixes below.
namespaces = namespaces.copyUpdate(dict(
    toolkit='http://oai.dlib.vt.edu/OAI/metadata/toolkit',
    branding='http://www.openarchives.org/OAI/2.0/branding/',
    identifier='http://www.openarchives.org/OAI/2.0/oai-identifier',
))
# Module-level shortcuts to the XPath helpers of the extended namespaces.
xpath, xpathFirst = namespaces.xpath, namespaces.xpathFirst

BATCHSIZE = 10
# Host name of the machine, looked up once when the module is loaded.
HOSTNAME = gethostname()


class _OaiPmhTest(SeecrTestCase):
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()