def __init__(
        self,
        repository,
        batchSize=DEFAULT_BATCH_SIZE,
        supportXWait=False,
        dataBatchSize=DEFAULT_DATA_BATCH_SIZE):
    """Initialise the observable and remember its configuration.

    repository    -- stored as-is on ``self._repository``.
    batchSize     -- stored on ``self._batchSize``
                     (defaults to DEFAULT_BATCH_SIZE).
    supportXWait  -- stored on ``self._supportXWait`` (defaults to False).
    dataBatchSize -- stored on ``self._dataBatchSize``
                     (defaults to DEFAULT_DATA_BATCH_SIZE).
    """
    # The verb list is set before the base-class initialiser runs.
    self._supportedVerbs = ['ListIdentifiers', 'ListRecords']
    Observable.__init__(self)

    self._repository = repository
    self._supportXWait = supportXWait
    self._batchSize, self._dataBatchSize = batchSize, dataBatchSize
def dna(reactor, host, portNumber, databasePath):
    """Assemble and return the nested observer tuple for the server.

    The tree has an ``Observable`` root with an ``ObservableHttpServer``
    beneath it, which in turn has three path-filtered branches:

    * ``/update`` -- SRU record update, converted from Amara to lxml and
      handed to the helix returned by ``createUploadHelix``.
    * ``/oai``    -- ``OaiPmh`` observed by the OAI jazz, the storage and an
      ``OaiProvenance`` component.
    * ``/rss``    -- ``LoggerRSS`` observed by the logger and the storage.

    reactor      -- passed to ``ObservableHttpServer``.
    host         -- accepted but not used in this function.
    portNumber   -- passed to ``ObservableHttpServer``.
    databasePath -- base directory; 'storage', 'logger' and 'oai/data' are
                    joined onto it for the respective components.
    """
    # Choose exactly ONE storage strategy. Alternatives noted by the original
    # author: HashDistributeStrategy() (irreversible?) and DefaultStrategy().
    storageStrategy = Md5HashDistributeStrategy()

    # partsRemovedOnDelete defines which parts are removed from storage on
    # an SRU delete update.
    storage = StorageComponent(
        join(databasePath, 'storage'),
        partsRemovedOnDelete=[
            NL_DIDL_NORMALISED_PREFIX,
            NL_DIDL_COMBINED_PREFIX,
            'metadata',
        ],
        strategy=storageStrategy)
    normalisationLog = Logger(join(databasePath, 'logger'))
    jazz = OaiJazz(join(databasePath, 'oai', 'data'))

    # Components are instantiated in the same order in which they appear in
    # the tree (root first, then each branch from left to right).
    root = Observable()
    httpServer = ObservableHttpServer(reactor, portNumber)

    # --- /update branch ---------------------------------------------------
    updateFilter = PathFilter("/update")
    sruUpdate = SRURecordUpdate()
    amaraToLxml = Amara2Lxml(fromKwarg='amaraNode', toKwarg='lxmlNode')
    uploadHelix = createUploadHelix(storage, jazz, normalisationLog)
    updateBranch = (updateFilter, (sruUpdate, (amaraToLxml, uploadHelix)))

    # --- /oai branch ------------------------------------------------------
    oaiFilter = PathFilter('/oai')
    # batchSize = number of records before issuing a resumptionToken.
    # An XWAIT-enabled variant noted by the original author:
    #   OaiPmh(repositoryName='repositoryName', adminEmail='adminEmail',
    #          batchSize=2, supportXWait=True)
    oaiPmh = OaiPmh(
        repositoryName='Gemeenschappelijke Harvester DANS-KB',
        adminEmail='*****@*****.**',
        batchSize=100,
        fixIdentifyBaseURL=True)
    # NOTE: if one of the fields below is missing, provenance will NOT be
    # written. See:
    # http://www.openarchives.org/OAI/2.0/guidelines-provenance.htm
    #
    # Some 'magic' in metadataNamespace: xpath() may return different types.
    # (Namespace) nodes come back as tuples instead of Element objects,
    # (string) functions return strings, etc. Since meresco.oai.OaiProvenance
    # handles all return objects from xpath() the same way, the results were
    # unpredictable. This is why XmlCompose has been overridden.
    # See: http://lxml.de/xpathxslt.html#xpath-return-values
    provenance = OaiProvenance(
        nsMap=namespacesMap,
        baseURL=('meta', '//*[local-name() = "baseurl"]/text()'),
        harvestDate=('meta', '//*[local-name() = "harvestdate"]/text()'),
        metadataNamespace=(
            NL_DIDL_NORMALISED_PREFIX,
            '//mods:mods/namespace::node()[name()="" or name()="mods" or contains(.,"mods")]',
        ),
        identifier=('header', '/oai:header/oai:identifier/text()'),
        datestamp=('header', '/oai:header/oai:datestamp/text()'))
    oaiBranch = (
        oaiFilter,
        (oaiPmh,
            (jazz,),
            (storage,),
            (provenance,
                (storage,)
            )
        )
    )

    # --- /rss branch ------------------------------------------------------
    rssFilter = PathFilter('/rss')
    rssFeed = LoggerRSS(
        title='Gemeenschappelijke Harvester DANS-KB',
        description='Harvester normalisation log for: ',
        link='http://rss.gharvester.dans.knaw.nl/rss',
        maximumRecords=30)
    rssBranch = (
        rssFilter,
        (rssFeed,
            (normalisationLog,
                (storage,)
            )
        )
    )

    return (
        root,
        (httpServer,
            updateBranch,
            oaiBranch,
            rssBranch,
        )
    )
def __init__(self, repository):
    """Initialise the observable base and keep a reference to *repository*.

    repository -- stored as-is on ``self._repository``.
    """
    Observable.__init__(self)
    self._repository = repository