def testGetState(self):
    """getState() returns a live view tracking name, paused, schedule and errorState."""
    state = self.pc.getState()
    self.assertEqual('obs_name', state.name)
    self.assertEqual(False, state.paused)
    self.assertEqual(Schedule(period=3600), state.schedule)
    self.assertEqual({'paused': False, 'name': 'obs_name', 'schedule': Schedule(period=3600)}, state.asDict())
    self.pc.observable_setName('dashy')
    self.assertEqual('dashy', state.name)
    self.assertEqual(None, state.errorState)
    self.pc.setSchedule(schedule=Schedule(period=5))
    self.assertEqual(Schedule(period=5), state.schedule)
    self.pc.pause()
    self.assertEqual(True, state.paused)
    self.pc.resume()
    addTimer = self.reactor.calledMethods[-1]
    self.reactor.calledMethods.reset()
    self.assertEqual(False, state.paused)
    addTimer.args[1]()
    self.pc.pause()
    # pause() while a task is in flight only takes effect after the
    # scheduled process has actually run.
    self.assertEqual(False, state.paused)
    addProcess = self.reactor.calledMethods[-1]
    addProcess.args[0]()
    self.assertEqual(True, state.paused)
def testAutoStartOrScheduleRequired(self):
    """autoStart=True requires a schedule; autoStart=False never does."""
    reactor = CallTrace('reactor')
    self.assertRaises(ValueError, lambda: PeriodicCall(reactor=reactor))
    self.assertRaises(
        ValueError,
        lambda: PeriodicCall(reactor=reactor, autoStart=True))
    # Fixed: bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
    # narrowed to Exception.
    try:
        PeriodicCall(reactor=reactor, autoStart=False)
    except Exception:
        self.fail('Unexpected exception')
    try:
        PeriodicCall(reactor=reactor, schedule=Schedule(period=1), autoStart=False)
    except Exception:
        self.fail('Unexpected exception')
    try:
        PeriodicCall(reactor=reactor, schedule=Schedule(period=1), autoStart=True)
    except Exception:
        self.fail('Unexpected exception')
def setUp(self):
    # Build a fresh test DNA with an hourly schedule and a 15s error-retry
    # schedule, then run observer_init so the initial timer gets registered
    # on the (traced) reactor.
    SeecrTestCase.setUp(self)
    self.newDNA(schedule=Schedule(period=3600), errorSchedule=Schedule(period=15), prio=9, name='obs_name')
    list(compose(self.dna.once.observer_init()))
def testSecondsSinceEpoch(self):
    """secondsFromNow() counts down to the absolute epoch target."""
    # Test with ints, but works with floats as well (much harder to test
    # due to binary representation).
    s = Schedule(secondsSinceEpoch=123)
    self.assertEqual(123, s.secondsSinceEpoch)
    s._time = lambda: 76
    self.assertEqual(47, s.secondsFromNow())
def testUsePeriod(self):
    """A period schedule fires 'period' seconds from now, zero included."""
    s = Schedule(period=42)
    self.assertEqual(42, s.secondsFromNow())
    self.assertEqual(42, s.period)
    s = Schedule(period=0)
    self.assertEqual(0, s.secondsFromNow())
    self.assertEqual(0, s.period)
def testRepr(self):
    """repr() reflects exactly the constructor arguments that were given."""
    self.assertEqual('Schedule(period=0)', repr(Schedule(period=0)))
    self.assertEqual('Schedule(period=1)', repr(Schedule(period=1)))
    self.assertEqual("Schedule(timeOfDay='21:00')", repr(Schedule(timeOfDay='21:00')))
    self.assertEqual("Schedule(dayOfWeek=1, timeOfDay='21:00')", repr(Schedule(timeOfDay='21:00', dayOfWeek=1)))
    self.assertEqual("Schedule(secondsSinceEpoch=42)", repr(Schedule(secondsSinceEpoch=42)))
def testFatalErrorReRaised(self):
    # Fatal exceptions raised by the observer's 'handle' generator must
    # propagate out of the reactor process callback instead of being
    # caught and rescheduled like ordinary errors.
    for exception in [KeyboardInterrupt, SystemExit, AssertionError]:
        self.newDNA(schedule=Schedule(period=987))

        def raiser():
            raise exception('msg')
            yield  # unreachable; makes raiser a generator, as 'handle' must be

        self.observer.methods['handle'] = raiser
        list(compose(self.dna.once.observer_init()))
        addTimer, = self.reactor.calledMethods
        self.reactor.calledMethods.reset()
        addTimer.args[1]()
        addProcess, = self.reactor.calledMethods
        self.reactor.calledMethods.reset()
        try:
            addProcess.args[0]()
        except exception:
            pass
        else:
            self.fail()
        self.assertEqual(['handle'], self.observer.calledMethodNames())
        # even after a fatal error the process is removed and a new timer set
        self.assertEqual(['removeProcess', 'addTimer'], self.reactor.calledMethodNames())
def testIncrementalHarvestScheduleSetToNone(self):
    """Setting the incremental harvest schedule to None disables rescheduling."""
    recorder = CallTrace(emptyGeneratorMethods=['add'])
    odp = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        incrementalHarvestSchedule=Schedule(period=0),
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    odp.addObserver(recorder)
    expectedCalls = ['startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone']
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
    self.assertEqual('2002-06-01T19:20:30Z', odp._from)
    # with a schedule in place a next request time is computed
    self.assertNotEqual(None, odp._earliestNextRequestTime)
    self.assertEqual(expectedCalls, recorder.calledMethodNames())
    recorder.calledMethods.reset()
    odp.setFrom(from_=None)
    odp.setIncrementalHarvestSchedule(schedule=None)
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
    self.assertEqual('2002-06-01T19:20:30Z', odp._from)
    # schedule removed: no next request time anymore
    self.assertEqual(None, odp._earliestNextRequestTime)
    self.assertEqual(expectedCalls, recorder.calledMethodNames())
def testIncrementalHarvestScheduleNoneOverruledWithSetIncrementalHarvestSchedule(
        self):
    """An initial None schedule can later be replaced, re-enabling harvesting."""
    odp = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=None)
    odp._time = lambda: 10
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    self.assertEqual(None, odp._resumptionToken)
    self.assertEqual('2002-06-01T19:20:30Z', odp._from)
    self.assertEqual(None, odp._earliestNextRequestTime)
    odp.setIncrementalHarvestSchedule(schedule=Schedule(period=3))
    # setting a schedule alone does not make a request pending ...
    self.assertEqual(None, odp.buildRequest())
    self.assertEqual(None, odp._earliestNextRequestTime)
    # ... an explicit scheduleNextRequest() does
    odp.scheduleNextRequest()
    self.assertNotEqual(None, odp.buildRequest())
    self.assertEqual(0, odp._earliestNextRequestTime)
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    # now (10) + period (3) = 13
    self.assertEqual(13, odp._earliestNextRequestTime)
def testSetScheduleAddsANewTimer(self):
    """Changing the schedule removes the pending timer and adds a new one."""
    self.reactor.calledMethods.reset()
    self.pc.setSchedule(schedule=Schedule(period=123))
    self.assertEqual(['removeTimer', 'addTimer'],
                     self.reactor.calledMethodNames())
    removeTimer, addTimer = self.reactor.calledMethods
    self.assertEqual('TOKEN', removeTimer.args[0])
    self.assertEqual(123, addTimer.args[0])
def testIncrementalHarvestWithFromAfterSomePeriod(self):
    """buildRequest() stays None until the incremental period has elapsed."""
    recorder = CallTrace(emptyGeneratorMethods=['add'])
    odp = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=Schedule(period=10))
    odp._time = lambda: 1.0
    odp.addObserver(recorder)
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE))))
    self.assertEqual(None, odp._resumptionToken)
    self.assertEqual('2002-06-01T19:20:30Z', odp._from)
    # before and at the period boundary: no request yet
    for now in (1.0, 6.0, 10.0):
        odp._time = lambda now=now: now
        self.assertEqual(None, odp.buildRequest())
    # just past the boundary: an incremental 'from' request is built
    odp._time = lambda: 11.1
    request = odp.buildRequest()
    self.assertTrue(
        request.startswith(
            'GET /oai?verb=ListRecords&from=2002-06-01T19%3A20%3A30Z&metadataPrefix=oai_dc'
        ), request)
def testInitialSchedule(self):
    """The initial schedule is used once; thereafter the regular one applies."""
    self.newDNA(initialSchedule=Schedule(period=0), schedule=Schedule(period=12))
    list(compose(self.dna.once.observer_init()))
    self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
    addTimer, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    # first timer uses the initial schedule's period (0)
    self.assertEqual(0, addTimer.args[0])
    addTimer.args[1]()
    addProcess, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    addProcess.args[0]()
    self.assertEqual(['removeProcess', 'addTimer'],
                     self.reactor.calledMethodNames())
    removeProcess, addTimer = self.reactor.calledMethods
    # subsequent timers use the regular schedule's period (12)
    self.assertEqual(12, addTimer.args[0])
def testDayOfWeekTimeOfDay(self):
    """dayOfWeek=5 at 20:00: secondsFromNow() targets the next such moment."""
    s = Schedule(dayOfWeek=5, timeOfDay='20:00')
    self.assertEqual(5, s.dayOfWeek)
    s._utcnow = lambda: datetime.strptime("15-11-2012 13:30", "%d-%m-%Y %H:%M")  # This is a Thursday
    self.assertEqual(30.5 * 60 * 60, s.secondsFromNow())
    s._utcnow = lambda: datetime.strptime("14-11-2012 21:00", "%d-%m-%Y %H:%M")
    self.assertEqual(47 * 60 * 60, s.secondsFromNow())
    s._utcnow = lambda: datetime.strptime("17-11-2012 21:00", "%d-%m-%Y %H:%M")
    self.assertEqual((5 * 24 + 23) * 60 * 60, s.secondsFromNow())
    # exactly at the scheduled moment: next firing is a full week away
    s._utcnow = lambda: datetime.strptime("16-11-2012 20:00", "%d-%m-%Y %H:%M")
    self.assertEqual(7 * 24 * 60 * 60, s.secondsFromNow())
def testDayOfWeekTimeOfDay(self):
    """dayOfWeek=5 at 20:00: secondsFromNow() targets the next such moment."""
    # NOTE(review): this duplicates the testDayOfWeekTimeOfDay above; if
    # both live in the same class the later definition shadows the earlier.
    s = Schedule(dayOfWeek=5, timeOfDay='20:00')
    self.assertEqual(5, s.dayOfWeek)
    s._utcnow = lambda: datetime.strptime(
        "15-11-2012 13:30", "%d-%m-%Y %H:%M")  # This is a Thursday
    self.assertEqual(30.5 * 60 * 60, s.secondsFromNow())
    s._utcnow = lambda: datetime.strptime("14-11-2012 21:00", "%d-%m-%Y %H:%M")
    self.assertEqual(47 * 60 * 60, s.secondsFromNow())
    s._utcnow = lambda: datetime.strptime("17-11-2012 21:00", "%d-%m-%Y %H:%M")
    self.assertEqual((5 * 24 + 23) * 60 * 60, s.secondsFromNow())
    # exactly at the scheduled moment: next firing is a full week away
    s._utcnow = lambda: datetime.strptime("16-11-2012 20:00", "%d-%m-%Y %H:%M")
    self.assertEqual(7 * 24 * 60 * 60, s.secondsFromNow())
def testTimeOfDay(self):
    """timeOfDay schedules fire at the next occurrence of that clock time."""
    s = Schedule(timeOfDay='20:00')
    self.assertEqual('20:00', s.timeOfDay)
    s._utcnow = lambda: datetime.strptime("13:30", "%H:%M")
    self.assertEqual(6.5 * 60 * 60, s.secondsFromNow())
    s._utcnow = lambda: datetime.strptime("21:15", "%H:%M")
    self.assertEqual(22.75 * 60 * 60, s.secondsFromNow())
    # exactly at the scheduled time: next firing is tomorrow
    s._utcnow = lambda: datetime.strptime("20:00", "%H:%M")
    self.assertEqual(24 * 60 * 60, s.secondsFromNow())
def testMessageConfigurable(self):
    """The configured message is what ends up being called on the observer."""
    self.newDNA(message="aMessage",
                schedule=Schedule(period=3600),
                errorSchedule=Schedule(period=15),
                prio=9,
                name='obs_name')
    list(compose(self.dna.once.observer_init()))
    self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
    timerCall, = self.reactor.calledMethods
    timerCallback = timerCall.args[1]
    self.reactor.calledMethods.reset()
    timerCallback()
    self.assertEqual(['addProcess'], self.reactor.calledMethodNames())
    processCall, = self.reactor.calledMethods
    processCallback = processCall.args[0]
    self.reactor.calledMethods.reset()
    processCallback()
    self.assertEqual(['aMessage'], self.observer.calledMethodNames())
def jvmMonitorTree(reactor, updatableGustosClient):
    # Build a DNA tree that periodically runs JvmMonitor and feeds its
    # output to the gustos client: every second under test, every minute
    # in production.
    # NOTE(review): __builtins__ is a dict only in the __main__ module; in
    # an imported module it is the builtins module and has no .get() —
    # confirm this function is only ever invoked from a script context.
    gustosInterval = 1 if __builtins__.get('__test__', False) else 60
    return be((
        PeriodicCall(reactor=reactor, schedule=Schedule(period=gustosInterval)),
        (
            JvmMonitor(),
            (updatableGustosClient, ),
        ),
    ))
def testSetIncrementalHarvestScheduleNotAllowedInCaseOfRestartAfterFinish(
        self):
    """restartAfterFinish=True forbids setting an incremental harvest schedule."""
    odp = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        restartAfterFinish=True)
    setSchedule = lambda: odp.setIncrementalHarvestSchedule(
        schedule=Schedule(period=3))
    self.assertRaises(ValueError, setSchedule)
def testPausePausesOnStart(self):
    """Neither autoStart=False nor a pre-init pause() registers a timer."""
    # autoStart
    reactor = CallTrace('reactor')
    pc = PeriodicCall(reactor=reactor, autoStart=False)
    pc.observer_init()
    self.assertEqual([], reactor.calledMethodNames())
    # explicit .pause() before observer_init
    pc = PeriodicCall(reactor=reactor, schedule=Schedule(period=1), autoStart=True)
    pc.pause()
    pc.observer_init()
    self.assertEqual([], reactor.calledMethodNames())
def testSetSchedule(self):
    """A schedule set while a task runs takes effect when the task completes."""
    addTimer, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    addTimer.args[1]()
    self.pc.setSchedule(schedule=Schedule(period=1))
    addProcess, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    addProcess.args[0]()
    self.assertEqual(['removeProcess', 'addTimer'],
                     self.reactor.calledMethodNames())
    removeProcess, addTimer = self.reactor.calledMethods
    # new timer uses the freshly-set period
    self.assertEqual(1, addTimer.args[0])
def testScheduleNextRequest(self):
    """scheduleNextRequest() overrides when the next OAI request may go out."""
    odp = OaiDownloadProcessor(
        path='/p', metadataPrefix='p', workingDirectory=self.tempdir)
    odp._time = lambda: 17
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    self.assertTrue(odp._earliestNextRequestTime > 17)
    # no argument: next request allowed immediately
    odp.scheduleNextRequest()
    self.assertEqual(0, odp._earliestNextRequestTime)
    self.assertEqual(True, odp._timeForNextRequest())
    self.assertNotEqual(None, odp.buildRequest())
    # zero-period schedule: allowed as of the current time
    odp.scheduleNextRequest(Schedule(period=0))
    self.assertEqual(17, odp._earliestNextRequestTime)
    self.assertEqual(True, odp._timeForNextRequest())
    self.assertNotEqual(None, odp.buildRequest())
    # two-minute schedule: not yet time, so no request is built
    odp.scheduleNextRequest(Schedule(period=120))
    self.assertEqual(137, odp._earliestNextRequestTime)
    self.assertEqual(False, odp._timeForNextRequest())
    self.assertEqual(None, odp.buildRequest())
def __init__(self, path, metadataPrefix, workingDirectory, set=None, xWait=True, partition=None, err=None, verb=None, autoCommit=True, incrementalHarvestSchedule=_UNSPECIFIED, restartAfterFinish=False, userAgentAddition=None, name=None):
    """Set up an OAI harvesting processor.

    State (resumption token, 'from' datestamp) is persisted in
    workingDirectory; a stable harvester identifier is created there on
    first use. restartAfterFinish=True is mutually exclusive with an
    incrementalHarvestSchedule; when neither is given, a daily schedule
    (00:00) is used.
    """
    Observable.__init__(self, name=name)
    self._userAgent = _USER_AGENT + ('' if userAgentAddition is None else ' (%s)' % userAgentAddition)
    self._path = path
    self._metadataPrefix = metadataPrefix
    isdir(workingDirectory) or makedirs(workingDirectory)
    self._stateFilePath = join(workingDirectory, "harvester.state")
    self._set = set  # NOTE: parameter shadows builtin set(); kept for API compatibility
    self._xWait = xWait
    self._partition = partition
    self._err = err or stderr
    self._verb = verb or 'ListRecords'
    self._autoCommit = autoCommit
    # Fixed: compare against the _UNSPECIFIED sentinel by identity, not
    # equality — '=='/'!=' could be hijacked by a Schedule's __eq__.
    if restartAfterFinish and incrementalHarvestSchedule and incrementalHarvestSchedule is not _UNSPECIFIED:
        raise ValueError(
            "In case restartAfterFinish==True, incrementalHarvestSchedule must not be set"
        )
    self._restartAfterFinish = restartAfterFinish
    if incrementalHarvestSchedule is _UNSPECIFIED and not restartAfterFinish:
        incrementalHarvestSchedule = Schedule(timeOfDay='00:00')
    self._incrementalHarvestSchedule = incrementalHarvestSchedule
    self._resumptionToken = None
    self._from = None
    self._errorState = None
    self._earliestNextRequestTime = 0
    self._readState()  # may overwrite the fields above from disk
    self._identifierFilePath = join(workingDirectory, "harvester.identifier")
    if isfile(self._identifierFilePath):
        self._identifier = _open_read(self._identifierFilePath).strip()
    else:
        self._identifier = str(uuid4())
        with open(self._identifierFilePath, 'w') as f:
            f.write(self._identifier)
def testSetIncrementalHarvestSchedule(self):
    """A schedule set after construction governs the next-request time."""
    odp = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO(),
        incrementalHarvestSchedule=None)
    odp._time = lambda: 10
    odp.setIncrementalHarvestSchedule(schedule=Schedule(period=3))
    self.assertEqual(0, odp._earliestNextRequestTime)
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    # now (10) + period (3) = 13
    self.assertEqual(13, odp._earliestNextRequestTime)
def main(reactor, port, statePath, gatewayPort, dbConfig, quickCommit=False, **ignored):
    """Assemble the resolver-server DNA: a periodic gateway download feeding
    the resolver DB storage, plus a trivial HTTP front page."""
    #TODO: Implement logging.
    # normLogger = Logger(join(statePath, '..', 'gateway', 'normlogger'))
    dbStorageComponent = ResolverStorageComponent(dbConfig)
    # Fixed: removed unused local 'verbose = True'.
    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        # WST: Interval in seconds before sending a new request to the GATEWAY
        # in case of an error while processing batch records (default=1).
        # IntegrationTests need <=1 second! Otherwise tests will fail!
        schedule=Schedule(period=.1 if quickCommit else 10),
        name='resolver',
        autoStart=True)
    oaiDownload = OaiDownloadProcessor(
        path='/oaix',
        metadataPrefix=NORMALISED_DOC_NAME,
        workingDirectory=join(statePath, 'harvesterstate', 'gateway'),
        userAgentAddition='ResolverServer',
        xWait=True,
        name='resolver',
        autoCommit=False)
    return \
        (Observable(),
            createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, dbStorageComponent),
            (ObservableHttpServer(reactor, port, compressResponse=True),
                (BasicHttpHandler(),
                    (PathFilter("/"),
                        (StringServer("Resolver Server", ContentTypePlainText), )
                    )
                )
            )
        )
def testPausePausesWhenRunning(self):
    """pause() lets the running task finish but schedules no new timer."""
    self.newDNA(schedule=Schedule(period=1), autoStart=True)
    list(compose(self.dna.once.observer_init()))
    self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
    addTimer, = self.reactor.calledMethods
    # pauses after completing current task
    self.pc.pause()
    self.reactor.calledMethods.reset()
    addTimer.args[1]()
    self.assertEqual(['addProcess'], self.reactor.calledMethodNames())
    addProcess, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    addProcess.args[0]()
    self.assertEqual(['handle'], self.observer.calledMethodNames())
    # only removeProcess — no new addTimer, since we are paused
    self.assertEqual(['removeProcess'], self.reactor.calledMethodNames())
def testIncrementalHarvestReScheduleIfNoRecordsMatch(self):
    """A noRecordsMatch response is not an error and advances 'from'."""
    recorder = CallTrace(emptyGeneratorMethods=['add'])
    odp = OaiDownloadProcessor(
        path="/oai",
        metadataPrefix="oai_dc",
        incrementalHarvestSchedule=Schedule(period=0),
        workingDirectory=self.tempdir,
        xWait=False,
        err=StringIO())
    odp.addObserver(recorder)
    consume(odp.handle(parse(StringIO(LISTRECORDS_RESPONSE % ''))))
    self.assertEqual('2002-06-01T19:20:30Z', odp._from)
    consume(odp.handle(parse(StringIO(NO_RECORDS_MATCH_RESPONSE))))
    self.assertEqual(None, odp._errorState)
    self.assertEqual('2012-06-01T19:20:30Z', odp._from)
def testDoNotResumeNotPaused(self):
    """resume() on a non-paused PeriodicCall is a no-op at every stage."""
    self.newDNA(schedule=Schedule(period=1), autoStart=True)
    self.pc.resume()
    self.assertEqual([], self.reactor.calledMethodNames())
    list(compose(self.dna.once.observer_init()))
    self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
    addTimer, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    self.pc.resume()
    self.assertEqual([], self.reactor.calledMethodNames())
    addTimer.args[1]()
    self.assertEqual(['addProcess'], self.reactor.calledMethodNames())
    self.reactor.calledMethods.reset()
    self.pc.resume()
    self.assertEqual([], self.reactor.calledMethodNames())
def testResumeStartsWhenPaused(self):
    """resume() on a paused PeriodicCall logs and registers the timer."""
    self.newDNA(schedule=Schedule(period=2), autoStart=False)
    list(compose(self.dna.once.observer_init()))
    self.assertEqual([], self.reactor.calledMethodNames())
    with stderr_replaced() as err:
        self.pc.resume()
        self.assertEqual('%s: resumed\n' % repr(self.pc), err.getvalue())
    self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
    addTimer, = self.reactor.calledMethods
    self.assertEqual(self.pc._periodicCall, addTimer.args[1])
    # finish one task (1/2)
    self.reactor.calledMethods.reset()
    addTimer.args[1]()
    addProcess, = self.reactor.calledMethods
    # finish one task (2/2)
    self.reactor.calledMethods.reset()
    addProcess.args[0]()
    self.assertEqual(['removeProcess', 'addTimer'],
                     self.reactor.calledMethodNames())
def main(reactor, port, statePath, gatewayPort, quickCommit=False, **ignored):
    """Assemble the GMH api-server DNA: OAI-PMH endpoint, RSS normalisation
    log feed and XLS export, fed from the gateway via a periodic download."""
    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'),
                               strategy=strategie,
                               partsRemovedOnDelete=[
                                   NL_DIDL_NORMALISED_PREFIX,
                                   NL_DIDL_COMBINED_PREFIX, 'metadata'
                               ])
    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat("metadata", "http://didl.loc.nl/didl.xsd", NAMESPACEMAP.didl)
    oaiJazz.updateMetadataFormat(NL_DIDL_COMBINED_PREFIX, "", NAMESPACEMAP.gmhcombined)
    oaiJazz.updateMetadataFormat(NL_DIDL_NORMALISED_PREFIX, "", NAMESPACEMAP.gmhnorm)
    normLogger = Logger(join(statePath, '..', 'gateway', 'normlogger'))
    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        # WST: Interval in seconds before sending a new request to the GATEWAY
        # in case of an error while processing batch records (default=1).
        # IntegrationTests need <=1 second! Otherwise tests will fail!
        schedule=Schedule(period=.1 if quickCommit else 10),
        name='api',
        autoStart=True)
    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate', 'gateway'),
                                       userAgentAddition='ApiServer',
                                       xWait=True,
                                       name='api',
                                       autoCommit=False)
    # NOTE(review): the original (whitespace-mangled) source carried a stray
    # '(storage,)' inside the OaiProvenance keyword list — a syntax error.
    # It has been dropped here; the OaiProvenance helix below resolves its
    # xpaths through RetrieveToGetDataAdapter -> storage, matching the
    # sibling api-server wiring. Confirm against version control.
    return \
        (Observable(),
            createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, storage, oaiJazz),
            (ObservableHttpServer(reactor, port, compressResponse=True),
                (BasicHttpHandler(),
                    (PathFilter('/oai'),
                        (OaiPmh(
                            repositoryName="Gemeenschappelijke Metadata Harvester DANS-KB",
                            adminEmail="*****@*****.**",
                            externalUrl="http://oai.gharvester.dans.knaw.nl",
                            batchSize=200,
                            supportXWait=False,
                            # preciseDatestamp=False,
                            # deleteInSets=False
                            ),
                            (oaiJazz, ),
                            (RetrieveToGetDataAdapter(),
                                (storage,),
                            ),
                            (OaiBranding(
                                url="https://www.narcis.nl/images/logos/logo-knaw-house.gif", #TODO: Link to a joint-GMH icon...
                                link="https://harvester.dans.knaw.nl",
                                title="Gemeenschappelijke Metadata Harvester (GMH) van DANS en de KB"),
                            ),
                            (OaiProvenance(
                                nsMap=NAMESPACEMAP,
                                baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                                harvestDate=('meta', '//meta:harvestdate/text()'),
                                metadataNamespace=('meta', '//meta:metadataPrefix/text()'), #TODO: Kan hardcoded in harvester mapper gezet eventueel: <metadataNamespace>urn:mpeg:mpeg21:2002:01-DII-NS</metadataNamespace>??
                                #metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                                identifier=('header', '//oai:identifier/text()'),
                                datestamp=('header', '//oai:datestamp/text()')
                                ),
                                (RetrieveToGetDataAdapter(),
                                    (storage,),
                                )
                            )
                        )
                    ),
                    (PathFilter('/rss'),
                        (LoggerRSS(
                            title='GMH DANS-KB Normalisationlog Syndication',
                            description='Harvester normalisation log for: ',
                            link='http://rss.gharvester.dans.knaw.nl/rss',
                            maximumRecords=30),
                            (normLogger,
                                (storage,)
                            )
                        )
                    ),
                    (PathFilter('/xls'),
                        # (LogComponent("XLS-Request:"),),
                        (XlsServer(),)
                    )
                )
            )
        )
def testSecondsSinceEpoch(self):
    """secondsFromNow() counts down to the absolute epoch target."""
    # NOTE(review): this duplicates the other testSecondsSinceEpoch in this
    # chunk; if both live in the same class the later definition shadows
    # the earlier.
    s = Schedule(secondsSinceEpoch=123)  # test with ints, but works with floats as well (much harder to test due to binary representation)
    self.assertEqual(123, s.secondsSinceEpoch)
    s._time = lambda: 76
    self.assertEqual(47, s.secondsFromNow())
def main(reactor, port, statePath, lucenePort, gatewayPort, quickCommit=False, **ignored):
    # Assemble the NARCIS api-server DNA: OAI-PMH + CERIF endpoints, SRU
    # search backed by a read-only Lucene helix, and an RSS feed, all fed
    # from the gateway via a periodic download.
    ######## START Lucene Integration ###############################################################
    defaultLuceneSettings = LuceneSettings(
        commitTimeout=30,
        readonly=True,
    )
    http11Request = be((
        HttpRequest1_1(),
        (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)), ),
    ))
    luceneIndex = luceneAndReaderConfig(
        defaultLuceneSettings.clone(readonly=True), http11Request, lucenePort)
    luceneRoHelix = be(
        (AdapterToLuceneQuery(defaultCore=DEFAULT_CORE,
                              coreConverters={
                                  DEFAULT_CORE: QueryExpressionToLuceneQueryDict(
                                      UNQUALIFIED_TERM_FIELDS,
                                      luceneSettings=luceneIndex.settings),
                              }),
            (MultiLucene(host='localhost', port=lucenePort, defaultCore=DEFAULT_CORE),
                (luceneIndex, ),
                (http11Request, ),
            )))
    ######## END Lucene Integration ###############################################################
    fieldnameRewrites = {
        # UNTOKENIZED_PREFIX+'genre': UNTOKENIZED_PREFIX+'dc:genre',
    }

    def fieldnameRewrite(name):
        # Map a fieldname through the (currently empty) rewrite table.
        return fieldnameRewrites.get(name, name)

    def drilldownFieldnamesTranslate(fieldname):
        # Prefer the untokenized variant of a field when one exists.
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
        resultsFrom=DEFAULT_CORE,
        matches=[],
        drilldownFieldnamesTranslate=drilldownFieldnamesTranslate)
    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'),
                               strategy=strategie,
                               partsRemovedOnDelete=[
                                   HEADER_PARTNAME, META_PARTNAME,
                                   METADATA_PARTNAME, OAI_DC_PARTNAME,
                                   LONG_PARTNAME, SHORT_PARTNAME,
                                   OPENAIRE_PARTNAME
                               ])
    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat(
        OAI_DC_PARTNAME, "http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        "http://purl.org/dc/elements/1.1/")
    oai_oa_cerifJazz = OaiJazz(join(statePath, 'oai_cerif'))
    oai_oa_cerifJazz.updateMetadataFormat(
        OPENAIRE_PARTNAME,
        "https://www.openaire.eu/schema/cris/current/openaire-cerif-profile.xsd",
        "https://www.openaire.eu/cerif-profile/1.1/")
    # All of the following OAI-PMH sets shall be recognized by the CRIS, even if not all of them are populated.
    oai_oa_cerifJazz.updateSet("openaire_cris_projects", "OpenAIRE_CRIS_projects")
    oai_oa_cerifJazz.updateSet("openaire_cris_orgunits", "OpenAIRE_CRIS_orgunits")
    oai_oa_cerifJazz.updateSet("openaire_cris_persons", "OpenAIRE_CRIS_persons")
    oai_oa_cerifJazz.updateSet("openaire_cris_patents", "OpenAIRE_CRIS_patents")
    oai_oa_cerifJazz.updateSet("openaire_cris_products", "OpenAIRE_CRIS_products")
    oai_oa_cerifJazz.updateSet("openaire_cris_publications", "OpenAIRE_CRIS_publications")
    oai_oa_cerifJazz.updateSet("openaire_cris_funding", "OpenAIRE_CRIS_funding")
    oai_oa_cerifJazz.updateSet("openaire_cris_events", "OpenAIRE_CRIS_events")
    oai_oa_cerifJazz.updateSet("openaire_cris_equipments", "OpenAIRE_CRIS_equipments")
    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite).filterAndModifier(),
    ]
    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(
            period=1 if quickCommit else 10
        ),  # WST: Interval in seconds before sending a new request to the GATEWAY in case of an error while processing batch records.(default=1). IntegrationTests need 1 second! Otherwise tests will fail!
        name='api',
        autoStart=True)
    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate', 'gateway'),
                                       userAgentAddition='ApiServer',
                                       xWait=True,
                                       name='api',
                                       autoCommit=False)
    # Shared sub-helix that turns CQL into a composed Lucene query.
    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRoHelix,),
                    )
                )
            )
        )
    return \
        (Observable(),
            createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, storage, oaiJazz, oai_oa_cerifJazz),
            (ObservableHttpServer(reactor, port, compressResponse=True),
                (BasicHttpHandler(),
                    (PathFilter(["/oai"]),
                        (OaiPmh(repositoryName="NARCIS OAI-pmh",
                                adminEmail="*****@*****.**",
                                externalUrl="http://oai.narcis.nl"),
                            (oaiJazz,),
                            (StorageAdapter(),
                                (storage,)
                            ),
                            (OaiBranding(
                                url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                                link="http://oai.narcis.nl",
                                title="Narcis - The gateway to scholarly information in The Netherlands"),
                            ),
                            (OaiProvenance(
                                nsMap=NAMESPACEMAP,
                                baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                                harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                                metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                                identifier=('header','//oai:identifier/text()'),
                                datestamp=('header', '//oai:datestamp/text()')
                                ),
                                (storage,)
                            )
                        )
                    ),
                    (PathFilter(["/cerif"]),
                        (OaiPmhDans(repositoryName="OpenAIRE CERIF",
                                adminEmail="*****@*****.**",
                                repositoryIdentifier="services.nod.dans.knaw.nl",
                                externalUrl="http://services.nod.dans.knaw.nl"), #TODO: pathFilter should resemble proxy path
                            (oai_oa_cerifJazz,),
                            (StorageAdapter(),
                                (storage,)
                            ),
                            (OaiOpenAIREDescription(
                                serviceid='organisation:ORG1242054',
                                acronym='services.nod.dans.knaw.nl',
                                name='NARCIS',
                                description='Compliant with the OpenAIRE Guidelines for CRIS Managers v.1.1.',
                                website='https://www.narcis.nl',
                                baseurl='http://services.nod.dans.knaw.nl/oa-cerif',
                                subjectheading='',
                                orgunitid='organisation:ORG1242054',
                                owneracronym='DANS'),
                            ),
                            # (OaiBranding(
                            #     url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                            #     link="http://oai.narcis.nl",
                            #     title="Narcis - The gateway to scholarly information in The Netherlands"),
                            # ),
                            (OaiProvenance(
                                nsMap=NAMESPACEMAP,
                                baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                                harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                                metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                                identifier=('header','//oai:identifier/text()'),
                                datestamp=('header', '//oai:datestamp/text()')
                                ),
                                (storage,)
                            )
                        )
                    ),
                    (PathFilter(['/sru']),
                        (SruParser(
                            host='sru.narcis.nl',
                            port=80,
                            defaultRecordSchema='knaw_short',
                            defaultRecordPacking='xml'),
                            (SruLimitStartRecord(limitBeyond=4000),
                                (SruHandler(
                                    includeQueryTimes=False,
                                    extraXParameters=[],
                                    enableCollectLog=False),
                                    (SruTermDrilldown(),),
                                    executeQueryHelix,
                                    (StorageAdapter(),
                                        (storage,)
                                    )
                                )
                            )
                        )
                    ),
                    (PathFilter('/rss'),
                        (Rss(
                            supportedLanguages = ['nl','en'], # defaults to first, if requested language is not available or supplied.
                            title = {'nl':'NARCIS', 'en':'NARCIS'},
                            description = {'nl':'NARCIS: De toegang tot de Nederlandse wetenschapsinformatie', 'en':'NARCIS: The gateway to Dutch scientific information'},
                            link = {'nl':'http://www.narcis.nl/?Language=nl', 'en':'http://www.narcis.nl/?Language=en'},
                            maximumRecords = 20),
                            executeQueryHelix,
                            (RssItem(
                                nsMap=NAMESPACEMAP,
                                title = ('knaw_short', {'nl':'//short:metadata/short:titleInfo[not (@xml:lang)]/short:title/text()', 'en':'//short:metadata/short:titleInfo[@xml:lang="en"]/short:title/text()'}),
                                description = ('knaw_short', {'nl':'//short:abstract[not (@xml:lang)]/text()', 'en':'//short:abstract[@xml:lang="en"]/text()'}),
                                pubdate = ('knaw_short', '//short:dateIssued/short:parsed/text()'),
                                linkTemplate = 'http://www.narcis.nl/%(wcpcollection)s/RecordID/%(oai_identifier)s/Language/%(language)s',
                                wcpcollection = ('meta', '//*[local-name() = "collection"]/text()'),
                                oai_identifier = ('meta', '//meta:record/meta:id/text()'),
                                language = ('Dummy: Language is auto provided by the calling RSS component, but needs to be present to serve the linkTemplate.')
                                ),
                                (StorageAdapter(),
                                    (storage,)
                                )
                            )
                        )
                    )
                )
            )
        )
def testSetScheduleWithIdenticalScheduleDoesNothing(self):
    """Re-setting an equal schedule must not touch the reactor."""
    addTimer, = self.reactor.calledMethods
    self.reactor.calledMethods.reset()
    self.assertEqual(3600, addTimer.args[0])
    self.pc.setSchedule(schedule=Schedule(period=3600))
    self.assertEqual([], self.reactor.calledMethodNames())