Ejemplo n.º 1
0
    def testGetState(self):
        # The state object is fetched once; every later assertion reads that
        # same object after self.pc has been changed.
        state = self.pc.getState()
        self.assertEqual('obs_name', state.name)
        self.assertEqual(False, state.paused)
        self.assertEqual(Schedule(period=3600), state.schedule)
        self.assertEqual({'paused': False, 'name': 'obs_name', 'schedule': Schedule(period=3600)}, state.asDict())

        self.pc.observable_setName('dashy')
        self.assertEqual('dashy', state.name)
        self.assertEqual(None, state.errorState)

        self.pc.setSchedule(schedule=Schedule(period=5))
        self.assertEqual(Schedule(period=5), state.schedule)

        self.pc.pause()
        self.assertEqual(True, state.paused)

        self.pc.resume()
        addTimer = self.reactor.calledMethods[-1]
        self.reactor.calledMethods.reset()
        self.assertEqual(False, state.paused)

        # pause() is called after the timer callback has run: 'paused' is
        # still False here and only True once the process callback has been
        # called as well.
        addTimer.args[1]()
        self.pc.pause()
        self.assertEqual(False, state.paused)

        addProcess = self.reactor.calledMethods[-1]
        addProcess.args[0]()
        self.assertEqual(True, state.paused)
Ejemplo n.º 2
0
    def testAutoStartOrScheduleRequired(self):
        # PeriodicCall without a schedule is only accepted with
        # autoStart=False; with a schedule both autoStart values are accepted.
        reactor = CallTrace('reactor')

        self.assertRaises(ValueError, lambda: PeriodicCall(reactor=reactor))
        self.assertRaises(
            ValueError, lambda: PeriodicCall(reactor=reactor, autoStart=True))

        acceptedArguments = [
            dict(autoStart=False),
            dict(schedule=Schedule(period=1), autoStart=False),
            dict(schedule=Schedule(period=1), autoStart=True),
        ]
        for kwargs in acceptedArguments:
            try:
                PeriodicCall(reactor=reactor, **kwargs)
            except Exception as e:
                # 'except Exception' (not a bare except) so KeyboardInterrupt
                # and SystemExit are not turned into test failures; the
                # actual exception is included in the failure message.
                self.fail('Unexpected exception for %r: %r' % (kwargs, e))
Ejemplo n.º 3
0
 def setUp(self):
     """Create the default DNA for the tests and run its observer_init."""
     SeecrTestCase.setUp(self)
     dnaArguments = dict(
         schedule=Schedule(period=3600),
         errorSchedule=Schedule(period=15),
         prio=9,
         name='obs_name',
     )
     self.newDNA(**dnaArguments)
     # Exhaust the composed generator so observer_init runs to completion.
     for _ in compose(self.dna.once.observer_init()):
         pass
Ejemplo n.º 4
0
 def testSecondsSinceEpoch(self):
     # Tested with ints; floats work as well, but are much harder to test
     # due to their binary representation.
     s = Schedule(secondsSinceEpoch=123)
     self.assertEqual(123, s.secondsSinceEpoch)
     # Pin the schedule's clock at 76: 123 - 76 = 47 remains.
     s._time = lambda: 76
     self.assertEqual(47, s.secondsFromNow())
Ejemplo n.º 5
0
    def testUsePeriod(self):
        # For a period schedule secondsFromNow() equals the period,
        # including a period of 0.
        s = Schedule(period=42)
        self.assertEqual(42, s.secondsFromNow())
        self.assertEqual(42, s.period)

        s = Schedule(period=0)
        self.assertEqual(0, s.secondsFromNow())
        self.assertEqual(0, s.period)
Ejemplo n.º 6
0
 def testRepr(self):
     # repr() shows only the arguments that were given; dayOfWeek is listed
     # before timeOfDay regardless of the keyword order used in the call.
     self.assertEqual('Schedule(period=0)', repr(Schedule(period=0)))
     self.assertEqual('Schedule(period=1)', repr(Schedule(period=1)))
     self.assertEqual("Schedule(timeOfDay='21:00')",
                      repr(Schedule(timeOfDay='21:00')))
     self.assertEqual("Schedule(dayOfWeek=1, timeOfDay='21:00')",
                      repr(Schedule(timeOfDay='21:00', dayOfWeek=1)))
     self.assertEqual("Schedule(secondsSinceEpoch=42)",
                      repr(Schedule(secondsSinceEpoch=42)))
Ejemplo n.º 7
0
    def testFatalErrorReRaised(self):
        # For each of KeyboardInterrupt, SystemExit and AssertionError: when
        # the observer's 'handle' raises it, calling the process callback
        # must raise that same exception type.
        for fatalError in (KeyboardInterrupt, SystemExit, AssertionError):
            self.newDNA(schedule=Schedule(period=987))

            def raiser():
                raise fatalError('msg')
                yield  # never reached; makes raiser a generator function

            self.observer.methods['handle'] = raiser
            for _ in compose(self.dna.once.observer_init()):
                pass

            [addTimer] = self.reactor.calledMethods
            self.reactor.calledMethods.reset()
            timerCallback = addTimer.args[1]
            timerCallback()
            [addProcess] = self.reactor.calledMethods
            self.reactor.calledMethods.reset()

            reRaised = False
            try:
                addProcess.args[0]()
            except fatalError:
                reRaised = True
            if not reRaised:
                self.fail()

            self.assertEqual(['handle'], self.observer.calledMethodNames())
            expectedReactorCalls = ['removeProcess', 'addTimer']
            self.assertEqual(expectedReactorCalls,
                             self.reactor.calledMethodNames())
Ejemplo n.º 8
0
    def testIncrementalHarvestScheduleSetToNone(self):
        # With Schedule(period=0) a handled response yields a non-None
        # _earliestNextRequestTime; after the schedule has been set to None
        # (and 'from' reset), it is None after the next handled response.
        expectedCalls = [
            'startOaiBatch', 'add', 'stopOaiBatch', 'signalHarvestingDone'
        ]
        observer = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            incrementalHarvestSchedule=Schedule(period=0),
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO())
        processor.addObserver(observer)

        firstResponse = parse(StringIO(LISTRECORDS_RESPONSE))
        consume(processor.handle(firstResponse))
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)
        self.assertNotEqual(None, processor._earliestNextRequestTime)
        self.assertEqual(expectedCalls, observer.calledMethodNames())

        observer.calledMethods.reset()
        processor.setFrom(from_=None)
        processor.setIncrementalHarvestSchedule(schedule=None)

        secondResponse = parse(StringIO(LISTRECORDS_RESPONSE))
        consume(processor.handle(secondResponse))
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)
        self.assertEqual(None, processor._earliestNextRequestTime)
        self.assertEqual(expectedCalls, observer.calledMethodNames())
Ejemplo n.º 9
0
    def testIncrementalHarvestScheduleNoneOverruledWithSetIncrementalHarvestSchedule(
            self):
        # Starts without a schedule, then sets Schedule(period=3) with the
        # clock pinned at 10: the next handled response must result in an
        # _earliestNextRequestTime of 13.
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO(),
            incrementalHarvestSchedule=None)
        processor._time = lambda: 10

        def handleResponse():
            response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
            consume(processor.handle(response))

        handleResponse()
        self.assertEqual(None, processor._resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)
        self.assertEqual(None, processor._earliestNextRequestTime)

        newSchedule = Schedule(period=3)
        processor.setIncrementalHarvestSchedule(schedule=newSchedule)
        # Setting the schedule alone does not produce a request yet.
        self.assertEqual(None, processor.buildRequest())
        self.assertEqual(None, processor._earliestNextRequestTime)

        processor.scheduleNextRequest()
        self.assertNotEqual(None, processor.buildRequest())
        self.assertEqual(0, processor._earliestNextRequestTime)

        handleResponse()
        self.assertEqual(13, processor._earliestNextRequestTime)
Ejemplo n.º 10
0
 def testSetScheduleAddsANewTimer(self):
     # setSchedule() must remove the current timer (token 'TOKEN') and add
     # a new one using the new period.
     self.reactor.calledMethods.reset()
     self.pc.setSchedule(schedule=Schedule(period=123))
     self.assertEqual(['removeTimer', 'addTimer'], self.reactor.calledMethodNames())
     removeTimer, addTimer = self.reactor.calledMethods
     self.assertEqual('TOKEN', removeTimer.args[0])
     self.assertEqual(123, addTimer.args[0])
Ejemplo n.º 11
0
    def testIncrementalHarvestWithFromAfterSomePeriod(self):
        # Response handled at time 1.0 with Schedule(period=10): no request
        # is built at 1.0, 6.0 or 10.0; at 11.1 a request with 'from' is.
        expectedPrefix = 'GET /oai?verb=ListRecords&from=2002-06-01T19%3A20%3A30Z&metadataPrefix=oai_dc'
        observer = CallTrace(emptyGeneratorMethods=['add'])
        processor = OaiDownloadProcessor(
            path="/oai",
            metadataPrefix="oai_dc",
            workingDirectory=self.tempdir,
            xWait=False,
            err=StringIO(),
            incrementalHarvestSchedule=Schedule(period=10))
        processor._time = lambda: 1.0
        processor.addObserver(observer)

        response = parse(StringIO(LISTRECORDS_RESPONSE))
        consume(processor.handle(response))
        self.assertEqual(None, processor._resumptionToken)
        self.assertEqual('2002-06-01T19:20:30Z', processor._from)

        self.assertEqual(None, processor.buildRequest())
        for tooEarly in (6.0, 10.0):
            # The lambda is only called inside this iteration.
            processor._time = lambda: tooEarly
            self.assertEqual(None, processor.buildRequest())

        processor._time = lambda: 11.1
        request = processor.buildRequest()
        self.assertTrue(request.startswith(expectedPrefix), request)
Ejemplo n.º 12
0
    def testInitialSchedule(self):
        # The first timer uses initialSchedule (0); after one completed call
        # the next timer uses the regular schedule (12).
        self.newDNA(initialSchedule=Schedule(period=0), schedule=Schedule(period=12))
        list(compose(self.dna.once.observer_init()))
        self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
        addTimer, = self.reactor.calledMethods

        self.reactor.calledMethods.reset()
        self.assertEqual(0, addTimer.args[0])
        addTimer.args[1]()

        addProcess, = self.reactor.calledMethods
        self.reactor.calledMethods.reset()
        addProcess.args[0]()

        self.assertEqual(['removeProcess', 'addTimer'], self.reactor.calledMethodNames())
        removeProcess, addTimer = self.reactor.calledMethods
        self.assertEqual(12, addTimer.args[0])
Ejemplo n.º 13
0
    def testDayOfWeekTimeOfDay(self):
        # secondsFromNow() for a weekly schedule (dayOfWeek=5, 20:00), with
        # the schedule's _utcnow pinned to several moments.
        s = Schedule(dayOfWeek=5, timeOfDay='20:00')
        self.assertEqual(5, s.dayOfWeek)
        s._utcnow = lambda: datetime.strptime("15-11-2012 13:30", "%d-%m-%Y %H:%M") # This is a Thursday
        self.assertEqual(30.5 * 60 * 60, s.secondsFromNow())

        s._utcnow = lambda: datetime.strptime("14-11-2012 21:00", "%d-%m-%Y %H:%M")
        self.assertEqual(47 * 60 * 60, s.secondsFromNow())

        s._utcnow = lambda: datetime.strptime("17-11-2012 21:00", "%d-%m-%Y %H:%M")
        self.assertEqual((5 * 24 + 23) * 60 * 60, s.secondsFromNow())

        # Exactly at the scheduled moment: a full week is expected.
        s._utcnow = lambda: datetime.strptime("16-11-2012 20:00", "%d-%m-%Y %H:%M")
        self.assertEqual(7 * 24 * 60 * 60, s.secondsFromNow())
Ejemplo n.º 14
0
    def testDayOfWeekTimeOfDay(self):
        # secondsFromNow() for a weekly schedule (dayOfWeek=5, 20:00), with
        # the schedule's _utcnow pinned to several moments.
        s = Schedule(dayOfWeek=5, timeOfDay='20:00')
        self.assertEqual(5, s.dayOfWeek)
        s._utcnow = lambda: datetime.strptime(
            "15-11-2012 13:30", "%d-%m-%Y %H:%M")  # This is a Thursday
        self.assertEqual(30.5 * 60 * 60, s.secondsFromNow())

        s._utcnow = lambda: datetime.strptime("14-11-2012 21:00",
                                              "%d-%m-%Y %H:%M")
        self.assertEqual(47 * 60 * 60, s.secondsFromNow())

        s._utcnow = lambda: datetime.strptime("17-11-2012 21:00",
                                              "%d-%m-%Y %H:%M")
        self.assertEqual((5 * 24 + 23) * 60 * 60, s.secondsFromNow())

        # Exactly at the scheduled moment: a full week is expected.
        s._utcnow = lambda: datetime.strptime("16-11-2012 20:00",
                                              "%d-%m-%Y %H:%M")
        self.assertEqual(7 * 24 * 60 * 60, s.secondsFromNow())
Ejemplo n.º 15
0
    def testTimeOfDay(self):
        # secondsFromNow() for a daily 20:00 schedule, with the schedule's
        # _utcnow pinned before, after and exactly at that time.
        s = Schedule(timeOfDay='20:00')
        self.assertEqual('20:00', s.timeOfDay)
        s._utcnow = lambda: datetime.strptime("13:30", "%H:%M")
        self.assertEqual(6.5 * 60 * 60, s.secondsFromNow())

        s._utcnow = lambda: datetime.strptime("21:15", "%H:%M")
        self.assertEqual(22.75 * 60 * 60, s.secondsFromNow())

        # Exactly at the scheduled time: a full day is expected.
        s._utcnow = lambda: datetime.strptime("20:00", "%H:%M")
        self.assertEqual(24 * 60 * 60, s.secondsFromNow())
Ejemplo n.º 16
0
    def testMessageConfigurable(self):
        # With message="aMessage" the observer must be called with
        # 'aMessage' once the timer and process callbacks have run.
        dnaArguments = dict(
            message="aMessage",
            schedule=Schedule(period=3600),
            errorSchedule=Schedule(period=15),
            prio=9,
            name='obs_name',
        )
        self.newDNA(**dnaArguments)
        for _ in compose(self.dna.once.observer_init()):
            pass

        self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
        [addTimer] = self.reactor.calledMethods
        timerCallback = addTimer.args[1]
        self.reactor.calledMethods.reset()
        timerCallback()

        self.assertEqual(['addProcess'], self.reactor.calledMethodNames())
        [addProcess] = self.reactor.calledMethods
        processCallback = addProcess.args[0]
        self.reactor.calledMethods.reset()
        processCallback()

        self.assertEqual(['aMessage'], self.observer.calledMethodNames())
Ejemplo n.º 17
0
def jvmMonitorTree(reactor, updatableGustosClient):
    """Return be() of a PeriodicCall / JvmMonitor / updatableGustosClient tree.

    The PeriodicCall gets Schedule(period=1) when the builtins namespace
    holds a truthy '__test__' entry, and Schedule(period=60) otherwise.

    :param reactor: passed to PeriodicCall as its reactor.
    :param updatableGustosClient: placed below the JvmMonitor in the tree.
    """
    # __builtins__ is the builtins module in __main__ but that module's
    # __dict__ in imported modules (CPython implementation detail). Calling
    # .get() on the module raises AttributeError, so normalise to a dict.
    if isinstance(__builtins__, dict):
        builtinsNamespace = __builtins__
    else:
        builtinsNamespace = vars(__builtins__)
    gustosInterval = 1 if builtinsNamespace.get('__test__', False) else 60

    return be((
        PeriodicCall(reactor=reactor,
                     schedule=Schedule(period=gustosInterval)),
        (
            JvmMonitor(),
            (updatableGustosClient, ),
        ),
    ))
Ejemplo n.º 18
0
 def testSetIncrementalHarvestScheduleNotAllowedInCaseOfRestartAfterFinish(
         self):
     # With restartAfterFinish=True, setting an incremental harvest
     # schedule afterwards must raise ValueError.
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         restartAfterFinish=True)

     def setSchedule():
         return processor.setIncrementalHarvestSchedule(
             schedule=Schedule(period=3))

     self.assertRaises(ValueError, setSchedule)
Ejemplo n.º 19
0
    def testPausePausesOnStart(self):
        # In both cases observer_init() must not call the reactor at all.

        # autoStart
        reactor = CallTrace('reactor')
        pc = PeriodicCall(reactor=reactor, autoStart=False)
        pc.observer_init()
        self.assertEqual([], reactor.calledMethodNames())

        # explicit .pause()
        pc = PeriodicCall(reactor=reactor, schedule=Schedule(period=1), autoStart=True)
        pc.pause()
        pc.observer_init()
        self.assertEqual([], reactor.calledMethodNames())
Ejemplo n.º 20
0
    def testTimeOfDay(self):
        # secondsFromNow() for a daily 20:00 schedule, with the schedule's
        # _utcnow pinned before, after and exactly at that time.
        s = Schedule(timeOfDay='20:00')
        self.assertEqual('20:00', s.timeOfDay)
        s._utcnow = lambda: datetime.strptime("13:30", "%H:%M")
        self.assertEqual(6.5 * 60 * 60, s.secondsFromNow())

        s._utcnow = lambda: datetime.strptime("21:15", "%H:%M")
        self.assertEqual(22.75 * 60 * 60, s.secondsFromNow())

        # Exactly at the scheduled time: a full day is expected.
        s._utcnow = lambda: datetime.strptime("20:00", "%H:%M")
        self.assertEqual(24 * 60 * 60, s.secondsFromNow())
Ejemplo n.º 21
0
    def testUsePeriod(self):
        # For a period schedule secondsFromNow() equals the period,
        # including a period of 0.
        s = Schedule(period=42)
        self.assertEqual(42, s.secondsFromNow())
        self.assertEqual(42, s.period)

        s = Schedule(period=0)
        self.assertEqual(0, s.secondsFromNow())
        self.assertEqual(0, s.period)
Ejemplo n.º 22
0
    def testSetSchedule(self):
        # setSchedule() is called between the timer callback and the process
        # callback; the timer added afterwards must use the new period (1).
        addTimer, = self.reactor.calledMethods
        self.reactor.calledMethods.reset()
        addTimer.args[1]()

        self.pc.setSchedule(schedule=Schedule(period=1))

        addProcess, = self.reactor.calledMethods
        self.reactor.calledMethods.reset()
        addProcess.args[0]()

        self.assertEqual(['removeProcess', 'addTimer'], self.reactor.calledMethodNames())
        removeProcess, addTimer = self.reactor.calledMethods
        self.assertEqual(1, addTimer.args[0])
Ejemplo n.º 23
0
    def testScheduleNextRequest(self):
        # scheduleNextRequest() with the clock pinned at 17: without argument
        # (expects 0), with period 0 (expects 17), with period 120 (expects 137).
        processor = OaiDownloadProcessor(
            path='/p', metadataPrefix='p', workingDirectory=self.tempdir)
        processor._time = lambda: 17
        response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
        consume(processor.handle(response))
        self.assertTrue(processor._earliestNextRequestTime > 17)

        # No schedule given.
        processor.scheduleNextRequest()
        self.assertEqual(0, processor._earliestNextRequestTime)
        self.assertEqual(True, processor._timeForNextRequest())
        self.assertNotEqual(None, processor.buildRequest())

        # Period of 0.
        immediately = Schedule(period=0)
        processor.scheduleNextRequest(immediately)
        self.assertEqual(17, processor._earliestNextRequestTime)
        self.assertEqual(True, processor._timeForNextRequest())
        self.assertNotEqual(None, processor.buildRequest())

        # Period of 120: not yet time, so no request is built.
        later = Schedule(period=120)
        processor.scheduleNextRequest(later)
        self.assertEqual(137, processor._earliestNextRequestTime)
        self.assertEqual(False, processor._timeForNextRequest())
        self.assertEqual(None, processor.buildRequest())
 def __init__(self,
              path,
              metadataPrefix,
              workingDirectory,
              set=None,
              xWait=True,
              partition=None,
              err=None,
              verb=None,
              autoCommit=True,
              incrementalHarvestSchedule=_UNSPECIFIED,
              restartAfterFinish=False,
              userAgentAddition=None,
              name=None):
     """Store the harvest settings and load persisted state.

     :param path: stored as self._path.
     :param metadataPrefix: stored as self._metadataPrefix.
     :param workingDirectory: directory holding "harvester.state" and
         "harvester.identifier"; created when it does not exist.
     :param set: stored as self._set.
     :param xWait: stored as self._xWait.
     :param partition: stored as self._partition.
     :param err: stream stored as self._err; a falsy value selects stderr.
     :param verb: stored as self._verb; a falsy value selects 'ListRecords'.
     :param autoCommit: stored as self._autoCommit.
     :param incrementalHarvestSchedule: when left unspecified and
         restartAfterFinish is false, Schedule(timeOfDay='00:00') is used.
     :param restartAfterFinish: stored as self._restartAfterFinish.
     :param userAgentAddition: when not None, appended between parentheses
         to the user agent string.
     :param name: passed on to Observable.__init__.
     :raises ValueError: when restartAfterFinish is true and a truthy
         incrementalHarvestSchedule is given explicitly.
     """
     Observable.__init__(self, name=name)
     self._userAgent = _USER_AGENT + ('' if userAgentAddition is None else
                                      ' (%s)' % userAgentAddition)
     self._path = path
     self._metadataPrefix = metadataPrefix
     if not isdir(workingDirectory):
         makedirs(workingDirectory)
     self._stateFilePath = join(workingDirectory, "harvester.state")
     self._set = set
     self._xWait = xWait
     self._partition = partition
     self._err = err or stderr
     self._verb = verb or 'ListRecords'
     self._autoCommit = autoCommit
     # Identity check for the sentinel: '==' / '!=' would dispatch to the
     # __eq__ of whatever object the caller passed in.
     scheduleSpecified = incrementalHarvestSchedule is not _UNSPECIFIED
     if restartAfterFinish and incrementalHarvestSchedule and scheduleSpecified:
         raise ValueError(
             "In case restartAfterFinish==True, incrementalHarvestSchedule must not be set"
         )
     self._restartAfterFinish = restartAfterFinish
     if not scheduleSpecified and not restartAfterFinish:
         incrementalHarvestSchedule = Schedule(timeOfDay='00:00')
     # NOTE(review): with restartAfterFinish=True and no schedule given, the
     # _UNSPECIFIED sentinel itself is stored here - confirm this is intended.
     self._incrementalHarvestSchedule = incrementalHarvestSchedule
     self._resumptionToken = None
     self._from = None
     self._errorState = None
     self._earliestNextRequestTime = 0
     # Defaults above are assigned before the persisted state is read.
     self._readState()
     self._identifierFilePath = join(workingDirectory,
                                     "harvester.identifier")
     if isfile(self._identifierFilePath):
         self._identifier = _open_read(self._identifierFilePath).strip()
     else:
         # First use of this working directory: create and persist an id.
         self._identifier = str(uuid4())
         with open(self._identifierFilePath, 'w') as f:
             f.write(self._identifier)
Ejemplo n.º 25
0
 def testSetIncrementalHarvestSchedule(self):
     # Schedule(period=3) set on a processor created without a schedule,
     # clock pinned at 10: a handled response must result in an
     # _earliestNextRequestTime of 13.
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO(),
         incrementalHarvestSchedule=None)
     processor._time = lambda: 10

     newSchedule = Schedule(period=3)
     processor.setIncrementalHarvestSchedule(schedule=newSchedule)
     self.assertEqual(0, processor._earliestNextRequestTime)

     response = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(response))
     self.assertEqual(13, processor._earliestNextRequestTime)
Ejemplo n.º 26
0
def main(reactor,
         port,
         statePath,
         gatewayPort,
         dbConfig,
         quickCommit=False,
         **ignored):
    """Build the observer tree of the resolver server.

    :param reactor: passed to PeriodicDownload, createDownloadHelix and
        ObservableHttpServer.
    :param port: port given to ObservableHttpServer.
    :param statePath: base directory; the harvester state is kept in
        <statePath>/harvesterstate/gateway.
    :param gatewayPort: port on localhost that PeriodicDownload connects to.
    :param dbConfig: passed to ResolverStorageComponent.
    :param quickCommit: when true the download period is .1 instead of 10.
    :param ignored: any other keyword arguments are accepted and not used.
    :return: nested tuple with Observable() at the root, the download helix
        and the HTTP server branch.
    """
    #TODO: Implement logging.
    # normLogger = Logger(join(statePath, '..', 'gateway', 'normlogger'))

    dbStorageComponent = ResolverStorageComponent(dbConfig)

    # WST: Interval in seconds before sending a new request to the GATEWAY
    # in case of an error while processing batch records (default=1).
    # IntegrationTests need <=1 second! Otherwise tests will fail!
    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(period=.1 if quickCommit else 10),
        name='resolver',
        autoStart=True)

    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate',
                                           'gateway'),
                                       userAgentAddition='ResolverServer',
                                       xWait=True,
                                       name='resolver',
                                       autoCommit=False)

    return (
        Observable(),
        createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, dbStorageComponent),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                (PathFilter("/"),
                    (StringServer("Resolver Server", ContentTypePlainText), )
                )
            )
        )
    )
Ejemplo n.º 27
0
    def testPausePausesWhenRunning(self):
        # pause() while a timer is pending: the current call is still
        # completed ('handle' is called), but no new timer is added.
        self.newDNA(schedule=Schedule(period=1), autoStart=True)
        list(compose(self.dna.once.observer_init()))
        self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
        addTimer, = self.reactor.calledMethods

        # pauses after completing current task
        self.pc.pause()

        self.reactor.calledMethods.reset()
        addTimer.args[1]()
        self.assertEqual(['addProcess'], self.reactor.calledMethodNames())
        addProcess, = self.reactor.calledMethods

        self.reactor.calledMethods.reset()
        addProcess.args[0]()
        self.assertEqual(['handle'], self.observer.calledMethodNames())
        self.assertEqual(['removeProcess'], self.reactor.calledMethodNames())
Ejemplo n.º 28
0
 def testIncrementalHarvestReScheduleIfNoRecordsMatch(self):
     # A "no records match" response after a normal response must not set
     # an error state, and '_from' must change to 2012-06-01T19:20:30Z.
     observer = CallTrace(emptyGeneratorMethods=['add'])
     processor = OaiDownloadProcessor(
         path="/oai",
         metadataPrefix="oai_dc",
         incrementalHarvestSchedule=Schedule(period=0),
         workingDirectory=self.tempdir,
         xWait=False,
         err=StringIO())
     processor.addObserver(observer)

     recordsResponse = parse(StringIO(LISTRECORDS_RESPONSE % ''))
     consume(processor.handle(recordsResponse))
     self.assertEqual('2002-06-01T19:20:30Z', processor._from)

     noRecordsResponse = parse(StringIO(NO_RECORDS_MATCH_RESPONSE))
     consume(processor.handle(noRecordsResponse))
     self.assertEqual(None, processor._errorState)
     self.assertEqual('2012-06-01T19:20:30Z', processor._from)
Ejemplo n.º 29
0
    def testDoNotResumeNotPaused(self):
        # resume() on a PeriodicCall that is not paused must not call the
        # reactor: before observer_init, while a timer is pending, and after
        # the timer callback has run.
        self.newDNA(schedule=Schedule(period=1), autoStart=True)
        self.pc.resume()
        self.assertEqual([], self.reactor.calledMethodNames())

        list(compose(self.dna.once.observer_init()))
        self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
        addTimer, = self.reactor.calledMethods

        self.reactor.calledMethods.reset()
        self.pc.resume()
        self.assertEqual([], self.reactor.calledMethodNames())

        addTimer.args[1]()
        self.assertEqual(['addProcess'], self.reactor.calledMethodNames())

        self.reactor.calledMethods.reset()
        self.pc.resume()
        self.assertEqual([], self.reactor.calledMethodNames())
Ejemplo n.º 30
0
    def testResumeStartsWhenPaused(self):
        # With autoStart=False nothing is scheduled by observer_init();
        # resume() writes '<repr>: resumed' to stderr and adds the timer.
        self.newDNA(schedule=Schedule(period=2), autoStart=False)
        list(compose(self.dna.once.observer_init()))
        self.assertEqual([], self.reactor.calledMethodNames())

        with stderr_replaced() as err:
            self.pc.resume()
            self.assertEqual('%s: resumed\n' % repr(self.pc), err.getvalue())
        self.assertEqual(['addTimer'], self.reactor.calledMethodNames())
        addTimer, = self.reactor.calledMethods
        self.assertEqual(self.pc._periodicCall, addTimer.args[1])

        # finish one task (1/2)
        self.reactor.calledMethods.reset()
        addTimer.args[1]()
        addProcess, = self.reactor.calledMethods

        # finish one task (2/2)
        self.reactor.calledMethods.reset()
        addProcess.args[0]()
        self.assertEqual(['removeProcess', 'addTimer'], self.reactor.calledMethodNames())
Ejemplo n.º 31
0
def main(reactor, port, statePath, gatewayPort, quickCommit=False, **ignored):
    """Build the observer tree of the API server.

    :param reactor: passed to PeriodicDownload, createDownloadHelix and
        ObservableHttpServer.
    :param port: port given to ObservableHttpServer.
    :param statePath: base directory; 'store', 'oai' and
        'harvesterstate/gateway' are located below it, the normalisation
        log in the sibling 'gateway/normlogger' directory.
    :param gatewayPort: port on localhost that PeriodicDownload connects to.
    :param quickCommit: when true the download period is .1 instead of 10.
    :param ignored: any other keyword arguments are accepted and not used.
    :return: nested tuple with Observable() at the root, the download helix,
        and an HTTP server branch with '/oai', '/rss' and '/xls' path filters.
    """

    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'),
                               strategy=strategie,
                               partsRemovedOnDelete=[
                                   NL_DIDL_NORMALISED_PREFIX,
                                   NL_DIDL_COMBINED_PREFIX, 'metadata'
                               ])

    # Register the three metadata formats with OaiJazz.
    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat("metadata", "http://didl.loc.nl/didl.xsd",
                                 NAMESPACEMAP.didl)
    oaiJazz.updateMetadataFormat(NL_DIDL_COMBINED_PREFIX, "",
                                 NAMESPACEMAP.gmhcombined)
    oaiJazz.updateMetadataFormat(NL_DIDL_NORMALISED_PREFIX, "",
                                 NAMESPACEMAP.gmhnorm)

    normLogger = Logger(join(statePath, '..', 'gateway', 'normlogger'))

    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(
            period=.1 if quickCommit else 10
        ),  # WST: Interval in seconds before sending a new request to the GATEWAY in case of an error while processing batch records (default=1). IntegrationTests need <=1 second! Otherwise tests will fail!
        name='api',
        autoStart=True)

    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate',
                                           'gateway'),
                                       userAgentAddition='ApiServer',
                                       xWait=True,
                                       name='api',
                                       autoCommit=False)


    # The nested tuples below form the tree that is returned as-is; each
    # component is followed by the tuples placed below it.
    return \
    (Observable(),
        createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, storage, oaiJazz),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                (PathFilter('/oai'),
                    (OaiPmh(
                            repositoryName="Gemeenschappelijke Metadata Harvester DANS-KB",
                            adminEmail="*****@*****.**",
                            externalUrl="http://oai.gharvester.dans.knaw.nl",
                            batchSize=200,
                            supportXWait=False,
                            # preciseDatestamp=False,
                            # deleteInSets=False
                        ),
                        (oaiJazz, ),
                        (RetrieveToGetDataAdapter(),
                            (storage,),
                        ),
                        (OaiBranding(
                            url="https://www.narcis.nl/images/logos/logo-knaw-house.gif", #TODO: Link to a joint-GMH icon...
                            link="https://harvester.dans.knaw.nl",
                            title="Gemeenschappelijke Metadata Harvester (GMH) van DANS en de KB"),
                        ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:metadataPrefix/text()'), #TODO: Could possibly be hard-coded in the harvester mapper: <metadataNamespace>urn:mpeg:mpeg21:2002:01-DII-NS</metadataNamespace>?? (storage,) #metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (RetrieveToGetDataAdapter(),
                                (storage,),
                            )
                        )
                    )
                ),
                (PathFilter('/rss'),
                    (LoggerRSS( title = 'GMH DANS-KB Normalisationlog Syndication', description = 'Harvester normalisation log for: ', link = 'http://rss.gharvester.dans.knaw.nl/rss', maximumRecords = 30),
                        (normLogger,
                            (storage,)
                        )
                    )
                ),
                (PathFilter('/xls'),
                    # (LogComponent("XLS-Request:"),),
                    (XlsServer(),)
                )
            )
        )
    )
Ejemplo n.º 32
0
 def testSecondsSinceEpoch(self):
     # Tested with ints; floats work as well, but are much harder to test
     # due to their binary representation.
     s = Schedule(secondsSinceEpoch=123)
     self.assertEqual(123, s.secondsSinceEpoch)
     # Pin the schedule's clock at 76: 123 - 76 = 47 remains.
     s._time = lambda: 76
     self.assertEqual(47, s.secondsFromNow())
Ejemplo n.º 33
0
def main(reactor,
         port,
         statePath,
         lucenePort,
         gatewayPort,
         quickCommit=False,
         **ignored):
    """Assemble and return the nested component tuple for the API server.

    The returned tuple has ``Observable()`` as its root, with two branches:
    the download helix produced by ``createDownloadHelix`` and an
    ``ObservableHttpServer`` listening on ``port`` that serves the paths
    ``/oai``, ``/cerif``, ``/sru`` and ``/rss``.

    Parameters:
        reactor: passed to SocketPool, PeriodicDownload, createDownloadHelix
            and ObservableHttpServer.
        port: port handed to ObservableHttpServer.
        statePath: base directory; the sub-paths 'store', 'oai', 'oai_cerif'
            and 'harvesterstate/gateway' are derived from it.
        lucenePort: passed to luceneAndReaderConfig and MultiLucene
            (host 'localhost').
        gatewayPort: port on 'localhost' used by the PeriodicDownload.
        quickCommit: when true the download schedule period is 1 instead
            of 10.
        **ignored: any additional keyword arguments are accepted and not used.

    Relies on names defined outside this function (e.g. DEFAULT_CORE,
    UNQUALIFIED_TERM_FIELDS, untokenizedFieldnames, NAMESPACEMAP,
    createDownloadHelix, luceneAndReaderConfig).
    """

    ######## START Lucene Integration ###############################################################
    # Base settings are already readonly=True; the clone below passes
    # readonly=True again.
    defaultLuceneSettings = LuceneSettings(
        commitTimeout=30,
        readonly=True,
    )

    # HTTP/1.1 request component with a SocketPool child on the same reactor.
    http11Request = be((
        HttpRequest1_1(),
        (SocketPool(reactor=reactor,
                    unusedTimeout=5,
                    limits=dict(totalSize=100, destinationSize=10)), ),
    ))

    luceneIndex = luceneAndReaderConfig(
        defaultLuceneSettings.clone(readonly=True), http11Request, lucenePort)

    # Query chain used by executeQueryHelix below:
    # AdapterToLuceneQuery -> MultiLucene -> (luceneIndex, http11Request).
    luceneRoHelix = be(
        (AdapterToLuceneQuery(defaultCore=DEFAULT_CORE,
                              coreConverters={
                                  DEFAULT_CORE:
                                  QueryExpressionToLuceneQueryDict(
                                      UNQUALIFIED_TERM_FIELDS,
                                      luceneSettings=luceneIndex.settings),
                              }), (
                                  MultiLucene(host='localhost',
                                              port=lucenePort,
                                              defaultCore=DEFAULT_CORE),
                                  (luceneIndex, ),
                                  (http11Request, ),
                              )))

    ######## END Lucene Integration ###############################################################

    # Currently empty (the only entry is commented out), so fieldnameRewrite
    # returns every name unchanged.
    fieldnameRewrites = {
        #         UNTOKENIZED_PREFIX+'genre': UNTOKENIZED_PREFIX+'dc:genre',
    }

    def fieldnameRewrite(name):
        # Look up a replacement field name; fall back to the name itself.
        return fieldnameRewrites.get(name, name)

    def drilldownFieldnamesTranslate(fieldname):
        # Switch to the untokenized variant when it is listed in
        # untokenizedFieldnames, then apply the rewrite table.
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
        resultsFrom=DEFAULT_CORE,
        matches=[],
        drilldownFieldnamesTranslate=drilldownFieldnamesTranslate)

    # Record storage under <statePath>/store; the listed parts are passed as
    # partsRemovedOnDelete.
    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'),
                               strategy=strategie,
                               partsRemovedOnDelete=[
                                   HEADER_PARTNAME, META_PARTNAME,
                                   METADATA_PARTNAME, OAI_DC_PARTNAME,
                                   LONG_PARTNAME, SHORT_PARTNAME,
                                   OPENAIRE_PARTNAME
                               ])

    # OAI index for the /oai endpoint, with the oai_dc metadata format
    # registered.
    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat(
        OAI_DC_PARTNAME, "http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        "http://purl.org/dc/elements/1.1/")

    # Separate OAI index for the /cerif endpoint, with the OpenAIRE CERIF
    # metadata format registered.
    oai_oa_cerifJazz = OaiJazz(join(statePath, 'oai_cerif'))
    oai_oa_cerifJazz.updateMetadataFormat(
        OPENAIRE_PARTNAME,
        "https://www.openaire.eu/schema/cris/current/openaire-cerif-profile.xsd",
        "https://www.openaire.eu/cerif-profile/1.1/")
    # All of the following OAI-PMH sets shall be recognized by the CRIS, even if not all of them are populated.
    oai_oa_cerifJazz.updateSet("openaire_cris_projects",
                               "OpenAIRE_CRIS_projects")
    oai_oa_cerifJazz.updateSet("openaire_cris_orgunits",
                               "OpenAIRE_CRIS_orgunits")
    oai_oa_cerifJazz.updateSet("openaire_cris_persons",
                               "OpenAIRE_CRIS_persons")
    oai_oa_cerifJazz.updateSet("openaire_cris_patents",
                               "OpenAIRE_CRIS_patents")
    oai_oa_cerifJazz.updateSet("openaire_cris_products",
                               "OpenAIRE_CRIS_products")
    oai_oa_cerifJazz.updateSet("openaire_cris_publications",
                               "OpenAIRE_CRIS_publications")

    oai_oa_cerifJazz.updateSet("openaire_cris_funding",
                               "OpenAIRE_CRIS_funding")
    oai_oa_cerifJazz.updateSet("openaire_cris_events", "OpenAIRE_CRIS_events")
    oai_oa_cerifJazz.updateSet("openaire_cris_equipments",
                               "OpenAIRE_CRIS_equipments")

    # Clause converters handed to CqlMultiSearchClauseConversion in
    # executeQueryHelix; the second one applies fieldnameRewrite to every
    # clause (shouldModifyFieldValue always returns True).
    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite).filterAndModifier(),
    ]

    # Periodic download from the gateway on localhost:gatewayPort; started
    # automatically (autoStart=True).
    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(
            period=1 if quickCommit else 10
        ),  # WST: Interval in seconds before sending a new request to the GATEWAY in case of an error while processing batch records.(default=1). IntegrationTests need 1 second! Otherwise tests will fail!
        name='api',
        autoStart=True)

    # Download processor for path '/oaix'; its working directory is
    # <statePath>/harvesterstate/gateway.
    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate',
                                           'gateway'),
                                       userAgentAddition='ApiServer',
                                       xWait=True,
                                       name='api',
                                       autoCommit=False)

    # Query chain reused by the /sru and /rss branches below; only
    # 'executeQuery' messages are allowed through.
    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRoHelix,),
                    )
                )
            )
        )

    # Root of the returned tree: the download helix plus the HTTP server.
    return \
    (Observable(),
        # Built by a helper defined elsewhere from the download components,
        # the storage and both OAI indexes.
        createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, storage, oaiJazz, oai_oa_cerifJazz),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                # /oai: OAI-PMH endpoint backed by oaiJazz and storage.
                (PathFilter(["/oai"]),
                    (OaiPmh(repositoryName="NARCIS OAI-pmh", adminEmail="*****@*****.**", externalUrl="http://oai.narcis.nl"),
                        (oaiJazz,),
                        (StorageAdapter(),
                            (storage,)
                        ),
                        (OaiBranding(
                            url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                            link="http://oai.narcis.nl",
                            title="Narcis - The gateway to scholarly information in The Netherlands"),
                        ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (storage,)
                        )
                    )
                ),
                # /cerif: OpenAIRE CERIF OAI-PMH endpoint backed by
                # oai_oa_cerifJazz and storage.
                (PathFilter(["/cerif"]),
                    (OaiPmhDans(repositoryName="OpenAIRE CERIF", adminEmail="*****@*****.**", repositoryIdentifier="services.nod.dans.knaw.nl", externalUrl="http://services.nod.dans.knaw.nl"), #TODO: pathFilter should resemble proxy path
                        (oai_oa_cerifJazz,),
                        (StorageAdapter(),
                            (storage,)
                        ),
                        (OaiOpenAIREDescription(
                            serviceid='organisation:ORG1242054',
                            acronym='services.nod.dans.knaw.nl',
                            name='NARCIS',
                            description='Compliant with the OpenAIRE Guidelines for CRIS Managers v.1.1.',
                            website='https://www.narcis.nl',
                            baseurl='http://services.nod.dans.knaw.nl/oa-cerif',
                            subjectheading='',
                            orgunitid='organisation:ORG1242054',
                            owneracronym='DANS'),
                        ),
                        # (OaiBranding(
                        #     url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                        #     link="http://oai.narcis.nl",
                        #     title="Narcis - The gateway to scholarly information in The Netherlands"),
                        # ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (storage,)
                        )
                    )
                ),
                # /sru: SRU endpoint; queries go through executeQueryHelix and
                # records are read from storage.
                (PathFilter(['/sru']),
                    (SruParser(
                            host='sru.narcis.nl',
                            port=80,
                            defaultRecordSchema='knaw_short',
                            defaultRecordPacking='xml'),
                        (SruLimitStartRecord(limitBeyond=4000),
                            (SruHandler(
                                    includeQueryTimes=False,
                                    extraXParameters=[],
                                    enableCollectLog=False),
                                (SruTermDrilldown(),),
                                executeQueryHelix,
                                (StorageAdapter(),
                                    (storage,)
                                )
                            )
                        )
                    )
                ),
                # /rss: RSS feed; queries go through executeQueryHelix and
                # item fields are extracted via the XPath expressions below.
                (PathFilter('/rss'),
                    (Rss(   supportedLanguages = ['nl','en'], # defaults to first, if requested language is not available or supplied.
                            title = {'nl':'NARCIS', 'en':'NARCIS'},
                            description = {'nl':'NARCIS: De toegang tot de Nederlandse wetenschapsinformatie', 'en':'NARCIS: The gateway to Dutch scientific information'},
                            link = {'nl':'http://www.narcis.nl/?Language=nl', 'en':'http://www.narcis.nl/?Language=en'},
                            maximumRecords = 20),
                        executeQueryHelix,
                        (RssItem(
                                nsMap=NAMESPACEMAP,
                                title = ('knaw_short', {'nl':'//short:metadata/short:titleInfo[not (@xml:lang)]/short:title/text()', 'en':'//short:metadata/short:titleInfo[@xml:lang="en"]/short:title/text()'}),
                                description = ('knaw_short', {'nl':'//short:abstract[not (@xml:lang)]/text()', 'en':'//short:abstract[@xml:lang="en"]/text()'}),
                                pubdate = ('knaw_short', '//short:dateIssued/short:parsed/text()'),
                                linkTemplate = 'http://www.narcis.nl/%(wcpcollection)s/RecordID/%(oai_identifier)s/Language/%(language)s',
                                wcpcollection = ('meta', '//*[local-name() = "collection"]/text()'),
                                oai_identifier = ('meta', '//meta:record/meta:id/text()'),
                                # NOTE: the parentheses have no trailing comma, so this is a plain string, not a tuple.
                                language = ('Dummy: Language is auto provided by the calling RSS component, but needs to be present to serve the linkTemplate.')
                            ),
                            (StorageAdapter(),
                                (storage,)
                            )
                        )
                    )
                )
            )
        )
    )
Ejemplo n.º 34
0
 def testSetScheduleWithIdenticalScheduleDoesNothing(self):
     """Setting a schedule equal to the current one must not call the reactor."""
     # Exactly one reactor call must have been recorded so far (the
     # single-element unpacking fails otherwise) -- presumably the timer
     # registered by the fixture; confirm against setUp.
     initialTimer, = self.reactor.calledMethods
     self.reactor.calledMethods.reset()
     self.assertEqual(3600, initialTimer.args[0])
     # Same period as the recorded timer: no new reactor calls expected.
     self.pc.setSchedule(schedule=Schedule(period=3600))
     self.assertEqual([], self.reactor.calledMethodNames())