def test():
    """Put one socket for each of three destinations with ``totalSize`` 2.

    Asserts that nothing is written to stderr by the put that exceeds the
    limit and that exactly two of the three sockets are still pooled.
    """
    pool = SocketPool(reactor=CallTrace(), limits={'totalSize': 2})  # Limits enforced on put, not async.
    destinations = [('h', 1), ('i', 2), ('j', 3)]

    def stillPooled():
        # Drain each destination until the pool answers None for it.
        remaining = []
        for destHost, destPort in destinations:
            while True:
                fromPool = yield pool.getPooledSocket(host=destHost, port=destPort)
                if fromPool is None:
                    break
                if fromPool:
                    remaining.append(fromPool)
        raise StopIteration(remaining)

    yield pool.putSocketInPool(host='h', port=1, sock=MockSok('sH'))
    yield pool.putSocketInPool(host='i', port=2, sock=MockSok('sI'))
    with stderr_replaced() as err:
        yield pool.putSocketInPool(host='j', port=3, sock=MockSok('sJ'))
        stderrOutput = err.getvalue()
        self.assertEquals('', stderrOutput, err.getvalue())
    wasStillPooled = yield stillPooled()
    self.assertEquals(2, len(wasStillPooled))
    allowed = set(['sH', 'sI', 'sJ'])
    self.assertTrue(set(wasStillPooled).issubset(allowed))
def test():
    """Put a third socket for one destination with ``destinationSize`` 2.

    Asserts that the put produces no stderr output, that four sockets
    remain pooled in the expected order, and that the first socket put
    for the over-full destination was shut down and closed.
    """
    pool = SocketPool(reactor=CallTrace(), limits={'destinationSize': 2})  # Limits enforced on put, not async.
    destinations = [('h', 1), ('i', 2), ('j', 3)]

    def stillPooled():
        # Drain each destination until the pool answers None for it.
        remaining = []
        for destHost, destPort in destinations:
            while True:
                fromPool = yield pool.getPooledSocket(host=destHost, port=destPort)
                if fromPool is None:
                    break
                if fromPool:
                    remaining.append(fromPool)
        raise StopIteration(remaining)

    sJ = MockSok('sJ')
    sJ2 = MockSok('sJ2')
    yield pool.putSocketInPool(host='h', port=1, sock=MockSok('sH'))
    yield pool.putSocketInPool(host='i', port=2, sock=MockSok('sI'))
    for sock in (sJ, sJ2):
        yield pool.putSocketInPool(host='j', port=3, sock=sock)
    with stderr_replaced() as err:
        yield pool.putSocketInPool(host='j', port=3, sock=MockSok('sJ3'))
        stderrOutput = err.getvalue()
        self.assertEquals('', stderrOutput, err.getvalue())
    wasStillPooled = yield stillPooled()
    self.assertEquals(4, len(wasStillPooled))
    self.assertEquals(['sH', 'sI', 'sJ3', 'sJ2'], wasStillPooled)
    self.assertEquals(['shutdown', 'close'], sJ.log.calledMethodNames())
def test():
    """Exceed ``totalSize`` 2 on a single destination.

    Asserts which sockets survive, that the dropped socket received
    ``shutdown(SHUT_RDWR)`` followed by ``close()``, and that the same
    survivors pattern holds after four further puts.
    """
    pool = SocketPool(reactor=CallTrace(), limits={'totalSize': 2})  # Limits enforced on put, not async.

    def stillPooled():
        # Drain destination ('h', 1) until the pool answers None.
        remaining = []
        while True:
            fromPool = yield pool.getPooledSocket(host='h', port=1)
            if fromPool is None:
                break
            if fromPool:
                remaining.append(fromPool)
        raise StopIteration(remaining)

    s0 = MockSok('s0')
    s1 = MockSok('s1')
    for sock in (s0, s1):
        yield pool.putSocketInPool(host='h', port=1, sock=sock)
    with stderr_replaced() as err:
        yield pool.putSocketInPool(host='h', port=1, sock=MockSok('s2'))
        self.assertEquals('', err.getvalue(), err.getvalue())  #@@
    wasStillPooled = yield stillPooled()
    self.assertEquals(['s2', 's0'], wasStillPooled)

    # s1 is the socket that was dropped; s0 was left untouched.
    self.assertEquals(['shutdown', 'close'], s1.log.calledMethodNames())
    shutCall, closeCall = s1.log.calledMethods
    self.assertEquals(((SHUT_RDWR,), {}), (shutCall.args, shutCall.kwargs))
    self.assertEquals(((), {}), (closeCall.args, closeCall.kwargs))
    self.assertEquals([], s0.log.calledMethodNames())

    for name in ['s0', 's1', 's2', 's3']:
        yield pool.putSocketInPool(host='h', port=1, sock=MockSok(name))
    wasStillPooled = yield stillPooled()
    self.assertEquals(['s3', 's0'], wasStillPooled)
def testUnusedTimeoutSetInitialisesTimer(self):
    """Check that passing ``unusedTimeout`` registers a timer and evicts sockets.

    Whitebox part: constructing a SocketPool with ``unusedTimeout`` makes
    exactly one ``addTimer`` call on the reactor, with keyword arguments
    ``seconds`` and ``callback``.
    Blackbox part: a pooled socket can still be fetched after 0.001s, but
    the pool is empty after a further 0.04s.
    """
    # Whitebox (unusedTimeout -> addTimer)
    mockReactor = CallTrace()
    SocketPool(reactor=mockReactor, unusedTimeout=0.02)
    self.assertEquals(['addTimer'], mockReactor.calledMethodNames())
    addTimerKwargs = mockReactor.calledMethods[0].kwargs
    # Compare as sets: the order of dict keys is an implementation detail,
    # and where keys() returns a view it never compares equal to a list.
    self.assertEquals(set(['seconds', 'callback']), set(addTimerKwargs.keys()))
    self.assertEquals(0.02, addTimerKwargs['seconds'])

    # Blackbox
    def test():
        top = be((
            Observable(),
            (SocketPool(reactor=reactor(), unusedTimeout=0.02), ),
        ))
        yield top.any.putSocketInPool(host='x', port=80, sock=MockSok('A'))
        yield top.any.putSocketInPool(host='x', port=80, sock=MockSok('B'))
        yield sleep(seconds=0.001)
        result = yield top.any.getPooledSocket(host='x', port=80)
        self.assertEquals('B', result)
        yield sleep(seconds=0.04)
        result = yield top.any.getPooledSocket(host='x', port=80)
        self.assertEquals(None, result)

    asProcess(test())
def testPut3GetOnlyYours(self):
    """Sockets are only handed out for the exact host and port they were put under."""
    pool = SocketPool(reactor=CallTrace())
    for host, port, sock in [('x', 1, 'A'), ('x', 2, 'B'), ('y', 1, 'C')]:
        retval(pool.putSocketInPool(host=host, port=port, sock=sock))

    # Unknown host + port
    unknownDestinations = [('xx', 1), ('', 1), (None, 1), ('x', 0), ('x', 3)]
    for host, port in unknownDestinations:
        fromPool = retval(pool.getPooledSocket(host=host, port=port))
        self.assertEquals(None, fromPool)

    # Retrieved once
    fromPool = retval(pool.getPooledSocket(host='x', port=1))
    self.assertEquals('A', fromPool)
def test():
    """Put one socket for each of three destinations with ``totalSize`` 2.

    Asserts that nothing is written to stderr by the put that exceeds the
    limit and that exactly two of the three sockets are still pooled.
    """
    pool = SocketPool(reactor=CallTrace(), limits={'totalSize': 2})  # Limits enforced on put, not async.
    destinations = [('h', 1), ('i', 2), ('j', 3)]

    def stillPooled():
        # Drain each destination until the pool answers None for it.
        remaining = []
        for destHost, destPort in destinations:
            while True:
                fromPool = yield pool.getPooledSocket(host=destHost, port=destPort)
                if fromPool is None:
                    break
                if fromPool:
                    remaining.append(fromPool)
        raise StopIteration(remaining)

    yield pool.putSocketInPool(host='h', port=1, sock=MockSok('sH'))
    yield pool.putSocketInPool(host='i', port=2, sock=MockSok('sI'))
    with stderr_replaced() as err:
        yield pool.putSocketInPool(host='j', port=3, sock=MockSok('sJ'))
        stderrOutput = err.getvalue()
        self.assertEquals('', stderrOutput, err.getvalue())
    wasStillPooled = yield stillPooled()
    self.assertEquals(2, len(wasStillPooled))
    allowed = set(['sH', 'sI', 'sJ'])
    self.assertTrue(set(wasStillPooled).issubset(allowed))
def testPutNGetLIFO(self):
    """Sockets put for one destination come back last-in first-out, then None."""
    pool = SocketPool(reactor=CallTrace())
    for sock in ['A', 'B', 'C']:
        retval(pool.putSocketInPool(host='x', port=1, sock=sock))

    for expected in ['C', 'B', 'A', None]:
        fromPool = retval(pool.getPooledSocket(host='x', port=1))
        self.assertEquals(expected, fromPool)
def testPutNGet1Put1StillLIFO(self):
    """A put in between gets keeps the last-in first-out order intact."""
    pool = SocketPool(reactor=CallTrace())
    destination = dict(host='example.org', port=80)

    def put(sock):
        retval(pool.putSocketInPool(sock=sock, **destination))

    def get():
        return retval(pool.getPooledSocket(**destination))

    put('A')
    put('B')
    self.assertEquals('B', get())
    put('C')
    self.assertEquals('C', get())
    self.assertEquals('A', get())
    self.assertEquals(None, get())
def testPutEmptyPut(self):
    """A destination that has been fully drained accepts and returns new sockets."""
    pool = SocketPool(reactor=CallTrace())
    destination = dict(host='10.0.0.1', port=60000)

    for sock in (0, 1):
        retval(pool.putSocketInPool(sock=sock, **destination))

    # Last in comes out first; after both are taken the pool answers None.
    for expected in (1, 0, None):
        self.assertEquals(expected, retval(pool.getPooledSocket(**destination)))

    retval(pool.putSocketInPool(sock=2, **destination))
    self.assertEquals(2, retval(pool.getPooledSocket(**destination)))
def test():
    """Blackbox check of ``unusedTimeout``.

    A socket is still handed out 0.001s after being pooled; after a
    further 0.04s (longer than the 0.02s timeout) the pool answers None.
    """
    root = Observable()
    pool = SocketPool(reactor=reactor(), unusedTimeout=0.02)
    top = be((root, (pool,),))

    for name in ['A', 'B']:
        yield top.any.putSocketInPool(host='x', port=80, sock=MockSok(name))

    yield sleep(seconds=0.001)
    fromPool = yield top.any.getPooledSocket(host='x', port=80)
    self.assertEquals('B', fromPool)

    yield sleep(seconds=0.04)
    fromPool = yield top.any.getPooledSocket(host='x', port=80)
    self.assertEquals(None, fromPool)
def testPutEmptyPut(self):
    """A destination that has been fully drained accepts and returns new sockets."""
    pool = SocketPool(reactor=CallTrace())
    destination = dict(host='10.0.0.1', port=60000)

    for sock in (0, 1):
        retval(pool.putSocketInPool(sock=sock, **destination))

    # Last in comes out first; after both are taken the pool answers None.
    for expected in (1, 0, None):
        self.assertEquals(expected, retval(pool.getPooledSocket(**destination)))

    retval(pool.putSocketInPool(sock=2, **destination))
    self.assertEquals(2, retval(pool.getPooledSocket(**destination)))
def test():
    """Fill the pool exactly up to ``totalSize`` 3 and empty it again, twice."""
    pool = SocketPool(reactor=CallTrace(), limits={'totalSize': 3})  # Limits enforced on put, not async.

    def fillAndEmpty():
        for name in ['s2', 's1', 's0']:
            yield pool.putSocketInPool(host='h', port=1, sock=name)
        # All three must come back, most recently put first.
        for expected in ['s0', 's1', 's2']:
            fromPool = yield pool.getPooledSocket(host='h', port=1)
            self.assertEquals(expected, fromPool)

    yield fillAndEmpty()
    yield fillAndEmpty()
def testPutNGet1Put1StillLIFO(self):
    """A put in between gets keeps the last-in first-out order intact."""
    pool = SocketPool(reactor=CallTrace())
    destination = dict(host='example.org', port=80)

    def put(sock):
        retval(pool.putSocketInPool(sock=sock, **destination))

    def get():
        return retval(pool.getPooledSocket(**destination))

    put('A')
    put('B')
    self.assertEquals('B', get())
    put('C')
    self.assertEquals('C', get())
    self.assertEquals('A', get())
    self.assertEquals(None, get())
def test():
    """Exceed ``totalSize`` 2 on a single destination.

    Asserts which sockets survive, that the dropped socket received
    ``shutdown(SHUT_RDWR)`` followed by ``close()``, and that the same
    survivors pattern holds after four further puts.
    """
    pool = SocketPool(reactor=CallTrace(), limits={'totalSize': 2})  # Limits enforced on put, not async.

    def stillPooled():
        # Drain destination ('h', 1) until the pool answers None.
        remaining = []
        while True:
            fromPool = yield pool.getPooledSocket(host='h', port=1)
            if fromPool is None:
                break
            if fromPool:
                remaining.append(fromPool)
        raise StopIteration(remaining)

    s0 = MockSok('s0')
    s1 = MockSok('s1')
    for sock in (s0, s1):
        yield pool.putSocketInPool(host='h', port=1, sock=sock)
    with stderr_replaced() as err:
        yield pool.putSocketInPool(host='h', port=1, sock=MockSok('s2'))
        self.assertEquals('', err.getvalue(), err.getvalue())  #@@
    wasStillPooled = yield stillPooled()
    self.assertEquals(['s2', 's0'], wasStillPooled)

    # s1 is the socket that was dropped; s0 was left untouched.
    self.assertEquals(['shutdown', 'close'], s1.log.calledMethodNames())
    shutCall, closeCall = s1.log.calledMethods
    self.assertEquals(((SHUT_RDWR, ), {}), (shutCall.args, shutCall.kwargs))
    self.assertEquals(((), {}), (closeCall.args, closeCall.kwargs))
    self.assertEquals([], s0.log.calledMethodNames())

    for name in ['s0', 's1', 's2', 's3']:
        yield pool.putSocketInPool(host='h', port=1, sock=MockSok(name))
    wasStillPooled = yield stillPooled()
    self.assertEquals(['s3', 's0'], wasStillPooled)
def testPutThenGetThenEmpty(self):
    """A put returns None; the socket can be fetched exactly once afterwards."""
    pool = SocketPool(reactor=CallTrace())
    putResult = retval(pool.putSocketInPool(host='x', port=1, sock='mock'))
    self.assertEquals(None, putResult)

    for expected in ['mock', None]:
        fromPool = retval(pool.getPooledSocket(host='x', port=1))
        self.assertEquals(expected, fromPool)
def testGetOnEmptyPool(self):
    """Getting from an empty pool yields None and makes no calls on the reactor."""
    reactorTrace = CallTrace()
    pool = SocketPool(reactor=reactorTrace)
    fromPool = retval(pool.getPooledSocket(host='x', port=1025))
    self.assertEquals(None, fromPool)
    self.assertEquals([], reactorTrace.calledMethodNames())
def test():
    """Check that only sockets left unused past ``unusedTimeout`` are evicted.

    Six sockets are pooled over two destinations. After the first timer
    period some are taken out and two are put back; after the next check
    only the two re-pooled sockets must remain, untouched, while a socket
    that stayed in the pool must have received ``shutdown(SHUT_RDWR)``
    and ``close()``.
    """
    sA, sB, sC, s1, s2, s3 = (MockSok(x) for x in ['A', 'B', 'C', 1, 2, 3])
    top = be((
        Observable(),
        (SocketPool(reactor=reactor(), unusedTimeout=0.025), ),
    ))

    # Make sure 1st check all-sockets-ok
    yield sleep(seconds=(0.001))

    # Initial set
    yield top.any.putSocketInPool(host='x', port=80, sock=sA)
    yield top.any.putSocketInPool(host='x', port=80, sock=sB)
    yield top.any.putSocketInPool(host='x', port=80, sock=sC)
    yield top.any.putSocketInPool(host='example.org', port=8080, sock=s1)
    yield top.any.putSocketInPool(host='example.org', port=8080, sock=s2)
    yield top.any.putSocketInPool(host='example.org', port=8080, sock=s3)
    self.assertEquals([], s2.log.calledMethodNames())  # sample

    # Pass time, no timeout - 1st check always all-sockets-ok
    yield sleep(seconds=(0.025 + 0.022))  # +/- 0.003 until next mostly-fatal check
    self.assertEquals([], s2.log.calledMethodNames())  # sample

    # Use some, put some back
    _sockC = yield top.any.getPooledSocket(host='x', port=80)
    _sockB = yield top.any.getPooledSocket(host='x', port=80)
    _sock3 = yield top.any.getPooledSocket(host='example.org', port=8080)
    self.assertEquals([sC, sB, s3], [_sockC, _sockB, _sock3])
    self.assertEquals([], sC.log.calledMethodNames())
    self.assertEquals([], sB.log.calledMethodNames())
    self.assertEquals([], s3.log.calledMethodNames())
    yield top.any.putSocketInPool(host='x', port=80, sock=sC)
    yield top.any.putSocketInPool(host='example.org', port=8080, sock=s3)

    yield sleep(seconds=0.015)  # 0.025 - (0.015 - 0.003) = 0.013 until all-fatal check

    # Drain both destinations. The end-of-pool marker is tested by
    # identity ('is None'): the pooled mocks are compared to plain values
    # elsewhere, so an equality test against None would depend on their
    # __eq__ implementation.
    inPool = []
    for destHost, destPort in [('x', 80), ('example.org', 8080)]:
        while True:
            result = yield top.any.getPooledSocket(host=destHost, port=destPort)
            if result is None:
                break
            inPool.append(result)

    self.assertEquals([sC, s3], inPool)
    self.assertEquals([], sC.log.calledMethodNames())
    self.assertEquals([], s3.log.calledMethodNames())
    self.assertEquals(['shutdown', 'close'], s1.log.calledMethodNames())  # sample
    shutdown, close = s1.log.calledMethods
    self.assertEquals(((SHUT_RDWR, ), {}), (shutdown.args, shutdown.kwargs))
    self.assertEquals(((), {}), (close.args, close.kwargs))
def main(reactor, port, statePath, lucenePort, gatewayPort, quickCommit=False, **ignored):
    """Build and return the component tree (DNA) of the API server.

    The returned tuple has an Observable root with two branches: the
    download helix produced by createDownloadHelix, and an
    ObservableHttpServer on ``port`` that routes the paths /oai, /cerif,
    /sru and /rss.

    reactor      -- passed to SocketPool, PeriodicDownload,
                    createDownloadHelix and ObservableHttpServer.
    port         -- port handed to ObservableHttpServer.
    statePath    -- base directory; 'store', 'oai', 'oai_cerif' and
                    'harvesterstate/gateway' are joined onto it.
    lucenePort   -- passed to luceneAndReaderConfig and MultiLucene.
    gatewayPort  -- port PeriodicDownload connects to on localhost.
    quickCommit  -- when true the download Schedule period is 1 instead of 10.
    ignored      -- extra keyword arguments; accepted and not used here.
    """

    ######## START Lucene Integration ###############################################################
    defaultLuceneSettings = LuceneSettings(
        commitTimeout=30,
        readonly=True,
    )

    # HTTP/1.1 client with a socket pool; used by both luceneIndex and MultiLucene below.
    http11Request = be((
        HttpRequest1_1(),
        (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)), ),
    ))

    luceneIndex = luceneAndReaderConfig(
        defaultLuceneSettings.clone(readonly=True), http11Request, lucenePort)

    luceneRoHelix = be(
        (AdapterToLuceneQuery(defaultCore=DEFAULT_CORE,
                              coreConverters={
                                  DEFAULT_CORE: QueryExpressionToLuceneQueryDict(
                                      UNQUALIFIED_TERM_FIELDS, luceneSettings=luceneIndex.settings),
                              }),
            (
                MultiLucene(host='localhost', port=lucenePort, defaultCore=DEFAULT_CORE),
                (luceneIndex, ),
                (http11Request, ),
            )))
    ######## END Lucene Integration ###############################################################

    # Currently empty, so fieldnameRewrite returns every name unchanged.
    fieldnameRewrites = {
        # UNTOKENIZED_PREFIX+'genre': UNTOKENIZED_PREFIX+'dc:genre',
    }

    def fieldnameRewrite(name):
        # Look the name up in fieldnameRewrites; fall back to the name itself.
        return fieldnameRewrites.get(name, name)

    def drilldownFieldnamesTranslate(fieldname):
        # Prefer the untokenized variant of the field when it is a known untokenized fieldname.
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
        resultsFrom=DEFAULT_CORE,
        matches=[],
        drilldownFieldnamesTranslate=drilldownFieldnamesTranslate)

    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'),
                               strategy=strategie,
                               partsRemovedOnDelete=[
                                   HEADER_PARTNAME, META_PARTNAME,
                                   METADATA_PARTNAME, OAI_DC_PARTNAME,
                                   LONG_PARTNAME, SHORT_PARTNAME,
                                   OPENAIRE_PARTNAME
                               ])

    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat(
        OAI_DC_PARTNAME, "http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        "http://purl.org/dc/elements/1.1/")

    # Separate OaiJazz instance (own directory) for the OpenAIRE CERIF records.
    oai_oa_cerifJazz = OaiJazz(join(statePath, 'oai_cerif'))
    oai_oa_cerifJazz.updateMetadataFormat(
        OPENAIRE_PARTNAME,
        "https://www.openaire.eu/schema/cris/current/openaire-cerif-profile.xsd",
        "https://www.openaire.eu/cerif-profile/1.1/")
    # All of the following OAI-PMH sets shall be recognized by the CRIS, even if not all of them are populated.
    oai_oa_cerifJazz.updateSet("openaire_cris_projects", "OpenAIRE_CRIS_projects")
    oai_oa_cerifJazz.updateSet("openaire_cris_orgunits", "OpenAIRE_CRIS_orgunits")
    oai_oa_cerifJazz.updateSet("openaire_cris_persons", "OpenAIRE_CRIS_persons")
    oai_oa_cerifJazz.updateSet("openaire_cris_patents", "OpenAIRE_CRIS_patents")
    oai_oa_cerifJazz.updateSet("openaire_cris_products", "OpenAIRE_CRIS_products")
    oai_oa_cerifJazz.updateSet("openaire_cris_publications", "OpenAIRE_CRIS_publications")
    oai_oa_cerifJazz.updateSet("openaire_cris_funding", "OpenAIRE_CRIS_funding")
    oai_oa_cerifJazz.updateSet("openaire_cris_events", "OpenAIRE_CRIS_events")
    oai_oa_cerifJazz.updateSet("openaire_cris_equipments", "OpenAIRE_CRIS_equipments")

    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite).filterAndModifier(),
    ]

    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(
            period=1 if quickCommit else 10
        ),  # WST: Interval in seconds before sending a new request to the GATEWAY in case of an error while processing batch records.(default=1). IntegrationTests need 1 second! Otherwise tests will fail!
        name='api',
        autoStart=True)

    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate', 'gateway'),
                                       userAgentAddition='ApiServer',
                                       xWait=True,
                                       name='api',
                                       autoCommit=False)

    # Shared query branch; the same tuple is used under both /sru and /rss below.
    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRoHelix,),
                    )
                )
            )
        )

    return \
    (Observable(),
        createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, storage, oaiJazz, oai_oa_cerifJazz),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                (PathFilter(["/oai"]),
                    (OaiPmh(repositoryName="NARCIS OAI-pmh", adminEmail="*****@*****.**", externalUrl="http://oai.narcis.nl"),
                        (oaiJazz,),
                        (StorageAdapter(),
                            (storage,)
                        ),
                        (OaiBranding(
                            url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                            link="http://oai.narcis.nl",
                            title="Narcis - The gateway to scholarly information in The Netherlands"),
                        ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (storage,)
                        )
                    )
                ),
                (PathFilter(["/cerif"]),
                    (OaiPmhDans(repositoryName="OpenAIRE CERIF", adminEmail="*****@*****.**", repositoryIdentifier="services.nod.dans.knaw.nl", externalUrl="http://services.nod.dans.knaw.nl"), #TODO: pathFilter should resemble proxy path
                        (oai_oa_cerifJazz,),
                        (StorageAdapter(),
                            (storage,)
                        ),
                        (OaiOpenAIREDescription(
                            serviceid='organisation:ORG1242054',
                            acronym='services.nod.dans.knaw.nl',
                            name='NARCIS',
                            description='Compliant with the OpenAIRE Guidelines for CRIS Managers v.1.1.',
                            website='https://www.narcis.nl',
                            baseurl='http://services.nod.dans.knaw.nl/oa-cerif',
                            subjectheading='',
                            orgunitid='organisation:ORG1242054',
                            owneracronym='DANS'),
                        ),
                        # (OaiBranding(
                        #     url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                        #     link="http://oai.narcis.nl",
                        #     title="Narcis - The gateway to scholarly information in The Netherlands"),
                        # ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (storage,)
                        )
                    )
                ),
                (PathFilter(['/sru']),
                    (SruParser(
                            host='sru.narcis.nl',
                            port=80,
                            defaultRecordSchema='knaw_short',
                            defaultRecordPacking='xml'),
                        (SruLimitStartRecord(limitBeyond=4000),
                            (SruHandler(
                                    includeQueryTimes=False,
                                    extraXParameters=[],
                                    enableCollectLog=False),
                                (SruTermDrilldown(),),
                                executeQueryHelix,
                                (StorageAdapter(),
                                    (storage,)
                                )
                            )
                        )
                    )
                ),
                (PathFilter('/rss'),
                    (Rss(
                            supportedLanguages = ['nl','en'], # defaults to first, if requested language is not available or supplied.
                            title = {'nl':'NARCIS', 'en':'NARCIS'},
                            description = {'nl':'NARCIS: De toegang tot de Nederlandse wetenschapsinformatie', 'en':'NARCIS: The gateway to Dutch scientific information'},
                            link = {'nl':'http://www.narcis.nl/?Language=nl', 'en':'http://www.narcis.nl/?Language=en'},
                            maximumRecords = 20),
                        executeQueryHelix,
                        (RssItem(
                                nsMap=NAMESPACEMAP,
                                title = ('knaw_short', {'nl':'//short:metadata/short:titleInfo[not (@xml:lang)]/short:title/text()', 'en':'//short:metadata/short:titleInfo[@xml:lang="en"]/short:title/text()'}),
                                description = ('knaw_short', {'nl':'//short:abstract[not (@xml:lang)]/text()', 'en':'//short:abstract[@xml:lang="en"]/text()'}),
                                pubdate = ('knaw_short', '//short:dateIssued/short:parsed/text()'),
                                linkTemplate = 'http://www.narcis.nl/%(wcpcollection)s/RecordID/%(oai_identifier)s/Language/%(language)s',
                                wcpcollection = ('meta', '//*[local-name() = "collection"]/text()'),
                                oai_identifier = ('meta', '//meta:record/meta:id/text()'),
                                language = ('Dummy: Language is auto provided by the calling RSS component, but needs to be present to serve the linkTemplate.')
                            ),
                            (StorageAdapter(),
                                (storage,)
                            )
                        )
                    )
                )
            )
        )
    )
def writerMain(writerReactor, statePath, luceneserverPort, gatewayPort, quickCommit=False):
    """Build and return the component tree (DNA) of the index writer.

    The returned tuple has an Observable root with two branches: a
    PeriodicCall that sends 'commit' via LuceneCommit, and a
    PeriodicDownload that feeds OAI records from the gateway through
    parsing and field conversion into luceneWriter ('add' messages) or
    straight to luceneWriter ('delete' messages).

    writerReactor    -- passed to SocketPool, PeriodicDownload and PeriodicCall.
    statePath        -- base directory; 'harvesterstate/gateway' is joined onto it.
    luceneserverPort -- passed to luceneAndReaderConfig and LuceneCommit.
    gatewayPort      -- port PeriodicDownload connects to on localhost.
    quickCommit      -- when true the download period is 1 instead of 10 and
                        the commit period is 1 instead of 300.
    """
    # HTTP/1.1 client with a socket pool; used by luceneWriter and LuceneCommit below.
    http11Request = be(
        (HttpRequest1_1(),
            (SocketPool(reactor=writerReactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)),),
        )
    )

    indexCommitTimeout = 30

    defaultLuceneSettings = LuceneSettings(
        commitTimeout=indexCommitTimeout,
        readonly=False,
    )

    luceneWriter = luceneAndReaderConfig(defaultLuceneSettings, http11Request, luceneserverPort)

    periodicDownload = PeriodicDownload(
        writerReactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(period=1 if quickCommit else 10), # WST: Interval in seconds before sending a new request to the GATEWAY in case of an error while processing batch records.(default=1). IntegrationTests need 1 second! Otherwise tests will fail!
        name='index',
        autoStart=True)

    oaiDownload = OaiDownloadProcessor(
        path='/oaix',
        metadataPrefix=NORMALISED_DOC_NAME,
        workingDirectory=join(statePath, 'harvesterstate', 'gateway'),
        userAgentAddition='idx-server',
        xWait=True,
        name='index',
        autoCommit=False)

    # Post commit to the Lucene server:
    scheduledCommitPeriodicCall = be(
        (PeriodicCall(writerReactor, message='commit', name='Scheduled commit', schedule=Schedule(period=1 if quickCommit else 300), initialSchedule=Schedule(period=1)), # WST: Flushes data from memory to disk. IntegrationTests need 1 second! Otherwise tests will fail! (API).
            (AllToDo(), # broadcast message to all components, despite of what kind of message...
                # (periodicDownload,), # WST: periodicDownload does not do anything with a 'commit' message? So why send it to it???
                (LuceneCommit(host='localhost', port=luceneserverPort,), # 'commit' message results in http post to /commit/ to Lucene server:
                    # (LogComponent("PERIODIC"),#), # [PERIODIC] httprequest1_1(*(), **{'body': None, 'host': 'localhost', 'request': '/commit/', 'port': 52501, 'method': 'POST'})
                        (http11Request,),
                    # ),
                )
            )
        )
    )

    writerServer = \
    (Observable(),
        (scheduledCommitPeriodicCall,), # Periodically send a 'commit' to the Lucene server...
        # (DebugPrompt(reactor=writerReactor, port=readerPort-1, globals=locals()),),
        (periodicDownload, # (Periodically) connect to the Gateway server...
            (XmlParseLxml(fromKwarg="data", toKwarg="lxmlNode", parseOptions=dict(huge_tree=True, remove_blank_text=True)),
                (oaiDownload, # Fetch the OAI data from the Gateway...
                    (UpdateAdapterFromOaiDownloadProcessor(), # Turns an SRU update/delete message (lxmlNode) into the relevant message: 'delete' or 'add'.
                        # (LogComponent("SRU harvest van GATEWAY"),),
                        # Sample log output (abbreviated):
                        # [SRU harvest van GATEWAY] add(*(), **{'partname': 'record', 'identifier': 'meresco:record:1', 'lxmlNode': '_ElementTree(<record xmlns="http://www.openarchives.org/OAI/2.0/" ...><header><identifier>meresco:record:1</identifier><datestamp>2016-07-13T15:31:10Z</datestamp></header><metadata>...'})
                        (FilterMessages(allowed=['add']),
                            (XmlXPath(['/oai:record/oai:metadata/document:document'], fromKwarg='lxmlNode'),
                                # (LogComponent("NormdocToFieldsList"),),
                                (NormdocToFieldsList(), # Flat list of field names and values...
                                    (RecordPidToAuthNid(),),
                                    # (LogComponent("NormdocToFieldsList"),),
                                    # Sample log output (abbreviated):
                                    # [DcToFieldsList] add(*(), **{'fieldslist': [('dc:identifier', 'http://meresco.com?record=1'), ('dc:description', 'This is an example program about Search with Meresco'), ('dc:title', 'Example Program 1'), ...], 'partname': 'record', 'identifier': 'meresco:record:1'})
                                    # [NormdocToFieldsList] lookupNameIds(*(set(['doi:10.1002/lno.10611', 'wos:000423029300003']),), **{})
                                    (FieldsListToLuceneDocument( # Creates the addDocument message + creates the facet/drilldown fields, whose values are truncated to at most 256 chars.
                                            fieldRegistry=luceneWriter.settings.fieldRegistry, # among others, the drilldown fields definition
                                            untokenizedFieldnames=untokenizedFieldnames, # untokenized fields
                                            indexFieldFactory=DcFields, # Creates an "__all__" field, a field named after the fieldname and optionally an "untokenized.<fieldname>" field...
                                            #rewriteIdentifier=(lambda idee: idee.split(':', 1)[-1]) # meresco:record:1' => 'record:1'
                                        ),
                                        # (LogComponent("FieldsListToLuceneDocument"),),
                                        # Sample log output (abbreviated):
                                        # [LUCENE_WRITER] addDocument(*(), **{'fields': [{'type': 'TextField', 'name': '__all__', 'value': 'http://meresco.com?record=1'}, {'type': 'TextField', 'name': 'dc:identifier', 'value': 'http://meresco.com?record=1'}, {'type': 'StringField', 'name': 'untokenized.dc:identifier', 'value': 'http://meresco.com?record=1'}, ..., {'path': ['2016'], 'type': 'FacetField', 'name': 'untokenized.dc:date'}, ...], 'identifier': 'meresco:record:1'})
                                        # [####LUCENE_WRITER] addDocument(*(), **{'fields': [{'type': 'TextField', 'name': '__all__', 'value': 'knaw'}, {'type': 'TextField', 'name': 'meta:id', 'value': 'knaw'}, ..., {'path': ['publication'], 'type': 'FacetField', 'name': 'untokenized.meta_collection'}], 'identifier': 'knaw:record:3'})
                                        (luceneWriter,),
                                        # ),
                                    )
                                )
                            # )
                            # )
                            )
                        ),
                        (FilterMessages(allowed=['delete']),
                            (luceneWriter,),
                        )
                    )
                )
            )
        )
    )
    return writerServer
def testLimitsMustBeKnown(self):
    """Constructing a SocketPool with an unknown limit name raises TypeError."""
    def createWithUnknownLimit():
        return SocketPool(reactor='x', limits={'unknown': 'limit'})

    self.assertRaises(TypeError, createWithUnknownLimit)
def main(reactor, port, statePath, lucenePort, **ignored):
    """Build and return the component tree (DNA) of the SRU/RSS server.

    The returned tuple has an Observable root with an ObservableHttpServer
    on ``port`` that routes the paths /sru and /rss; both use the same
    executeQueryHelix and read records through a StorageAdapter.

    reactor     -- passed to SocketPool and ObservableHttpServer.
    port        -- port handed to ObservableHttpServer.
    statePath   -- base directory; 'store' is joined onto it.
    lucenePort  -- passed to luceneAndReaderConfig and MultiLucene.
    ignored     -- extra keyword arguments; accepted and not used here.
    """

    ######## START Lucene Integration ###############################################################
    defaultLuceneSettings = LuceneSettings(
        commitTimeout=30,
        readonly=True,)

    # HTTP/1.1 client with a socket pool; used by both luceneIndex and MultiLucene below.
    http11Request = be(
        (HttpRequest1_1(),
            (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)),),
        )
    )

    luceneIndex = luceneAndReaderConfig(defaultLuceneSettings.clone(readonly=True), http11Request, lucenePort)

    luceneRoHelix = be(
        (AdapterToLuceneQuery(
                defaultCore=DEFAULT_CORE,
                coreConverters={
                    DEFAULT_CORE: QueryExpressionToLuceneQueryDict(UNQUALIFIED_TERM_FIELDS, luceneSettings=luceneIndex.settings),
                }
            ),
            (MultiLucene(host='127.0.0.1', port=lucenePort, defaultCore=DEFAULT_CORE),
                (luceneIndex,),
                (http11Request,),
            )
        )
    )
    ######## END Lucene Integration ###############################################################

    # Empty, so fieldnameRewrite returns every name unchanged.
    fieldnameRewrites = {}

    def fieldnameRewrite(name):
        # Look the name up in fieldnameRewrites; fall back to the name itself.
        return fieldnameRewrites.get(name, name)

    def drilldownFieldnamesTranslate(fieldname):
        # Prefer the untokenized variant of the field when it is a known untokenized fieldname.
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
        resultsFrom=DEFAULT_CORE,
        matches=[],
        drilldownFieldnamesTranslate=drilldownFieldnamesTranslate
    )

    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'), strategy=strategie, partsRemovedOnDelete=[HEADER_PARTNAME, META_PARTNAME, METADATA_PARTNAME, OAI_DC_PARTNAME, LONG_PARTNAME, SHORT_PARTNAME])

    # What does this do?
    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite
        ).filterAndModifier(),
    ]

    # Shared query branch; the same tuple is used under both /sru and /rss below.
    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRoHelix,),
                    )
                )
            )
        )

    return \
    (Observable(),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                (PathFilter(['/sru']),
                    (SruParser(
                            host='sru.narcis.nl',
                            port=80,
                            defaultRecordSchema='knaw_short',
                            defaultRecordPacking='xml'),
                        (SruLimitStartRecord(limitBeyond=4000),
                            (SruHandler(
                                    includeQueryTimes=False,
                                    extraXParameters=[],
                                    enableCollectLog=False),
                                # Sample log lines: 2017-03-24T12:00:33Z 127.0.0.1 3.5K 0.019s - /sru
                                # OR (TRUE): 2017-03-24T11:58:53Z 127.0.0.1 2.3K 0.004s 1hits /sru maximumRecords=10&operation=searchRetrieve&query=untokenized.dd_year+exact+%221993%22&recordPacking=xml&recordSchema=knaw_short&startRecord=1&version=1.2
                                # NOTE(review): presumably these show the log output with enableCollectLog False vs. True -- confirm.
                                (SruTermDrilldown(),),
                                executeQueryHelix,
                                (StorageAdapter(),
                                    (storage,)
                                )
                            )
                        )
                    )
                ),
                (PathFilter('/rss'),
                    (Rss(
                            supportedLanguages = ['nl','en'], # defaults to first, if requested language is not available or supplied.
                            title = {'nl':'NARCIS', 'en':'NARCIS'},
                            description = {'nl':'NARCIS: De toegang tot de Nederlandse wetenschapsinformatie', 'en':'NARCIS: The gateway to Dutch scientific information'},
                            link = {'nl':'http://www.narcis.nl/?Language=nl', 'en':'http://www.narcis.nl/?Language=en'},
                            maximumRecords = 20),
                        executeQueryHelix,
                        (RssItem(
                                nsMap=NAMESPACEMAP,
                                title = ('knaw_short', {'nl':'//short:metadata/short:titleInfo[not (@xml:lang)]/short:title/text()', 'en':'//short:metadata/short:titleInfo[@xml:lang="en"]/short:title/text()'}),
                                description = ('knaw_short', {'nl':'//short:abstract[not (@xml:lang)]/text()', 'en':'//short:abstract[@xml:lang="en"]/text()'}),
                                pubdate = ('knaw_short', '//short:dateIssued/short:parsed/text()'),
                                linkTemplate = 'http://www.narcis.nl/%(wcpcollection)s/RecordID/%(oai_identifier)s/Language/%(language)s',
                                wcpcollection = ('meta', '//*[local-name() = "collection"]/text()'),
                                oai_identifier = ('meta', '//meta:record/meta:id/text()'),
                                language = ('Dummy: Language is auto provided by the calling RSS component, but needs to be present to serve the linkTemplate.')
                            ),
                            (StorageAdapter(),
                                (storage,)
                            )
                        )
                    )
                )
            )
        )
    )