def testLogAsObservable(self):
    # A writer constructed without a log argument is given an observer
    # instead; the entry written must arrive at that observer's log method.
    # The CallTrace only allows the 'log' method (onlySpecifiedMethods=True).
    log = CallTrace(
        'log',
        onlySpecifiedMethods=True,
        methods={'log': lambda **kwargs: None})
    writer = QueryLogWriter()
    writer.addObserver(log)

    writer.writeLog(defaultCollectedLogWithPath('/sru'))

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual(['/sru'], [m.kwargs['path'] for m in log.calledMethods])
def testLogForNumberOfRecordsSelection(self):
    # numberOfRecordsSelection names a scope and key in the collected log;
    # the value stored there ([100]) must be logged as numberOfRecords.
    log = CallTrace('log')
    writer = QueryLogWriter(
        log=log,
        numberOfRecordsSelection=dict(scope='myscope', key='total'))
    collectedLog = defaultCollectedLog()
    collectedLog['myscope'] = {'total': [100]}

    writer.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual(
        [100],
        [m.kwargs['numberOfRecords'] for m in log.calledMethods])
def testLogAllPaths(self):
    # A writer created without scopeNames must produce one log entry per
    # collected log, whatever the request path is.
    paths = ['/sru', '/srv', '/srw.php']
    log = CallTrace('log')
    writer = QueryLogWriter(log=log)

    for path in paths:
        writer.writeLog(defaultCollectedLogWithPath(path))

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'] * len(paths), log.calledMethodNames())
    self.assertEqual(paths, [m.kwargs['path'] for m in log.calledMethods])
def testLogForNumberOfRecordsSelection(self):
    # With numberOfRecordsSelection=dict(scope=..., key=...) the logged
    # numberOfRecords must be the value found under that scope and key of the
    # collected log (here: collectedLog['myscope']['total'] == [100]).
    log = CallTrace('log')
    selection = dict(scope='myscope', key='total')
    writer = QueryLogWriter(log=log, numberOfRecordsSelection=selection)
    collectedLog = defaultCollectedLog()
    collectedLog['myscope'] = {'total': [100]}

    writer.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    loggedCounts = [m.kwargs['numberOfRecords'] for m in log.calledMethods]
    self.assertEqual([100], loggedCounts)
def testAdditionalArguments(self):
    # An observer may supply the query arguments: the writer calls its
    # determineQueryArguments and the logged queryArguments are built from
    # the returned dict ('key=value') rather than from currentArgs.
    log = CallTrace('log')
    writer = QueryLogWriter(log=log)
    observer = CallTrace(
        'additional',
        returnValues={'determineQueryArguments': dict(key='value')})
    writer.addObserver(observer)

    writer.writeLog(defaultCollectedLog())

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual(
        ['key=value'],
        [m.kwargs['queryArguments'] for m in log.calledMethods])
    self.assertEqual(['determineQueryArguments'], observer.calledMethodNames())
    # The observer receives the full collected log, the writer's scope names
    # and the arguments the writer would have logged by itself.
    self.assertEqual(
        dict(
            collectedLog=defaultCollectedLog(),
            scopeNames=(),
            currentArgs={'version': '1.2'},
        ),
        observer.calledMethods[0].kwargs)
def testLogForArgumentsInsteadOfSruArguments(self):
    # A writer built with QueryLogWriter.forHttpArguments must log the
    # arguments stored under collectedLog['httpRequest']['arguments'].
    # The expected string lists the keys in alphabetical order.
    log = CallTrace('log')
    writer = QueryLogWriter.forHttpArguments(log=log)
    collectedLog = defaultCollectedLog()
    collectedLog['httpRequest']['arguments'] = [
        {'verb': 'ListRecords', 'metadataPrefix': 'rdf'},
    ]

    writer.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual(
        ['metadataPrefix=rdf&verb=ListRecords'],
        [m.kwargs['queryArguments'] for m in log.calledMethods])
def testLog(self):
    # Checks every keyword argument handed to log() for one collected log.
    # Only the response size and status are set here; the other expected
    # values must come from defaultCollectedLog(), which is defined elsewhere.
    log = CallTrace('log')
    writer = QueryLogWriter(log=log)
    collectedLog = defaultCollectedLog()
    collectedLog['httpResponse']['size'] = [4096]
    collectedLog['httpResponse']['httpStatus'] = ['200']

    writer.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    # NOTE(review): size 4096 is expected back as 4.0, presumably because the
    # writer reports it in units of 1024 -- confirm against QueryLogWriter.
    self.assertEqual(
        dict(
            timestamp=1257161136.0,
            path='/sru',
            ipAddress='11.22.33.44',
            size=4.0,
            duration=3.0,
            queryArguments='version=1.2',
            numberOfRecords=32,
            status='200',
        ),
        log.calledMethods[0].kwargs)
def setUp(self):
    # Per-test fixture: a QueryLogWriter logging into the test's temp
    # directory and a HandleRequestLog whose _time is replaced by a
    # deterministic clock.
    SeecrTestCase.setUp(self)
    self._timeNow = 1257161136.0  # 2009-11-02 11:25:36 UTC

    def tickingClock():
        # Every call moves the fake time forward by one second and returns
        # the new value.
        self._timeNow = self._timeNow + 1.0
        return self._timeNow

    self.queryLogWriter = QueryLogWriter(DirectoryLog(self.tempdir))
    requestLog = HandleRequestLog()
    requestLog._time = tickingClock
    self.handleRequestLog = requestLog
def testLog(self):
    # Verifies the complete set of keyword arguments passed to log() when a
    # single collected log is written. The test itself only fills in the
    # response size and status; everything else is taken from
    # defaultCollectedLog(), defined elsewhere.
    log = CallTrace('log')
    writer = QueryLogWriter(log=log)
    collectedLog = defaultCollectedLog()
    httpResponse = collectedLog['httpResponse']
    httpResponse['size'] = [4096]
    httpResponse['httpStatus'] = ['200']

    writer.writeLog(collectedLog)

    expectedKwargs = dict(
        timestamp=1257161136.0,
        path='/sru',
        ipAddress='11.22.33.44',
        # NOTE(review): 4096 in, 4.0 out -- presumably size is logged in
        # units of 1024; confirm against QueryLogWriter.
        size=4.0,
        duration=3.0,
        queryArguments='version=1.2',
        numberOfRecords=32,
        status='200',
    )
    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual(expectedKwargs, log.calledMethods[0].kwargs)
def testLogForArgumentsInsteadOfSruArguments(self):
    # QueryLogWriter.forHttpArguments yields a writer that logs the plain
    # HTTP request arguments (collectedLog['httpRequest']['arguments']).
    # The expected query string has its keys in alphabetical order.
    log = CallTrace('log')
    writer = QueryLogWriter.forHttpArguments(log=log)
    collectedLog = defaultCollectedLog()
    collectedLog['httpRequest']['arguments'] = [{
        'verb': 'ListRecords',
        'metadataPrefix': 'rdf'
    }]

    writer.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    loggedArguments = [m.kwargs['queryArguments'] for m in log.calledMethods]
    self.assertEqual(['metadataPrefix=rdf&verb=ListRecords'], loggedArguments)
def testLogAllPaths(self):
    # Without scopeNames the writer is expected to log every collected log
    # it is given, regardless of the request path.
    log = CallTrace('log')
    writer = QueryLogWriter(log=log)
    requestedPaths = ['/sru', '/srv', '/srw.php']

    for requestedPath in requestedPaths:
        writer.writeLog(defaultCollectedLogWithPath(requestedPath))

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log', 'log', 'log'], log.calledMethodNames())
    self.assertEqual(
        requestedPaths,
        [m.kwargs['path'] for m in log.calledMethods])
def testLogLiveExample(self):
    # Feeds one realistic collected log to two writers. The writer whose
    # scopeNames ('query-scope', 'sub-scope') exist in the collected log must
    # log once; the writer pointing at ('query-scope', 'other-scope'), which
    # is absent, must not log at all.
    collectedLog = {
        'httpRequest': {
            'timestamp': [1396596372.708574],
            'Headers': [{}],
            'Client': [('127.0.0.1', 57075)],
            'arguments': [{
                'query': ['meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"'],
                'operation': ['searchRetrieve'],
                'version': ['1.2'],
                'recordPacking': ['xml'],
                'recordSchema': ['smbAggregatedData']
            }],
            'RequestURI': ['/edurep/sruns?query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'],
            'query': ['query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'],
            'path': ['/edurep/sruns'],
            'Method': ['GET'],
            'HTTPVersion': ['1.0']
        },
        'query-scope': {
            'sub-scope': {
                'cqlClauses': [2],
                'sru': {
                    'indexTime': [Decimal('0.000')],
                    'handlingTime': [Decimal('0.004')],
                    'numberOfRecords': [1],
                    'queryTime': [Decimal('0.003')],
                    'arguments': [{
                        'recordSchema': 'smbAggregatedData',
                        'version': '1.2',
                        'recordPacking': 'xml',
                        'maximumRecords': 10,
                        'startRecord': 1,
                        'query': 'meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"',
                        'operation': 'searchRetrieve'
                    }]
                }
            }
        },
        'httpResponse': {
            'duration': [0.004216909408569336],
            'httpStatus': ['200'],
            'size': [1889]
        }
    }
    log = CallTrace('log')
    writer = QueryLogWriter(log=log, scopeNames=('query-scope', 'sub-scope'))
    log2 = CallTrace('log')
    writer2 = QueryLogWriter(log=log2, scopeNames=('query-scope', 'other-scope'))

    writer.writeLog(collectedLog)
    writer2.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual([], log2.calledMethodNames())
    # The expected string holds the sru arguments of the matched scope,
    # url-encoded, with keys in alphabetical order.
    self.assertEqual(
        ['maximumRecords=10&operation=searchRetrieve&query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&recordPacking=xml&recordSchema=smbAggregatedData&startRecord=1&version=1.2'],
        [m.kwargs['queryArguments'] for m in log.calledMethods])
def testLoggedPathsNewStyle(self):
    # Builds a small component tree in which only requests for '/yes' pass
    # through the nested 'yesPath' collector scope. The QueryLogWriter is
    # configured with scopeNames ('global', 'yesPath'), so of the three
    # requests below only the two '/yes' requests may be logged.
    log = CallTrace('log')

    def handleRequest(**kwargs):
        yield okPlainText
        yield 'result'

    index = CallTrace('index', methods={'handleRequest': handleRequest})
    observable = be(
        (Observable(),
            (LogCollector(),
                (QueryLogWriter(log=log, scopeNames=('global', 'yesPath')),),
                (LogCollectorScope('global'),
                    (HandleRequestLog(),
                        (PathFilter('/yes'),
                            (LogCollectorScope('yesPath'),
                                (index,),
                            )
                        ),
                        (PathFilter('/no'),
                            (index,),
                        )
                    )
                )
            )
        )
    )

    requests = [
        (('11.22.33.44', 1234), '/yes'),
        (('22.33.44.55', 2345), '/no'),
        (('33.44.55.66', 3456), '/yes'),
    ]
    for client, path in requests:
        # Both paths are served by the same index; only the logging differs.
        result = asString(observable.all.handleRequest(Client=client, path=path))
        # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
        self.assertEqual(okPlainText + 'result', result)

    self.assertEqual(['log', 'log'], log.calledMethodNames())
    self.assertEqual(
        ['/yes', '/yes'],
        [m.kwargs['path'] for m in log.calledMethods])
def testAdditionalArguments(self):
    # The writer asks its observers for the query arguments via
    # determineQueryArguments; whatever dict comes back (key='value') is what
    # ends up, encoded, in the logged queryArguments.
    log = CallTrace('log')
    writer = QueryLogWriter(log=log)
    observer = CallTrace(
        'additional',
        returnValues={'determineQueryArguments': dict(key='value')})
    writer.addObserver(observer)

    writer.writeLog(defaultCollectedLog())

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    loggedArguments = [m.kwargs['queryArguments'] for m in log.calledMethods]
    self.assertEqual(['key=value'], loggedArguments)

    # The observer is called exactly once, with the collected log, the
    # writer's (empty) scopeNames and the arguments determined so far.
    self.assertEqual(['determineQueryArguments'], observer.calledMethodNames())
    expectedKwargs = dict(
        collectedLog=defaultCollectedLog(),
        scopeNames=(),
        currentArgs={'version': '1.2'},
    )
    self.assertEqual(expectedKwargs, observer.calledMethods[0].kwargs)
def main(reactor, port, statePath, indexPort, gatewayPort, **ignored):
    """Construct the server's components and return them as one nested tuple.

    The returned value is a tuple tree rooted at Observable(); it is not
    passed through be() here, so that is left to the caller.

    reactor     -- handed to PeriodicDownload, PeriodicCall, DebugPrompt and
                   ObservableHttpServer.
    port        -- port given to ObservableHttpServer; DebugPrompt gets port+1.
    statePath   -- base directory; the sub paths 'harvesterstate/gateway',
                   'store', 'oai' and 'log' are derived from it.
    indexPort   -- port of the LuceneRemote on localhost.
    gatewayPort -- port on localhost that PeriodicDownload connects to.
    **ignored   -- any further keyword arguments are accepted and not used.

    NOTE: the return expression passes locals() to DebugPrompt, so every
    local name defined in this function (including otherwise unused ones) is
    exposed there. Adding, renaming or removing locals changes that snapshot.
    """
    apacheLogStream = sys.stdout

    periodicDownload = PeriodicDownload(
        reactor, host='localhost', port=gatewayPort, name='gateway')

    oaiDownload = OaiDownloadProcessor(
        path='/oai',
        metadataPrefix='oai_dc',
        workingDirectory=join(statePath, 'harvesterstate', 'gateway'),
        xWait=True,
        name='gateway',
        autoCommit=False)

    # Not referenced anywhere else in this function; it is still part of the
    # locals() snapshot handed to DebugPrompt below.
    def sortFieldRename(name):
        if not name.startswith('__'):
            name = SORTED_PREFIX + name
        return name

    # Empty mapping: while it stays empty, fieldnameRewrite returns every
    # name unchanged.
    fieldnameRewrites = { }
    def fieldnameRewrite(name):
        return fieldnameRewrites.get(name, name)

    # Uses the untokenized variant of a field name when that variant is listed
    # in untokenizedFieldnames, then applies fieldnameRewrite.
    def drilldownFieldnamesTranslate(fieldname):
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
            resultsFrom=DEFAULT_CORE,
            matches=[],
            drilldownFieldnamesTranslate=drilldownFieldnamesTranslate
        )

    luceneRemote = LuceneRemote(host='localhost', port=indexPort, path='/lucene')

    storage = StorageComponent(join(statePath, 'store'))

    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat('oai_dc', None, None)

    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite
        ).filterAndModifier(),
    ]

    # Sends the message 'commit' to storage and periodicDownload on a
    # Schedule(period=1).
    scheduledCommitPeriodicCall = be(
        (PeriodicCall(reactor, message='commit', name='Scheduled commit', initialSchedule=Schedule(period=1), schedule=Schedule(period=1)),
            (AllToDo(),
                (storage,),
                (periodicDownload,),
            )
        )
    )

    # Shared by the query log writers and the LogFileServer under '/log'.
    directoryLog = DirectoryLog(join(statePath, 'log'), extension='-query.log')

    # Query execution chain; the same tuple is placed under both the SRU
    # handler and the RSS component below.
    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRemote,),
                    )
                )
            ),
        )

    return \
    (Observable(),
        (scheduledCommitPeriodicCall,),
        (DebugPrompt(reactor=reactor, port=port+1, globals=locals()),),
        createDownloadHelix(reactor, periodicDownload, oaiDownload, storage, oaiJazz),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (LogCollector(),
                (ApacheLogWriter(apacheLogStream),),
                # One query log writer per collector scope; the scope names
                # match the LogCollectorScope instances further down.
                (QueryLogWriter.forHttpArguments(
                        log=directoryLog,
                        scopeNames=('http-scope',)
                    ),
                ),
                (QueryLogWriter(log=directoryLog, scopeNames=('sru-scope',)),),
                (Deproxy(),
                    (HandleRequestLog(),
                        (BasicHttpHandler(),
                            (PathFilter(["/oai"]),
                                (LogCollectorScope("http-scope"),
                                    (OaiPmh(repositoryName="Example OAI", adminEmail="*****@*****.**"),
                                        (oaiJazz,),
                                        (StorageAdapter(),
                                            (storage,)
                                        ),
                                    )
                                )
                            ),
                            (PathFilter(['/sru']),
                                (LogCollectorScope('sru-scope'),
                                    (SruParser(
                                            host='example.org',
                                            port=80,
                                            defaultRecordSchema=DEFAULT_CORE,
                                            defaultRecordPacking='xml'),
                                        (SruLimitStartRecord(limitBeyond=1000),
                                            (SruHandler(
                                                    includeQueryTimes=True,
                                                    extraXParameters=[],
                                                    enableCollectLog=True),
                                                (SruTermDrilldown(),),
                                                executeQueryHelix,
                                                (StorageAdapter(),
                                                    (storage,)
                                                )
                                            )
                                        )
                                    )
                                )
                            ),
                            (PathFilter('/rss'),
                                (Rss(   title = 'Meresco',
                                        description = 'RSS feed for Meresco',
                                        link = 'http://meresco.org',
                                        maximumRecords = 15),
                                    executeQueryHelix,
                                    (RssItem(
                                            nsMap={
                                                'dc': "http://purl.org/dc/elements/1.1/",
                                                'oai_dc': "http://www.openarchives.org/OAI/2.0/oai_dc/"
                                            },
                                            title = ('oai_dc', '/oai_dc:dc/dc:title/text()'),
                                            description = ('oai_dc', '/oai_dc:dc/dc:description/text()'),
                                            linkTemplate = 'http://localhost/sru?operation=searchRetrieve&version=1.2&query=dc:identifier%%3D%(identifier)s',
                                            identifier = ('oai_dc', '/oai_dc:dc/dc:identifier/text()')),
                                        (StorageAdapter(),
                                            (storage,)
                                        )
                                    ),
                                )
                            ),
                            (PathFilter('/log'),
                                (LogFileServer(name="Example Queries", log=directoryLog, basepath='/log'),)
                            ),
                        ),
                    )
                )
            )
        ),
    )
def testLogLiveExample(self):
    # Uses one realistic collected log with two writers:
    #  - writer looks at scope ('query-scope', 'sub-scope'), which is present,
    #    and must log exactly once;
    #  - writer2 looks at ('query-scope', 'other-scope'), which is absent,
    #    and must stay silent.
    collectedLog = {
        'httpRequest': {
            'timestamp': [1396596372.708574],
            'Headers': [{}],
            'Client': [('127.0.0.1', 57075)],
            'arguments': [{
                'query': [
                    'meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"'
                ],
                'operation': ['searchRetrieve'],
                'version': ['1.2'],
                'recordPacking': ['xml'],
                'recordSchema': ['smbAggregatedData']
            }],
            'RequestURI': [
                '/edurep/sruns?query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'
            ],
            'query': [
                'query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'
            ],
            'path': ['/edurep/sruns'],
            'Method': ['GET'],
            'HTTPVersion': ['1.0']
        },
        'query-scope': {
            'sub-scope': {
                'cqlClauses': [2],
                'sru': {
                    'indexTime': [Decimal('0.000')],
                    'handlingTime': [Decimal('0.004')],
                    'numberOfRecords': [1],
                    'queryTime': [Decimal('0.003')],
                    'arguments': [{
                        'recordSchema': 'smbAggregatedData',
                        'version': '1.2',
                        'recordPacking': 'xml',
                        'maximumRecords': 10,
                        'startRecord': 1,
                        'query': 'meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"',
                        'operation': 'searchRetrieve'
                    }]
                }
            }
        },
        'httpResponse': {
            'duration': [0.004216909408569336],
            'httpStatus': ['200'],
            'size': [1889]
        }
    }
    log = CallTrace('log')
    log2 = CallTrace('log')
    writer = QueryLogWriter(
        log=log, scopeNames=('query-scope', 'sub-scope'))
    writer2 = QueryLogWriter(
        log=log2, scopeNames=('query-scope', 'other-scope'))

    writer.writeLog(collectedLog)
    writer2.writeLog(collectedLog)

    # assertEqual: the assertEquals alias no longer exists in Python 3.12+.
    self.assertEqual(['log'], log.calledMethodNames())
    self.assertEqual([], log2.calledMethodNames())
    # Expected: the sru arguments of the matched scope, url-encoded and with
    # the keys in alphabetical order.
    loggedArguments = [m.kwargs['queryArguments'] for m in log.calledMethods]
    self.assertEqual([
        'maximumRecords=10&operation=searchRetrieve&query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&recordPacking=xml&recordSchema=smbAggregatedData&startRecord=1&version=1.2'
    ], loggedArguments)