def testLogAsObservable(self):
     # A writer constructed without log= must deliver its entry to an
     # observer attached through addObserver. The trace is limited to a
     # single 'log' method via onlySpecifiedMethods=True.
     log = CallTrace('log', onlySpecifiedMethods=True, methods={'log': lambda **kwargs: None})
     writer = QueryLogWriter()
     writer.addObserver(log)
     writer.writeLog(defaultCollectedLogWithPath('/sru'))
     loggedPaths = [m.kwargs['path'] for m in log.calledMethods]
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(['/sru'], loggedPaths)
 def testLogForNumberOfRecordsSelection(self):
     # numberOfRecordsSelection names the scope ('myscope') and key ('total')
     # of the collected log whose value must end up as numberOfRecords.
     log = CallTrace('log')
     selection = dict(scope='myscope', key='total')
     writer = QueryLogWriter(log=log, numberOfRecordsSelection=selection)
     collectedLog = defaultCollectedLog()
     collectedLog['myscope'] = {'total': [100]}
     writer.writeLog(collectedLog)
     loggedCounts = [m.kwargs['numberOfRecords'] for m in log.calledMethods]
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual([100], loggedCounts)
 def testLogAllPaths(self):
     # With no scope restriction every written log, whatever its path, must
     # produce exactly one 'log' call, in the order written.
     log = CallTrace('log')
     writer = QueryLogWriter(log=log)
     paths = ['/sru', '/srv', '/srw.php']
     for path in paths:
         writer.writeLog(defaultCollectedLogWithPath(path))
     self.assertEqual(['log', 'log', 'log'], log.calledMethodNames())
     self.assertEqual(paths, [m.kwargs['path'] for m in log.calledMethods])
Exemple #4
0
 def testLogForNumberOfRecordsSelection(self):
     # The value stored under collectedLog['myscope']['total'] must be the
     # one reported as numberOfRecords when the writer is pointed at that
     # scope/key pair through numberOfRecordsSelection.
     log = CallTrace('log')
     writer = QueryLogWriter(
         log=log,
         numberOfRecordsSelection=dict(scope='myscope', key='total'))
     collectedLog = defaultCollectedLog()
     collectedLog['myscope'] = {'total': [100]}
     writer.writeLog(collectedLog)
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(
         [100], [m.kwargs['numberOfRecords'] for m in log.calledMethods])
 def testAdditionalArguments(self):
     # An observer answering determineQueryArguments decides what is logged
     # as queryArguments: it returns dict(key='value') and 'key=value' is
     # expected in the log instead of the default 'version=1.2' arguments.
     log = CallTrace('log')
     writer = QueryLogWriter(log=log)
     observer = CallTrace('additional', returnValues={'determineQueryArguments': dict(key='value')})
     writer.addObserver(observer)
     writer.writeLog(defaultCollectedLog())
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(['key=value'], [m.kwargs['queryArguments'] for m in log.calledMethods])
     self.assertEqual(['determineQueryArguments'], observer.calledMethodNames())
     # The observer must have been given the collected log, the writer's
     # (empty) scopeNames and the arguments found so far.
     expectedKwargs = dict(
             collectedLog=defaultCollectedLog(),
             scopeNames=(),
             currentArgs={'version': '1.2'},
         )
     self.assertEqual(expectedKwargs, observer.calledMethods[0].kwargs)
 def testLogForArgumentsInsteadOfSruArguments(self):
     # A writer created with forHttpArguments must build queryArguments from
     # collectedLog['httpRequest']['arguments']; the expected string lists
     # the keys in alphabetical order.
     log = CallTrace('log')
     writer = QueryLogWriter.forHttpArguments(log=log)
     collectedLog = defaultCollectedLog()
     collectedLog['httpRequest']['arguments'] = [{'verb':'ListRecords', 'metadataPrefix':'rdf'}]
     writer.writeLog(collectedLog)
     loggedArguments = [m.kwargs['queryArguments'] for m in log.calledMethods]
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(['metadataPrefix=rdf&verb=ListRecords'], loggedArguments)
Exemple #7
0
 def testLog(self):
     # Checks the complete set of keyword arguments handed to log().
     # size and httpStatus are set here; all other expected values come
     # from defaultCollectedLog(), which is defined outside this block.
     log = CallTrace('log')
     writer = QueryLogWriter(log=log)
     collectedLog = defaultCollectedLog()
     collectedLog['httpResponse']['size'] = [4096]
     collectedLog['httpResponse']['httpStatus'] = ['200']
     writer.writeLog(collectedLog)
     self.assertEqual(['log'], log.calledMethodNames())
     # A size of 4096 is expected to be logged as 4.0
     # (presumably bytes converted to KiB -- confirm in QueryLogWriter).
     expected = dict(timestamp=1257161136.0,
                     path='/sru',
                     ipAddress='11.22.33.44',
                     size=4.0,
                     duration=3.0,
                     queryArguments='version=1.2',
                     numberOfRecords=32,
                     status='200')
     self.assertEqual(expected, log.calledMethods[0].kwargs)
 def setUp(self):
     # Fixture: a QueryLogWriter on a DirectoryLog in the test's temp
     # directory, and a HandleRequestLog driven by a fake clock.
     SeecrTestCase.setUp(self)

     # Fake clock start, epoch seconds (2009-11-02 11:25:36 UTC).
     self._timeNow = 1257161136.0

     def advanceClock():
         # Every call moves the fake clock one second on and returns it.
         self._timeNow = self._timeNow + 1.0
         return self._timeNow

     self.queryLogWriter = QueryLogWriter(DirectoryLog(self.tempdir))
     requestLog = HandleRequestLog()
     requestLog._time = advanceClock
     self.handleRequestLog = requestLog
 def testLog(self):
     # Verifies every keyword argument passed to log() for one collected
     # log. Only size and httpStatus are overridden in this test; the
     # remaining values originate from defaultCollectedLog().
     log = CallTrace('log')
     writer = QueryLogWriter(log=log)
     collectedLog = defaultCollectedLog()
     httpResponse = collectedLog['httpResponse']
     httpResponse['size'] = [4096]
     httpResponse['httpStatus'] = ['200']
     writer.writeLog(collectedLog)
     self.assertEqual(['log'], log.calledMethodNames())
     # size 4096 -> 4.0 (presumably KiB -- confirm in QueryLogWriter).
     self.assertEqual(dict(
             timestamp=1257161136.0,
             path='/sru',
             ipAddress='11.22.33.44',
             size=4.0,
             duration=3.0,
             queryArguments='version=1.2',
             numberOfRecords=32,
             status='200'
         ), log.calledMethods[0].kwargs)
Exemple #10
0
 def testLogForArgumentsInsteadOfSruArguments(self):
     # QueryLogWriter.forHttpArguments must log the HTTP request arguments
     # (here verb and metadataPrefix) as queryArguments, keys sorted
     # alphabetically in the expected string.
     log = CallTrace('log')
     writer = QueryLogWriter.forHttpArguments(log=log)
     collectedLog = defaultCollectedLog()
     httpArguments = {
         'verb': 'ListRecords',
         'metadataPrefix': 'rdf'
     }
     collectedLog['httpRequest']['arguments'] = [httpArguments]
     writer.writeLog(collectedLog)
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(
         ['metadataPrefix=rdf&verb=ListRecords'],
         [m.kwargs['queryArguments'] for m in log.calledMethods])
Exemple #11
0
 def testLogAllPaths(self):
     # A writer without scopeNames must log every collected log it is given,
     # regardless of path, preserving the order of the writeLog calls.
     log = CallTrace('log')
     writer = QueryLogWriter(log=log)
     writer.writeLog(defaultCollectedLogWithPath('/sru'))
     writer.writeLog(defaultCollectedLogWithPath('/srv'))
     writer.writeLog(defaultCollectedLogWithPath('/srw.php'))
     loggedPaths = [m.kwargs['path'] for m in log.calledMethods]
     self.assertEqual(['log', 'log', 'log'], log.calledMethodNames())
     self.assertEqual(['/sru', '/srv', '/srw.php'], loggedPaths)
Exemple #12
0
 def testLogAsObservable(self):
     # No log= is passed to the constructor; the 'log' call must instead
     # reach the observer registered with addObserver. The CallTrace is
     # created with onlySpecifiedMethods=True and a single 'log' method.
     log = CallTrace('log',
                     onlySpecifiedMethods=True,
                     methods={'log': lambda **kwargs: None})
     writer = QueryLogWriter()
     writer.addObserver(log)
     writer.writeLog(defaultCollectedLogWithPath('/sru'))
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(['/sru'],
                      [m.kwargs['path'] for m in log.calledMethods])
 def testLogLiveExample(self):
     # Replays a collected log captured from a live SRU request through two
     # writers that differ only in scopeNames:
     #   ('query-scope', 'sub-scope')   exists in the log  -> one log() call
     #   ('query-scope', 'other-scope') is absent          -> no log() call
     # The expected queryArguments contain maximumRecords and startRecord,
     # which appear only in the scoped 'sru' arguments and not in the
     # httpRequest arguments.
     collectedLog = {
         'httpRequest': {
             'timestamp': [1396596372.708574],
             'Headers': [{}],
             'Client': [('127.0.0.1', 57075)],
             'arguments': [{
                 'query': ['meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"'],
                 'operation': ['searchRetrieve'],
                 'version': ['1.2'],
                 'recordPacking': ['xml'],
                 'recordSchema': ['smbAggregatedData']
             }],
             'RequestURI': ['/edurep/sruns?query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'],
             'query': ['query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'],
             'path': ['/edurep/sruns'],
             'Method': ['GET'],
             'HTTPVersion': ['1.0']
         },
         'query-scope': {
             'sub-scope': {
                 'cqlClauses': [2],
                 'sru': {
                     'indexTime': [Decimal('0.000')],
                     'handlingTime': [Decimal('0.004')],
                     'numberOfRecords': [1],
                     'queryTime': [Decimal('0.003')],
                     'arguments': [{
                         'recordSchema': 'smbAggregatedData',
                         'version': '1.2',
                         'recordPacking': 'xml',
                         'maximumRecords': 10,
                         'startRecord': 1,
                         'query': 'meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"',
                         'operation': 'searchRetrieve'
                     }]
                 }
             }
         },
         'httpResponse': {
             'duration': [0.004216909408569336],
             'httpStatus': ['200'],
             'size': [1889]
         }
     }
     log = CallTrace('log')
     writer = QueryLogWriter(log=log, scopeNames=('query-scope', 'sub-scope'))
     log2 = CallTrace('log')
     writer2 = QueryLogWriter(log=log2, scopeNames=('query-scope', 'other-scope'))
     writer.writeLog(collectedLog)
     writer2.writeLog(collectedLog)
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual([], log2.calledMethodNames())
     expectedQueryArguments = 'maximumRecords=10&operation=searchRetrieve&query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&recordPacking=xml&recordSchema=smbAggregatedData&startRecord=1&version=1.2'
     self.assertEqual([expectedQueryArguments], [m.kwargs['queryArguments'] for m in log.calledMethods])
Exemple #14
0
    def testLoggedPathsNewStyle(self):
        # Three requests are sent through a tree in which only the '/yes'
        # branch is wrapped in LogCollectorScope('yesPath'). The writer is
        # configured with scopeNames=('global', 'yesPath'), so only the two
        # '/yes' requests are expected in the log; '/no' is answered but
        # not logged.
        log = CallTrace('log')

        def handleRequest(**kwargs):
            yield okPlainText
            yield 'result'

        index = CallTrace('index', methods={'handleRequest': handleRequest})

        observable = be(
            (Observable(),
                (LogCollector(),
                    (QueryLogWriter(log=log, scopeNames=('global', 'yesPath')),),
                    (LogCollectorScope('global'),
                        (HandleRequestLog(),
                            (PathFilter('/yes'),
                                (LogCollectorScope('yesPath'),
                                    (index,),
                                )
                            ),
                            (PathFilter('/no'),
                                (index,),
                            )
                        )
                    )
                )
            )
        )

        requests = [
            (('11.22.33.44', 1234), '/yes'),
            (('22.33.44.55', 2345), '/no'),
            (('33.44.55.66', 3456), '/yes'),
        ]
        # Every request, logged or not, must still get the full response.
        for client, path in requests:
            result = asString(
                observable.all.handleRequest(Client=client, path=path))
            self.assertEqual(okPlainText + 'result', result)

        self.assertEqual(['log', 'log'], log.calledMethodNames())
        self.assertEqual(['/yes', '/yes'],
                         [m.kwargs['path'] for m in log.calledMethods])
Exemple #15
0
 def testAdditionalArguments(self):
     # The dict returned by an observer's determineQueryArguments is what
     # must be logged as queryArguments ('key=value' here). The observer
     # must be called once with the collected log, the writer's scopeNames
     # and the current arguments.
     log = CallTrace('log')
     writer = QueryLogWriter(log=log)
     observer = CallTrace(
         'additional',
         returnValues={'determineQueryArguments': dict(key='value')})
     writer.addObserver(observer)
     writer.writeLog(defaultCollectedLog())
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual(
         ['key=value'],
         [m.kwargs['queryArguments'] for m in log.calledMethods])
     self.assertEqual(['determineQueryArguments'],
                      observer.calledMethodNames())
     self.assertEqual(
         dict(
             collectedLog=defaultCollectedLog(),
             scopeNames=(),
             currentArgs={'version': '1.2'},
         ), observer.calledMethods[0].kwargs)
Exemple #16
0
def main(reactor, port, statePath, indexPort, gatewayPort, **ignored):
    """Assemble and return the component tree of the example server.

    The result is a nested tuple rooted at an Observable. It holds the
    scheduled commit call, a DebugPrompt on ``port + 1``, the download helix
    and an ObservableHttpServer on ``port`` that routes the paths /oai, /sru,
    /rss and /log.

    Parameters:
        reactor: handed to PeriodicDownload, PeriodicCall, DebugPrompt,
            createDownloadHelix and ObservableHttpServer.
        port: port for ObservableHttpServer; DebugPrompt uses ``port + 1``.
        statePath: base directory onto which 'harvesterstate/gateway',
            'store', 'oai' and 'log' are joined.
        indexPort: port on localhost used by LuceneRemote.
        gatewayPort: port on localhost used by PeriodicDownload.
        **ignored: any further keyword arguments are accepted and unused.

    Returns:
        The nested tuple describing the tree. It is not wrapped in be()
        here. NOTE(review): presumably the caller does that -- confirm.
    """
    # Stream given to ApacheLogWriter further down.
    apacheLogStream = sys.stdout

    periodicDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        name='gateway')

    oaiDownload = OaiDownloadProcessor(
        path='/oai',
        metadataPrefix='oai_dc',
        workingDirectory=join(statePath, 'harvesterstate', 'gateway'),
        xWait=True,
        name='gateway',
        autoCommit=False)

    # Not referenced again inside this function, but still reachable by
    # DebugPrompt because locals() is passed as its globals below.
    def sortFieldRename(name):
        if not name.startswith('__'):
            name = SORTED_PREFIX + name
        return name

    # No rewrites are configured, so fieldnameRewrite currently returns
    # every name unchanged.
    fieldnameRewrites = {
    }
    def fieldnameRewrite(name):
        return fieldnameRewrites.get(name, name)

    # untokenizedFieldnames and untokenizedFieldname are defined outside
    # this function.
    def drilldownFieldnamesTranslate(fieldname):
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
            resultsFrom=DEFAULT_CORE,
            matches=[],
            drilldownFieldnamesTranslate=drilldownFieldnamesTranslate
        )

    luceneRemote = LuceneRemote(host='localhost', port=indexPort, path='/lucene')

    storage = StorageComponent(join(statePath, 'store'))
    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat('oai_dc', None, None)

    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite
        ).filterAndModifier(),
    ]

    # Sends the 'commit' message to storage and periodicDownload on a
    # Schedule(period=1); NOTE(review): unit of period not visible here.
    scheduledCommitPeriodicCall = be(
        (PeriodicCall(reactor, message='commit', name='Scheduled commit', initialSchedule=Schedule(period=1), schedule=Schedule(period=1)),
            (AllToDo(),
                (storage,),
                (periodicDownload,),
            )
        )
    )

    # One DirectoryLog is shared by both QueryLogWriters and by the
    # LogFileServer under /log.
    directoryLog = DirectoryLog(join(statePath, 'log'), extension='-query.log')

    # Query execution chain, reused by the /sru and the /rss branch below.
    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRemote,),
                    )
                )
            ),
        )

    return \
    (Observable(),
        (scheduledCommitPeriodicCall,),
        # locals() exposes every name defined above to the debug prompt.
        (DebugPrompt(reactor=reactor, port=port+1, globals=locals()),),
        createDownloadHelix(reactor, periodicDownload, oaiDownload, storage, oaiJazz),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (LogCollector(),
                (ApacheLogWriter(apacheLogStream),),
                # Two query log writers on the same directoryLog: one for
                # the 'http-scope' (set around /oai) using the HTTP
                # arguments, one for the 'sru-scope' (set around /sru).
                (QueryLogWriter.forHttpArguments(
                        log=directoryLog,
                        scopeNames=('http-scope',)
                    ),
                ),
                (QueryLogWriter(log=directoryLog, scopeNames=('sru-scope',)),),
                (Deproxy(),
                    (HandleRequestLog(),
                        (BasicHttpHandler(),
                            (PathFilter(["/oai"]),
                                (LogCollectorScope("http-scope"),
                                    (OaiPmh(repositoryName="Example OAI", adminEmail="*****@*****.**"),
                                        (oaiJazz,),
                                        (StorageAdapter(),
                                            (storage,)
                                        ),
                                    )
                                )
                            ),
                            (PathFilter(['/sru']),
                                (LogCollectorScope('sru-scope'),
                                    (SruParser(
                                            host='example.org',
                                            port=80,
                                            defaultRecordSchema=DEFAULT_CORE,
                                            defaultRecordPacking='xml'),
                                        (SruLimitStartRecord(limitBeyond=1000),
                                            (SruHandler(
                                                    includeQueryTimes=True,
                                                    extraXParameters=[],
                                                    enableCollectLog=True),
                                                (SruTermDrilldown(),),
                                                executeQueryHelix,
                                                (StorageAdapter(),
                                                    (storage,)
                                                )
                                            )
                                        )
                                    )
                                )
                            ),
                            # The /rss branch has no LogCollectorScope, so
                            # neither scoped QueryLogWriter names it.
                            (PathFilter('/rss'),
                                (Rss(   title = 'Meresco',
                                        description = 'RSS feed for Meresco',
                                        link = 'http://meresco.org',
                                        maximumRecords = 15),
                                    executeQueryHelix,
                                    (RssItem(
                                            nsMap={
                                                'dc': "http://purl.org/dc/elements/1.1/",
                                                'oai_dc': "http://www.openarchives.org/OAI/2.0/oai_dc/"
                                            },
                                            title = ('oai_dc', '/oai_dc:dc/dc:title/text()'),
                                            description = ('oai_dc', '/oai_dc:dc/dc:description/text()'),
                                            # NOTE(review): '%%' looks like an escaped percent sign for
                                            # later %-formatting with 'identifier' -- confirm in RssItem.
                                            linkTemplate = 'http://localhost/sru?operation=searchRetrieve&version=1.2&query=dc:identifier%%3D%(identifier)s',
                                            identifier = ('oai_dc', '/oai_dc:dc/dc:identifier/text()')),
                                        (StorageAdapter(),
                                            (storage,)
                                        )
                                    ),
                                )
                            ),
                            (PathFilter('/log'),
                                (LogFileServer(name="Example Queries", log=directoryLog, basepath='/log'),)
                            ),
                        ),
                    )
                )
            )
        ),
    )
Exemple #17
0
 def testLogLiveExample(self):
     # Feeds a collected log taken from a live SRU request to two writers.
     # The writer scoped to ('query-scope', 'sub-scope') must log once; the
     # writer scoped to ('query-scope', 'other-scope') must log nothing,
     # because the collected log has no 'other-scope' entry. The expected
     # queryArguments match the 'sru' arguments found inside the scope
     # (they include maximumRecords and startRecord).
     collectedLog = {
         'httpRequest': {
             'timestamp': [1396596372.708574],
             'Headers': [{}],
             'Client': [('127.0.0.1', 57075)],
             'arguments': [{
                 'query': [
                     'meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"'
                 ],
                 'operation': ['searchRetrieve'],
                 'version': ['1.2'],
                 'recordPacking': ['xml'],
                 'recordSchema': ['smbAggregatedData']
             }],
             'RequestURI': [
                 '/edurep/sruns?query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'
             ],
             'query': [
                 'query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&operation=searchRetrieve&version=1.2&recordPacking=xml&recordSchema=smbAggregatedData'
             ],
             'path': ['/edurep/sruns'],
             'Method': ['GET'],
             'HTTPVersion': ['1.0']
         },
         'query-scope': {
             'sub-scope': {
                 'cqlClauses': [2],
                 'sru': {
                     'indexTime': [Decimal('0.000')],
                     'handlingTime': [Decimal('0.004')],
                     'numberOfRecords': [1],
                     'queryTime': [Decimal('0.003')],
                     'arguments': [{
                         'recordSchema': 'smbAggregatedData',
                         'version': '1.2',
                         'recordPacking': 'xml',
                         'maximumRecords': 10,
                         'startRecord': 1,
                         'query':
                         'meta.upload.id exact "NICL:oai:mdms.kenict.org:oai:nicl.nl:k163645"',
                         'operation': 'searchRetrieve'
                     }]
                 }
             }
         },
         'httpResponse': {
             'duration': [0.004216909408569336],
             'httpStatus': ['200'],
             'size': [1889]
         }
     }
     log = CallTrace('log')
     writer = QueryLogWriter(log=log,
                             scopeNames=('query-scope', 'sub-scope'))
     log2 = CallTrace('log')
     writer2 = QueryLogWriter(log=log2,
                              scopeNames=('query-scope', 'other-scope'))
     writer.writeLog(collectedLog)
     writer2.writeLog(collectedLog)
     self.assertEqual(['log'], log.calledMethodNames())
     self.assertEqual([], log2.calledMethodNames())
     self.assertEqual([
         'maximumRecords=10&operation=searchRetrieve&query=meta.upload.id+exact+%22NICL%3Aoai%3Amdms.kenict.org%3Aoai%3Anicl.nl%3Ak163645%22&recordPacking=xml&recordSchema=smbAggregatedData&startRecord=1&version=1.2'
     ], [m.kwargs['queryArguments'] for m in log.calledMethods])