def testNextRecordPosition(self):
        observer = CallTrace(emptyGeneratorMethods=['additionalDiagnosticDetails'])
        response = Response(total=100, hits=hitsRange(11, 26))
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        def retrieveData(**kwargs):
            raise StopIteration('record')
            yield
        observer.methods['executeQuery'] = executeQuery
        observer.methods['retrieveData'] = retrieveData
        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler()
        component.addObserver(observer)

        arguments = dict(startRecord=11, maximumRecords=15, query='query', recordPacking='string', recordSchema='schema')
        result = "".join(compose(component.searchRetrieve(sruArguments=arguments, **arguments)))
        self.assertTrue("<srw:nextRecordPosition>26</srw:nextRecordPosition>" in result, result)

        executeCqlCallKwargs = observer.calledMethods[0].kwargs
        self.assertEquals(10, executeCqlCallKwargs['start']) # SRU is 1 based
        self.assertEquals(25, executeCqlCallKwargs['stop'])
    def testQueryTimeInExtraResponse(self):
        handler = SruHandler(includeQueryTimes=True)
        observer = CallTrace('observer', emptyGeneratorMethods=['echoedExtraRequestData', 'extraResponseData'])

        times = [1, 2.5, 3.5]
        def timeNow():
            return times.pop(0)
        handler._timeNow = timeNow

        def executeQuery(**kwargs):
            response = Response(total=0, hits=[])
            response.queryTime=5
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery
        handler.addObserver(observer)
        arguments = dict(startRecord=11, maximumRecords=15, query='query', recordPacking='string', recordSchema='schema')
        result = "".join(compose(handler.searchRetrieve(sruArguments=arguments, **arguments)))
        sruResponse = parse(StringIO(result))
        extraResponseData = sruResponse.xpath('/srw:searchRetrieveResponse/srw:extraResponseData', namespaces={'srw':"http://www.loc.gov/zing/srw/"})[0]
        self.assertEqualsWS("""<srw:extraResponseData %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
        <querytimes xmlns="http://meresco.org/namespace/timing">
            <sruHandling>PT2.500S</sruHandling>
            <sruQueryTime>PT1.500S</sruQueryTime>
            <index>PT0.005S</index>
        </querytimes>
</srw:extraResponseData>""" % namespaces, lxmltostring(extraResponseData))
        queryTimes = lxmltostring(extraResponseData.xpath('//ti:querytimes', namespaces={'ti':"http://meresco.org/namespace/timing"})[0])
        assertValid(queryTimes, join(schemasPath, 'timing-20120827.xsd'))
        self.assertEquals(['executeQuery', 'echoedExtraRequestData', 'extraResponseData', 'handleQueryTimes'], observer.calledMethodNames())
        self.assertEquals({'sru': Decimal("2.500"), 'queryTime': Decimal("1.500"), 'index': Decimal("0.005")}, observer.calledMethods[3].kwargs)
    def testValidXml(self):
        component = SruParser()
        sruHandler = SruHandler()
        component.addObserver(sruHandler)
        observer = CallTrace('observer')
        sruHandler.addObserver(observer)
        response = Response(total=2, hits=[Hit('id0'), Hit('id1')])
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        def retrieveData(**kwargs):
            raise StopIteration('<bike/>')
            yield
        observer.methods['executeQuery'] = executeQuery
        observer.returnValues['echoedExtraRequestData'] = (f for f in [])
        observer.returnValues['extraResponseData'] = (f for f in [])
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])
        observer.methods['retrieveData'] = retrieveData

        result = ''.join(compose(component.handleRequest(arguments={'version':['1.1'], 'query': ['aQuery'], 'operation':['searchRetrieve']})))
        header, body = result.split('\r\n'*2)
        assertValid(body, join(schemasPath, 'srw-types1.2.xsd'))
        self.assertTrue('<bike/>' in body, body)

        result = ''.join(compose(component.handleRequest(arguments={'version':['1.1'], 'operation':['searchRetrieve']})))
        header, body = result.split('\r\n'*2)
        assertValid(body, join(schemasPath, 'srw-types1.2.xsd'))
        self.assertTrue('diagnostic' in body, body)
Example #4
0
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.srw = Srw()
        self.sruParser = SruParser()
        self.sruHandler = SruHandler()

        self.srw.addObserver(self.sruParser)
        self.sruParser.addObserver(self.sruHandler)
        self.response = Response(total=1, hits=[Hit('0')])

        def executeQuery(**kwargs):
            return self.response
            yield

        def retrieveData(**kwargs):
            return 'data'
            yield

        self.observer = CallTrace(methods={
            'executeQuery': executeQuery,
            'retrieveData': retrieveData
        },
                                  emptyGeneratorMethods=[
                                      'extraResponseData',
                                      'echoedExtraRequestData',
                                      'additionalDiagnosticDetails',
                                      'extraRecordData'
                                  ])
        self.sruHandler.addObserver(self.observer)
    def testCollectLog(self):
        handler = SruHandler(enableCollectLog=True)
        observer = CallTrace('observer', emptyGeneratorMethods=['echoedExtraRequestData', 'extraResponseData'])
        __callstack_var_logCollector__ = dict()
        times = [1, 2.5, 3.5]
        def timeNow():
            return times.pop(0)
        handler._timeNow = timeNow

        def executeQuery(**kwargs):
            response = Response(total=0, hits=[])
            response.queryTime=5
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery
        handler.addObserver(observer)
        arguments = dict(startRecord=11, maximumRecords=15, query='query', recordPacking='string', recordSchema='schema')
        consume(handler.searchRetrieve(sruArguments=arguments, **arguments))

        self.assertEquals({
            'sru': {
                'handlingTime': [Decimal('2.500')],
                'queryTime': [Decimal('1.500')],
                'indexTime': [Decimal('0.005')],
                'numberOfRecords': [0],
                'arguments': [{
                    'startRecord': 11,
                    'query': 'query',
                    'recordPacking': 'string',
                    'maximumRecords': 15,
                    'recordSchema': 'schema',
                }],
            }
        }, __callstack_var_logCollector__)
    def testCollectLogWhenIndexRaisesError(self):
        handler = SruHandler(enableCollectLog=True)
        observer = CallTrace('observer', emptyGeneratorMethods=['echoedExtraRequestData', 'extraResponseData', 'additionalDiagnosticDetails'])
        __callstack_var_logCollector__ = dict()
        times = [1]
        def timeNow():
            return times.pop(0)
        handler._timeNow = timeNow

        def executeQuery(**kwargs):
            raise Exception('Sorry')
            yield
        observer.methods['executeQuery'] = executeQuery
        handler.addObserver(observer)
        arguments = dict(startRecord=11, maximumRecords=15, query='query', recordPacking='string', recordSchema='schema')
        consume(handler.searchRetrieve(sruArguments=arguments, **arguments))

        self.assertEquals({
            'sru': {
                'arguments': [{
                    'startRecord': 11,
                    'query': 'query',
                    'recordPacking': 'string',
                    'maximumRecords': 15,
                    'recordSchema': 'schema',
                }],
            }
        }, __callstack_var_logCollector__)
    def testExtraResponseDataHandlerNoData(self):
        class TestHandler:
            def extraResponseData(self, *args, **kwargs):
                return (f for f in [])

        component = SruHandler()
        component.addObserver(TestHandler())
        result = "".join(list(component._writeExtraResponseData(cqlAbstractSyntaxTree=None, **MOCKDATA)))
        self.assertEquals('' , result)
 def testExceptionInWriteExtraRecordData(self):
     class RaisesException(object):
         def extraResponseData(self, *args, **kwargs):
             raise Exception("Test Exception")
     component = SruHandler()
     component.addObserver(RaisesException())
     result = "".join(compose(component._writeExtraResponseData(cqlAbstractSyntaxTree=None, **MOCKDATA)))
     self.assertTrue("<uri>info://srw/diagnostics/1/1</uri>" in result)
     self.assertTrue("<message>General System Error</message>" in result)
     self.assertTrue("<details>Test Exception</details>" in result)
 def testExtraResponseDataWithTermDrilldown(self):
     sruHandler = SruHandler()
     sruTermDrilldown = SRUTermDrilldown()
     drilldownData = [
             {'fieldname': 'field0', 'terms': [{'term': 'value0_0', 'count': 14}]},
             {'fieldname': 'field1', 'terms': [{'term': 'value1_0', 'count': 13}, {'term': 'value1_1', 'count': 11}]},
             {'fieldname': 'field2', 'terms': [{'term': 'value2_0', 'count': 3}, {'term': 'value2_1', 'count': 2}, {'term': 'value2_2', 'count': 1}]}
         ]
     sruHandler.addObserver(sruTermDrilldown)
     result = "".join(sruHandler._writeExtraResponseData(drilldownData=drilldownData, sruArguments={}, **MOCKDATA))
     self.assertEqualsWS("""<srw:extraResponseData><dd:drilldown\n    xmlns:dd="http://meresco.org/namespace/drilldown"\n    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n    xsi:schemaLocation="http://meresco.org/namespace/drilldown http://meresco.org/files/xsd/drilldown-20070730.xsd"><dd:term-drilldown><dd:navigator name="field0"><dd:item count="14">value0_0</dd:item></dd:navigator><dd:navigator name="field1"><dd:item count="13">value1_0</dd:item><dd:item count="11">value1_1</dd:item></dd:navigator><dd:navigator name="field2"><dd:item count="3">value2_0</dd:item><dd:item count="2">value2_1</dd:item><dd:item count="1">value2_2</dd:item></dd:navigator></dd:term-drilldown></dd:drilldown></srw:extraResponseData>""" , result)
    def testEchoedSearchRetrieveRequest(self):
        sruArguments = {'version':'1.1', 'operation':'searchRetrieve', 'query':'query >= 3', 'recordSchema':'schema', 'recordPacking':'string'}
        component = SruHandler()

        result = "".join(list(component._writeEchoedSearchRetrieveRequest(sruArguments=sruArguments)))
        self.assertEqualsWS("""<srw:echoedSearchRetrieveRequest>
    <srw:version>1.1</srw:version>
    <srw:query>query &gt;= 3</srw:query>
    <srw:recordPacking>string</srw:recordPacking>
    <srw:recordSchema>schema</srw:recordSchema>
</srw:echoedSearchRetrieveRequest>""", result)
 def testExceptionInWriteRecordData(self):
     observer = CallTrace(emptyGeneratorMethods=['additionalDiagnosticDetails'])
     def retrieveData(**kwargs):
         raise Exception("Test Exception")
         yield
     observer.methods["retrieveData"] = retrieveData
     component = SruHandler()
     component.addObserver(observer)
     result = "".join(list(compose(component._writeRecordData(recordPacking="string", recordSchema="schema", recordId="ID"))))
     self.assertTrue("<uri>info://srw/diagnostics/1/1</uri>" in result)
     self.assertTrue("<message>General System Error</message>" in result)
     self.assertTrue("<details>Test Exception</details>" in result)
 def testDiagnosticOnExecuteCql(self):
     with stderr_replaced():
         class RaisesException(object):
             def executeQuery(self, *args, **kwargs):
                 raise Exception("Test Exception")
         component = SruHandler()
         component.addObserver(RaisesException())
         arguments = dict(startRecord=11, maximumRecords=15, query='query', recordPacking='string', recordSchema='schema')
         result = parse(StringIO("".join(compose(component.searchRetrieve(sruArguments=arguments, **arguments)))))
         diagnostic = result.xpath("/srw:searchRetrieveResponse/srw:diagnostics/diag:diagnostic", namespaces=namespaces)
         self.assertEquals(1, len(diagnostic))
         self.assertEquals(["info://srw/diagnostics/1/48"], diagnostic[0].xpath("diag:uri/text()", namespaces=namespaces))
         self.assertEquals(["Query Feature Unsupported"], diagnostic[0].xpath("diag:message/text()", namespaces=namespaces))
         self.assertEquals(["Test Exception"], diagnostic[0].xpath("diag:details/text()", namespaces=namespaces))
Example #13
0
 def testQuery(self):
     def executeQuery(**kwargs):
         return Response(total=42)
         yield
     index = CallTrace('index',
         emptyGeneratorMethods=['echoedExtraRequestData', 'extraResponseData'],
         methods=dict(executeQuery=executeQuery))
     observable = be((Observable(),
         (LogCollector(),
             (self.handleRequestLog,
                 (SruParser(),
                     (SruHandler(enableCollectLog=True),
                         (index,)
                     )
                 )
             ),
             (self.queryLogWriter,),
         )
     ))
     result = asString(observable.all.handleRequest(
         Method='GET',
         Client=('127.0.0.1', 1234),
         arguments={
             'version': ['1.2'],
             'operation': ['searchRetrieve'],
             'query': ['query'],
             'maximumRecords': ['0'],
         },
         path='/path/sru',
         otherKwarg='value'))
     self.assertTrue('<srw:numberOfRecords>42</srw:numberOfRecords>' in result, result)
     self.assertTrue(isfile(join(self.tempdir, '2009-11-02-query.log')))
     with open(join(self.tempdir, '2009-11-02-query.log')) as fp:
         self.assertEqual('2009-11-02T11:25:37Z 127.0.0.1 0.7K 1.000s 42hits /path/sru maximumRecords=0&operation=searchRetrieve&query=query&recordPacking=xml&recordSchema=dc&startRecord=1&version=1.2\n', fp.read())
Example #14
0
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.srw = Srw()
        self.sruParser = SruParser()
        self.sruHandler = SruHandler()

        self.srw.addObserver(self.sruParser)
        self.sruParser.addObserver(self.sruHandler)
        self.response = StopIteration(Response(total=1, hits=[Hit('0')]))
        def executeQuery(**kwargs):
            raise self.response
            yield
        def retrieveData(**kwargs):
            raise StopIteration('data')
            yield
        self.observer = CallTrace(
            methods={
                'executeQuery': executeQuery,
                'retrieveData': retrieveData
            },
            emptyGeneratorMethods=[
                'extraResponseData',
                'echoedExtraRequestData',
                'additionalDiagnosticDetails',
                'extraRecordData'
            ])
        self.sruHandler.addObserver(self.observer)
Example #15
0
    def testAllQueryHelpersForSRU(self):
        index = CallTrace('index')

        def executeQuery(**kwargs):
            return Response(total=3201, hits=[])
            yield

        index.methods['executeQuery'] = executeQuery
        index.ignoredAttributes.extend(
            ['echoedExtraRequestData', 'extraResponseData', 'all_unknown'])
        server = be((
            Observable(),
            (self.queryLog, (SruParser(),
                             (QueryLogHelperForSru(),
                              (SruHandler(extraRecordDataNewStyle=True),
                               (QueryLogHelperForExecuteCQL(), (index, )))))),
        ))

        ''.join(
            compose(
                server.all.handleRequest(
                    path='/path/sru',
                    Client=('11.22.33.44', 8080),
                    arguments={
                        'operation': ['searchRetrieve'],
                        'version': ['1.2'],
                        'maximumRecords': ['0'],
                        'query': ['field=value'],
                    },
                )))
        with open(join(self.tempdir, '2009-11-02-query.log')) as fp:
            self.assertEqual(
                '2009-11-02T11:25:37Z 11.22.33.44 0.7K 1.000s 3201hits /path/sru maximumRecords=0&operation=searchRetrieve&query=field%3Dvalue&recordPacking=xml&recordSchema=dc&startRecord=1&version=1.2\n',
                fp.read())
    def testDiagnosticWarning(self):
        sruArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2, }
        queryArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2}

        observer = CallTrace(emptyGeneratorMethods=['additionalDiagnosticDetails'])
        response = Response(total=100, hits=[Hit('<aap&noot>'), Hit('vuur')])
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery

        retrieveDataCalls = []
        def retrieveData(identifier, name):
            retrieveDataCalls.append(1)
            raise StopIteration("<MOCKED_WRITTEN_DATA>%s-%s</MOCKED_WRITTEN_DATA>" % (xmlEscape(identifier), name))
            yield
        observer.retrieveData = retrieveData

        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler()
        component.addObserver(observer)

        result = "".join(compose(component.searchRetrieve(sruArguments=sruArguments, diagnostics=[(998, 'Diagnostic 998', 'The <tag> message'), (999, 'Diagnostic 999', 'Some message')], **queryArguments)))

        response = parse(StringIO(result))

        self.assertEquals([t % namespaces for t in [
                '{%(srw)s}version',
                '{%(srw)s}numberOfRecords',
                '{%(srw)s}records',
                '{%(srw)s}nextRecordPosition',
                '{%(srw)s}echoedSearchRetrieveRequest',
                '{%(srw)s}diagnostics',
                '{%(srw)s}extraResponseData',
            ]], [t.tag for t in xpath(response, '//srw:searchRetrieveResponse/*')])

        diagnostics = [{'uri': xpath(d, 'diag:uri/text()')[0],
            'details': xpath(d, 'diag:details/text()')[0],
            'message': xpath(d, 'diag:message/text()')[0]} for d in
                xpath(response, '/srw:searchRetrieveResponse/srw:diagnostics/diag:diagnostic')]
        self.assertEquals([
            {'uri': 'info://srw/diagnostics/1/998', 'message': 'Diagnostic 998', 'details': 'The <tag> message'},
            {'uri': 'info://srw/diagnostics/1/999', 'message': 'Diagnostic 999', 'details': 'Some message'},
            ], diagnostics)
    def testExtraResponseDataHandlerWithData(self):
        argsUsed = []
        kwargsUsed = {}
        class TestHandler:
            def extraResponseData(self, *args, **kwargs):
                argsUsed.append(args)
                kwargsUsed.update(kwargs)
                return (f for f in ["<someD", "ata/>"])

        component = SruHandler()
        component.addObserver(TestHandler())
        result = "".join(list(component._writeExtraResponseData(cqlAbstractSyntaxTree=None, **MOCKDATA)))
        self.assertEquals('<srw:extraResponseData><someData/></srw:extraResponseData>' , result)
        self.assertEquals([()], argsUsed)
        self.assertEquals(None, kwargsUsed['cqlAbstractSyntaxTree'])
        self.assertEquals(MOCKDATA['queryTime'], kwargsUsed['queryTime'])
        self.assertEquals(MOCKDATA['response'], kwargsUsed['response'])
    def testExtraRecordDataOldStyle(self):
        queryArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2}
        sruArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2, 'x-recordSchema':['extra', 'evenmore']}

        observer = CallTrace()
        response = Response(total=100, hits=[Hit('11')])
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery

        retrieveDataCalls = []
        def retrieveData(identifier, name):
            retrieveDataCalls.append(1)
            raise StopIteration("<MOCKED_WRITTEN_DATA>%s-%s</MOCKED_WRITTEN_DATA>" % (identifier, name))
            yield
        observer.retrieveData = retrieveData

        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler(extraRecordDataNewStyle=False)
        component.addObserver(observer)

        result = "".join(compose(component.searchRetrieve(sruArguments=sruArguments, **queryArguments)))

        strippedResult = result[result.index('<srw:record>'):result.index('</srw:records>')]
        self.assertEqualsWS("""<srw:record>
            <srw:recordSchema>schema</srw:recordSchema>
            <srw:recordPacking>xml</srw:recordPacking>
            <srw:recordIdentifier>11</srw:recordIdentifier>
            <srw:recordData>
                <MOCKED_WRITTEN_DATA>11-schema</MOCKED_WRITTEN_DATA>
            </srw:recordData>
            <srw:extraRecordData>
                <recordData recordSchema="extra">
                    <MOCKED_WRITTEN_DATA>11-extra</MOCKED_WRITTEN_DATA>
                </recordData>
                <recordData recordSchema="evenmore">
                    <MOCKED_WRITTEN_DATA>11-evenmore</MOCKED_WRITTEN_DATA>
                </recordData>
            </srw:extraRecordData>
        </srw:record>""", strippedResult)
 def testGetDataWithAdapter(self):
     observer = CallTrace(returnValues=dict(getData='<record/>'))
     adapter = RetrieveToGetDataAdapter()
     handler = SruHandler()
     handler.addObserver(adapter)
     adapter.addObserver(observer)
     response = Response(total=100, hits=hitsRange(1, 3))
     def executeQuery(**kwargs):
         raise StopIteration(response)
         yield
     observer.methods['executeQuery'] = executeQuery
     observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
     observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
     observer.methods['extraRecordData'] = lambda hit: (f for f in [])
     response = asString(handler.searchRetrieve(query="word", recordSchema='schema', recordPacking='string', maximumRecords=2, sruArguments={}))
     self.assertEquals(['getData', 'getData'], observer.calledMethodNames()[1:4:2])
     self.assertTrue('<srw:recordData>&lt;record/&gt;</srw:recordData>' in response, response)
     self.assertEquals({'identifier': '1', 'name': 'schema'}, observer.calledMethods[1].kwargs)
     self.assertEquals({'identifier': '2', 'name': 'schema'}, observer.calledMethods[3].kwargs)
    def testExecuteQueryGetsRecordSchemaAsOnOfTheKwargs(self):
        sruArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2, 'x-recordSchema':['extra', 'evenmore'], 'x-extra-key': 'extraValue'}
        queryArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2}

        observer = CallTrace(emptyGeneratorMethods=['echoedExtraRequestData', 'extraResponseData'])
        response = Response(total=0, hits=[])
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery

        component = SruHandler()
        component.addObserver(observer)

        consume(component.searchRetrieve(sruArguments=sruArguments, **queryArguments))

        self.assertEqual(['executeQuery', 'echoedExtraRequestData', 'extraResponseData'], observer.calledMethodNames())
        queryKwargs = observer.calledMethods[0].kwargs
        self.assertEqual('schema', queryKwargs['recordSchema'])
        self.assertEqual(['extra', 'evenmore'], queryKwargs['extraArguments']['x-recordSchema'])
    def testNextRecordPositionNotShownIfAfterLimitBeyond(self):
        observer = CallTrace(emptyGeneratorMethods=['additionalDiagnosticDetails'])
        response = Response(total=100, hits=hitsRange(10, 11))
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        def retrieveData(**kwargs):
            raise StopIteration('record')
            yield
        observer.methods['executeQuery'] = executeQuery
        observer.methods['retrieveData'] = retrieveData
        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler()
        component.addObserver(observer)

        arguments = dict(startRecord=10, maximumRecords=2, query='query', recordPacking='string', recordSchema='schema', limitBeyond=10)
        result = "".join(compose(component.searchRetrieve(sruArguments=arguments, **arguments)))
        self.assertFalse("<srw:nextRecordPosition>" in result, result)
 def testParseDrilldownArguments(self):
     handler = SruHandler(drilldownSortBy='count')
     self.assertEquals(None, handler._parseDrilldownArgs([]))
     self.assertEquals([], handler._parseDrilldownArgs(['']))
     self.assertEquals([{'fieldname':'field', 'maxTerms':10, 'sortBy':'count'}], handler._parseDrilldownArgs(['field']))
     self.assertEquals([{'fieldname':'field', 'maxTerms':10, 'sortBy':'count'}], handler._parseDrilldownArgs(['field,']))
     self.assertEquals([{'fieldname':'field', 'maxTerms':20, 'sortBy':'count'}], handler._parseDrilldownArgs(['field:20']))
     self.assertEquals([{'fieldname':'field', 'maxTerms':20, 'sortBy':'count'}, {'fieldname':'field2', 'maxTerms':10, 'sortBy':'count'}], handler._parseDrilldownArgs(['field:20,field2']))
     self.assertEquals([{'fieldname':'field', 'maxTerms':20, 'sortBy':'count'}, {'fieldname':'field2', 'maxTerms':10, 'sortBy':'count'}], handler._parseDrilldownArgs(['field:20','field2']))
    def testDrilldownResultInExecuteQuery(self):
        observer = CallTrace()
        response = Response(total=100, hits=hitsRange(11, 26))
        drilldownData = iter([
            ('field0', iter([('value0_0', 14)])),
            ('field1', iter([('value1_0', 13), ('value1_1', 11)])),
            ('field2', iter([('value2_0', 3), ('value2_1', 2), ('value2_2', 1)]))])
        response.drilldownData = drilldownData
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        def retrieveData(**kwargs):
            raise StopIteration('record')
            yield
        observer.methods['executeQuery'] = executeQuery
        observer.methods['retrieveData'] = retrieveData
        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler(drilldownSortBy='somevalue')
        component.addObserver(observer)

        queryArguments = dict(startRecord=11, maximumRecords=15, query='query', recordPacking='string', recordSchema='schema')
        sruArguments = queryArguments
        sruArguments['x-term-drilldown'] = ["field0:1,fie:ld1:2,field2,fie:ld3"]
        consume(component.searchRetrieve(sruArguments=sruArguments, **queryArguments))
        self.assertEquals(['executeQuery'] + ['retrieveData', 'extraRecordData'] * 15 + ['echoedExtraRequestData', 'extraResponseData'], [m.name for m in observer.calledMethods])
        self.assertEquals([
            dict(fieldname='field0', maxTerms=1, sortBy='somevalue'),
            dict(fieldname='fie:ld1', maxTerms=2, sortBy='somevalue'),
            dict(fieldname='field2', maxTerms=DEFAULT_MAXIMUM_TERMS, sortBy='somevalue'),
            dict(fieldname='fie:ld3', maxTerms=DEFAULT_MAXIMUM_TERMS, sortBy='somevalue')
            ], list(observer.calledMethods[0].kwargs['facets']))
        extraResponseDataMethod = observer.calledMethods[-1]
        self.assertEquals(response, extraResponseDataMethod.kwargs['response'])
    def testSearchRetrieveAssertsDrilldownMaximumMaximumResultsWhenSet(self):
        drilldownMaximumMaximumResults = 3
        self.assertTrue(drilldownMaximumMaximumResults < DEFAULT_MAXIMUM_TERMS)

        def sruHandlerKwargs(x_term_drilldown):
            arguments = {'version':'1.1', 'operation':'searchRetrieve', 'query':'blissfully_ignored', 'recordSchema':'blissfully_ignored', 'recordPacking':'string'}
            arguments['x_term_drilldown'] = [x_term_drilldown]
            arguments['sruArguments'] = dict((k.replace('_', '-'),v) for k,v in arguments.items())
            return arguments

        # No problem - max
        kwargs = sruHandlerKwargs(x_term_drilldown='field0:3,fielddefault')
        sruHandler = SruHandler(drilldownMaximumMaximumResults=drilldownMaximumMaximumResults)
        observer = CallTrace('observer')
        sruHandler.addObserver(observer)
        def executeQuery(**kwargs):
            raise KeyboardInterrupt('Ok')
            yield
        observer.methods['executeQuery'] = executeQuery

        try:
            ''.join(compose(sruHandler.searchRetrieve(**kwargs)))
        except KeyboardInterrupt, e:
            self.assertEquals('Ok', str(e))
    def testEchoedSearchRetrieveRequestWithExtraRequestData(self):
        sruArguments = {'version':'1.1', 'operation':'searchRetrieve', 'query':'query >= 3', 'recordSchema':'schema', 'recordPacking':'string', 'x-term-drilldown':['field0,field1']}
        observer = CallTrace('ExtraRequestData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in '<some>extra request data</some>')
        component = SruHandler()
        component.addObserver(SRUTermDrilldown())
        component.addObserver(observer)

        result = "".join(list(component._writeEchoedSearchRetrieveRequest(sruArguments=sruArguments)))

        drilldownRequestData = DRILLDOWN_HEADER \
        + """<dd:term-drilldown>field0,field1</dd:term-drilldown>"""\
        + DRILLDOWN_FOOTER
        self.assertEqualsWS("""<srw:echoedSearchRetrieveRequest>
    <srw:version>1.1</srw:version>
    <srw:query>query &gt;= 3</srw:query>
    <srw:recordPacking>string</srw:recordPacking>
    <srw:recordSchema>schema</srw:recordSchema>
    <srw:extraRequestData>%s<some>extra request data</some></srw:extraRequestData>
</srw:echoedSearchRetrieveRequest>""" % drilldownRequestData, result)
 def testExtraResponseDataHandlerNoHandler(self):
     component = SruHandler()
     result = "".join(list(component._writeExtraResponseData(cqlAbstractSyntaxTree=None, **MOCKDATA)))
     self.assertEquals('' , result)
def main(reactor, port, statePath, lucenePort, **ignored):


######## START Lucene Integration ###############################################################
    defaultLuceneSettings = LuceneSettings(
        commitTimeout=30,
        readonly=True,)
    
    
    http11Request = be(
        (HttpRequest1_1(),
            (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)),),
        )
    )
    
    luceneIndex = luceneAndReaderConfig(defaultLuceneSettings.clone(readonly=True), http11Request, lucenePort)
    
    
    luceneRoHelix = be(
        (AdapterToLuceneQuery(
                defaultCore=DEFAULT_CORE,
                coreConverters={
                    DEFAULT_CORE: QueryExpressionToLuceneQueryDict(UNQUALIFIED_TERM_FIELDS, luceneSettings=luceneIndex.settings),
                }
            ),
            (MultiLucene(host='127.0.0.1', port=lucenePort, defaultCore=DEFAULT_CORE),
                (luceneIndex,),
                (http11Request,),
            )
        )
    )

######## END Lucene Integration ###############################################################


    fieldnameRewrites = {}

    def fieldnameRewrite(name):
        return fieldnameRewrites.get(name, name)

    def drilldownFieldnamesTranslate(fieldname):
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
            resultsFrom=DEFAULT_CORE,
            matches=[],
            drilldownFieldnamesTranslate=drilldownFieldnamesTranslate
        )


    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'), strategy=strategie, partsRemovedOnDelete=[HEADER_PARTNAME, META_PARTNAME, METADATA_PARTNAME, OAI_DC_PARTNAME, LONG_PARTNAME, SHORT_PARTNAME])


    # Wat doet dit?
    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite
        ).filterAndModifier(),
    ]



    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRoHelix,),
                    )
                )
            )
        )


    return \
    (Observable(),
        
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                (PathFilter(['/sru']),
                    (SruParser(
                            host='sru.narcis.nl',
                            port=80,
                            defaultRecordSchema='knaw_short',
                            defaultRecordPacking='xml'),
                        (SruLimitStartRecord(limitBeyond=4000),
                            (SruHandler(
                                    includeQueryTimes=False,
                                    extraXParameters=[],
                                    enableCollectLog=False), #2017-03-24T12:00:33Z 127.0.0.1 3.5K 0.019s - /sru OF (TRUE): 2017-03-24T11:58:53Z 127.0.0.1 2.3K 0.004s 1hits /sru maximumRecords=10&operation=searchRetrieve&query=untokenized.dd_year+exact+%221993%22&recordPacking=xml&recordSchema=knaw_short&startRecord=1&version=1.2
                                (SruTermDrilldown(),),
                                executeQueryHelix,
                                (StorageAdapter(),
                                    (storage,)
                                )
                            )
                        )
                    )
                ),
                (PathFilter('/rss'),
                    (Rss(   supportedLanguages = ['nl','en'], # defaults to first, if requested language is not available or supplied.
                            title = {'nl':'NARCIS', 'en':'NARCIS'},
                            description = {'nl':'NARCIS: De toegang tot de Nederlandse wetenschapsinformatie', 'en':'NARCIS: The gateway to Dutch scientific information'},
                            link = {'nl':'http://www.narcis.nl/?Language=nl', 'en':'http://www.narcis.nl/?Language=en'},
                            maximumRecords = 20),
                        executeQueryHelix,
                        (RssItem(
                                nsMap=NAMESPACEMAP,                                            
                                title = ('knaw_short', {'nl':'//short:metadata/short:titleInfo[not (@xml:lang)]/short:title/text()', 'en':'//short:metadata/short:titleInfo[@xml:lang="en"]/short:title/text()'}),
                                description = ('knaw_short', {'nl':'//short:abstract[not (@xml:lang)]/text()', 'en':'//short:abstract[@xml:lang="en"]/text()'}),
                                pubdate = ('knaw_short', '//short:dateIssued/short:parsed/text()'),
                                linkTemplate = 'http://www.narcis.nl/%(wcpcollection)s/RecordID/%(oai_identifier)s/Language/%(language)s',                                
                                wcpcollection = ('meta', '//*[local-name() = "collection"]/text()'),
                                oai_identifier = ('meta', '//meta:record/meta:id/text()'),
                                language = ('Dummy: Language is auto provided by the calling RSS component, but needs to be present to serve the linkTemplate.')
                            ),
                            (StorageAdapter(),
                                (storage,)
                            )
                        )
                    )
                )
            )
        )
    )
    def testSearchRetrieveVersion12(self):
        sruArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2, 'x-recordSchema':['extra', 'evenmore'], 'x-extra-key': 'extraValue'}
        queryArguments = {'version':'1.2', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2}

        observer = CallTrace()
        response = Response(total=100, hits=[Hit('<aap&noot>'), Hit('vuur')])
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery

        retrieveDataCalls = []
        def retrieveData(identifier, name):
            retrieveDataCalls.append(1)
            raise StopIteration("<MOCKED_WRITTEN_DATA>%s-%s</MOCKED_WRITTEN_DATA>" % (xmlEscape(identifier), name))
            yield
        observer.retrieveData = retrieveData

        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler()
        component.addObserver(observer)

        result = "".join(compose(component.searchRetrieve(sruArguments=sruArguments, **queryArguments)))
        self.assertEquals(['executeQuery', 'extraRecordData', 'extraRecordData', 'echoedExtraRequestData', 'extraResponseData'], [m.name for m in observer.calledMethods])
        executeQueryMethod, extraRecordData1, extraRecordData2, echoedExtraRequestDataMethod, extraResponseDataMethod = observer.calledMethods
        self.assertEquals('executeQuery', executeQueryMethod.name)
        methodKwargs = executeQueryMethod.kwargs
        self.assertEquals(cqlToExpression('field=value'), methodKwargs['query'])
        self.assertEquals(0, methodKwargs['start'])
        self.assertEquals(2, methodKwargs['stop'])
        self.assertEquals({'x-recordSchema': ['extra', 'evenmore'], 'x-extra-key': 'extraValue'}, methodKwargs['extraArguments'])
        self.assertEquals('<aap&noot>', extraRecordData1.kwargs['hit'].id)
        self.assertEquals('vuur', extraRecordData2.kwargs['hit'].id)

        self.assertEquals(6, sum(retrieveDataCalls))

        resultXml = parse(StringIO(result))
        ids = resultXml.xpath('//srw:recordIdentifier/text()', namespaces={'srw':"http://www.loc.gov/zing/srw/"})
        self.assertEquals(['<aap&noot>', 'vuur'], ids)

        self.assertEqualsWS("""
<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
    <srw:version>1.2</srw:version>
    <srw:numberOfRecords>100</srw:numberOfRecords>
    <srw:records>
        <srw:record>
            <srw:recordSchema>schema</srw:recordSchema>
            <srw:recordPacking>xml</srw:recordPacking>
            <srw:recordIdentifier>&lt;aap&amp;noot&gt;</srw:recordIdentifier>
            <srw:recordData>
                <MOCKED_WRITTEN_DATA>&lt;aap&amp;noot&gt;-schema</MOCKED_WRITTEN_DATA>
            </srw:recordData>
            <srw:extraRecordData>
                <srw:record>
                    <srw:recordSchema>extra</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>&lt;aap&amp;noot&gt;-extra</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
                <srw:record>
                    <srw:recordSchema>evenmore</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>&lt;aap&amp;noot&gt;-evenmore</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
            </srw:extraRecordData>
        </srw:record>
        <srw:record>
            <srw:recordSchema>schema</srw:recordSchema>
            <srw:recordPacking>xml</srw:recordPacking>
            <srw:recordIdentifier>vuur</srw:recordIdentifier>
            <srw:recordData>
                <MOCKED_WRITTEN_DATA>vuur-schema</MOCKED_WRITTEN_DATA>
            </srw:recordData>
            <srw:extraRecordData>
                <srw:record>
                    <srw:recordSchema>extra</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>vuur-extra</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
                <srw:record>
                    <srw:recordSchema>evenmore</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>vuur-evenmore</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
            </srw:extraRecordData>
        </srw:record>
    </srw:records>
    <srw:nextRecordPosition>3</srw:nextRecordPosition>
    <srw:echoedSearchRetrieveRequest>
        <srw:version>1.2</srw:version>
        <srw:query>field=value</srw:query>
        <srw:startRecord>1</srw:startRecord>
        <srw:maximumRecords>2</srw:maximumRecords>
        <srw:recordPacking>xml</srw:recordPacking>
        <srw:recordSchema>schema</srw:recordSchema>
        <srw:x-recordSchema>extra</srw:x-recordSchema>
        <srw:x-recordSchema>evenmore</srw:x-recordSchema>
        <srw:extraRequestData>echoedExtraRequestData</srw:extraRequestData>
    </srw:echoedSearchRetrieveRequest>
    <srw:extraResponseData>extraResponseData</srw:extraResponseData>
</srw:searchRetrieveResponse>
""" % namespaces, result)

        self.assertEquals((), echoedExtraRequestDataMethod.args)
        self.assertEquals(set(['version', 'recordSchema', 'x-recordSchema', 'maximumRecords', 'startRecord', 'query', 'operation', 'recordPacking', 'x-extra-key']), set(echoedExtraRequestDataMethod.kwargs['sruArguments'].keys()))
        self.assertEquals((), extraResponseDataMethod.args)
        self.assertEquals(sorted(['version', 'recordSchema', 'maximumRecords', 'startRecord', 'query', 'operation', 'recordPacking', 'response', 'drilldownData', 'queryTime', 'sruArguments']), sorted(extraResponseDataMethod.kwargs.keys()))
Example #29
0
def main(reactor,
         port,
         statePath,
         lucenePort,
         gatewayPort,
         quickCommit=False,
         **ignored):

    ######## START Lucene Integration ###############################################################
    defaultLuceneSettings = LuceneSettings(
        commitTimeout=30,
        readonly=True,
    )

    http11Request = be((
        HttpRequest1_1(),
        (SocketPool(reactor=reactor,
                    unusedTimeout=5,
                    limits=dict(totalSize=100, destinationSize=10)), ),
    ))

    luceneIndex = luceneAndReaderConfig(
        defaultLuceneSettings.clone(readonly=True), http11Request, lucenePort)

    luceneRoHelix = be(
        (AdapterToLuceneQuery(defaultCore=DEFAULT_CORE,
                              coreConverters={
                                  DEFAULT_CORE:
                                  QueryExpressionToLuceneQueryDict(
                                      UNQUALIFIED_TERM_FIELDS,
                                      luceneSettings=luceneIndex.settings),
                              }), (
                                  MultiLucene(host='localhost',
                                              port=lucenePort,
                                              defaultCore=DEFAULT_CORE),
                                  (luceneIndex, ),
                                  (http11Request, ),
                              )))

    ######## END Lucene Integration ###############################################################

    fieldnameRewrites = {
        #         UNTOKENIZED_PREFIX+'genre': UNTOKENIZED_PREFIX+'dc:genre',
    }

    def fieldnameRewrite(name):
        return fieldnameRewrites.get(name, name)

    def drilldownFieldnamesTranslate(fieldname):
        untokenizedName = untokenizedFieldname(fieldname)
        if untokenizedName in untokenizedFieldnames:
            fieldname = untokenizedName
        return fieldnameRewrite(fieldname)

    convertToComposedQuery = ConvertToComposedQuery(
        resultsFrom=DEFAULT_CORE,
        matches=[],
        drilldownFieldnamesTranslate=drilldownFieldnamesTranslate)

    strategie = Md5HashDistributeStrategy()
    storage = StorageComponent(join(statePath, 'store'),
                               strategy=strategie,
                               partsRemovedOnDelete=[
                                   HEADER_PARTNAME, META_PARTNAME,
                                   METADATA_PARTNAME, OAI_DC_PARTNAME,
                                   LONG_PARTNAME, SHORT_PARTNAME,
                                   OPENAIRE_PARTNAME
                               ])

    oaiJazz = OaiJazz(join(statePath, 'oai'))
    oaiJazz.updateMetadataFormat(
        OAI_DC_PARTNAME, "http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        "http://purl.org/dc/elements/1.1/")

    oai_oa_cerifJazz = OaiJazz(join(statePath, 'oai_cerif'))
    oai_oa_cerifJazz.updateMetadataFormat(
        OPENAIRE_PARTNAME,
        "https://www.openaire.eu/schema/cris/current/openaire-cerif-profile.xsd",
        "https://www.openaire.eu/cerif-profile/1.1/")
    # All of the following OAI-PMH sets shall be recognized by the CRIS, even if not all of them are populated.
    oai_oa_cerifJazz.updateSet("openaire_cris_projects",
                               "OpenAIRE_CRIS_projects")
    oai_oa_cerifJazz.updateSet("openaire_cris_orgunits",
                               "OpenAIRE_CRIS_orgunits")
    oai_oa_cerifJazz.updateSet("openaire_cris_persons",
                               "OpenAIRE_CRIS_persons")
    oai_oa_cerifJazz.updateSet("openaire_cris_patents",
                               "OpenAIRE_CRIS_patents")
    oai_oa_cerifJazz.updateSet("openaire_cris_products",
                               "OpenAIRE_CRIS_products")
    oai_oa_cerifJazz.updateSet("openaire_cris_publications",
                               "OpenAIRE_CRIS_publications")

    oai_oa_cerifJazz.updateSet("openaire_cris_funding",
                               "OpenAIRE_CRIS_funding")
    oai_oa_cerifJazz.updateSet("openaire_cris_events", "OpenAIRE_CRIS_events")
    oai_oa_cerifJazz.updateSet("openaire_cris_equipments",
                               "OpenAIRE_CRIS_equipments")

    cqlClauseConverters = [
        RenameFieldForExact(
            untokenizedFields=untokenizedFieldnames,
            untokenizedPrefix=UNTOKENIZED_PREFIX,
        ).filterAndModifier(),
        SearchTermFilterAndModifier(
            shouldModifyFieldValue=lambda *args: True,
            fieldnameModifier=fieldnameRewrite).filterAndModifier(),
    ]

    periodicGateWayDownload = PeriodicDownload(
        reactor,
        host='localhost',
        port=gatewayPort,
        schedule=Schedule(
            period=1 if quickCommit else 10
        ),  # WST: Interval in seconds before sending a new request to the GATEWAY in case of an error while processing batch records.(default=1). IntegrationTests need 1 second! Otherwise tests will fail!
        name='api',
        autoStart=True)

    oaiDownload = OaiDownloadProcessor(path='/oaix',
                                       metadataPrefix=NORMALISED_DOC_NAME,
                                       workingDirectory=join(
                                           statePath, 'harvesterstate',
                                           'gateway'),
                                       userAgentAddition='ApiServer',
                                       xWait=True,
                                       name='api',
                                       autoCommit=False)

    executeQueryHelix = \
        (FilterMessages(allowed=['executeQuery']),
            (CqlMultiSearchClauseConversion(cqlClauseConverters, fromKwarg='query'),
                (DrilldownQueries(),
                    (convertToComposedQuery,
                        (luceneRoHelix,),
                    )
                )
            )
        )

    return \
    (Observable(),
        createDownloadHelix(reactor, periodicGateWayDownload, oaiDownload, storage, oaiJazz, oai_oa_cerifJazz),
        (ObservableHttpServer(reactor, port, compressResponse=True),
            (BasicHttpHandler(),
                (PathFilter(["/oai"]),
                    (OaiPmh(repositoryName="NARCIS OAI-pmh", adminEmail="*****@*****.**", externalUrl="http://oai.narcis.nl"),
                        (oaiJazz,),
                        (StorageAdapter(),
                            (storage,)
                        ),
                        (OaiBranding(
                            url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                            link="http://oai.narcis.nl",
                            title="Narcis - The gateway to scholarly information in The Netherlands"),
                        ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (storage,)
                        )
                    )
                ),
                (PathFilter(["/cerif"]),
                    (OaiPmhDans(repositoryName="OpenAIRE CERIF", adminEmail="*****@*****.**", repositoryIdentifier="services.nod.dans.knaw.nl", externalUrl="http://services.nod.dans.knaw.nl"), #TODO: pathFilter should resemble proxy path
                        (oai_oa_cerifJazz,),
                        (StorageAdapter(),
                            (storage,)
                        ),
                        (OaiOpenAIREDescription(
                            serviceid='organisation:ORG1242054',
                            acronym='services.nod.dans.knaw.nl',
                            name='NARCIS',
                            description='Compliant with the OpenAIRE Guidelines for CRIS Managers v.1.1.',
                            website='https://www.narcis.nl',
                            baseurl='http://services.nod.dans.knaw.nl/oa-cerif',
                            subjectheading='',
                            orgunitid='organisation:ORG1242054',
                            owneracronym='DANS'),
                        ),
                        # (OaiBranding(
                        #     url="http://www.narcis.nl/images/logos/logo-knaw-house.gif",
                        #     link="http://oai.narcis.nl",
                        #     title="Narcis - The gateway to scholarly information in The Netherlands"),
                        # ),
                        (OaiProvenance(
                            nsMap=NAMESPACEMAP,
                            baseURL=('meta', '//meta:repository/meta:baseurl/text()'),
                            harvestDate=('meta', '//meta:record/meta:harvestdate/text()'),
                            metadataNamespace=('meta', '//meta:record/meta:metadataNamespace/text()'),
                            identifier=('header','//oai:identifier/text()'),
                            datestamp=('header', '//oai:datestamp/text()')
                            ),
                            (storage,)
                        )
                    )
                ),
                (PathFilter(['/sru']),
                    (SruParser(
                            host='sru.narcis.nl',
                            port=80,
                            defaultRecordSchema='knaw_short',
                            defaultRecordPacking='xml'),
                        (SruLimitStartRecord(limitBeyond=4000),
                            (SruHandler(
                                    includeQueryTimes=False,
                                    extraXParameters=[],
                                    enableCollectLog=False),
                                (SruTermDrilldown(),),
                                executeQueryHelix,
                                (StorageAdapter(),
                                    (storage,)
                                )
                            )
                        )
                    )
                ),
                (PathFilter('/rss'),
                    (Rss(   supportedLanguages = ['nl','en'], # defaults to first, if requested language is not available or supplied.
                            title = {'nl':'NARCIS', 'en':'NARCIS'},
                            description = {'nl':'NARCIS: De toegang tot de Nederlandse wetenschapsinformatie', 'en':'NARCIS: The gateway to Dutch scientific information'},
                            link = {'nl':'http://www.narcis.nl/?Language=nl', 'en':'http://www.narcis.nl/?Language=en'},
                            maximumRecords = 20),
                        executeQueryHelix,
                        (RssItem(
                                nsMap=NAMESPACEMAP,
                                title = ('knaw_short', {'nl':'//short:metadata/short:titleInfo[not (@xml:lang)]/short:title/text()', 'en':'//short:metadata/short:titleInfo[@xml:lang="en"]/short:title/text()'}),
                                description = ('knaw_short', {'nl':'//short:abstract[not (@xml:lang)]/text()', 'en':'//short:abstract[@xml:lang="en"]/text()'}),
                                pubdate = ('knaw_short', '//short:dateIssued/short:parsed/text()'),
                                linkTemplate = 'http://www.narcis.nl/%(wcpcollection)s/RecordID/%(oai_identifier)s/Language/%(language)s',
                                wcpcollection = ('meta', '//*[local-name() = "collection"]/text()'),
                                oai_identifier = ('meta', '//meta:record/meta:id/text()'),
                                language = ('Dummy: Language is auto provided by the calling RSS component, but needs to be present to serve the linkTemplate.')
                            ),
                            (StorageAdapter(),
                                (storage,)
                            )
                        )
                    )
                )
            )
        )
    )
Example #30
0
class SrwTest(SeecrTestCase):
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.srw = Srw()
        self.sruParser = SruParser()
        self.sruHandler = SruHandler()

        self.srw.addObserver(self.sruParser)
        self.sruParser.addObserver(self.sruHandler)
        self.response = Response(total=1, hits=[Hit('0')])

        def executeQuery(**kwargs):
            return self.response
            yield

        def retrieveData(**kwargs):
            return 'data'
            yield

        self.observer = CallTrace(methods={
            'executeQuery': executeQuery,
            'retrieveData': retrieveData
        },
                                  emptyGeneratorMethods=[
                                      'extraResponseData',
                                      'echoedExtraRequestData',
                                      'additionalDiagnosticDetails',
                                      'extraRecordData'
                                  ])
        self.sruHandler.addObserver(self.observer)

    def testNonSoap(self):
        """Wrong Soap envelope or body"""
        invalidSoapEnvelope = '<?xml version="1.0"?><SOAP:Envelope xmlns:SOAP="http://wrong.example.org/soap/envelope/"><SOAP:Body>%s</SOAP:Body></SOAP:Envelope>'
        request = invalidSoapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""

        response = "".join(list(Srw().handleRequest(Body=request.encode())))
        self.assertEqualsWS(
            """HTTP/1.0 500 Internal Server Error
Content-Type: text/xml; charset=utf-8

<SOAP:Envelope xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/"><SOAP:Body><SOAP:Fault><faultcode>SOAP:VersionMismatch</faultcode><faultstring>The processing party found an invalid namespace for the SOAP Envelope element</faultstring></SOAP:Fault></SOAP:Body></SOAP:Envelope>""",
            response)

    def testMalformedXML(self):
        """Stuff that is not even XML"""
        request = b'This is not even XML'

        response = "".join(self.srw.handleRequest(Body=request))
        self.assertTrue(
            '<faultcode>SOAP:Server.userException</faultcode>' in response)

    def testBadSrwRequest(self):
        request = soapEnvelope % """<srw:searchRetrieveRequest xmlns:srw="http://wrong.example.org/srw">
        <srw:version>1.2</srw:version>
        <srw:query>query</srw:query>
    </srw:searchRetrieveRequest>"""
        response = asString(self.srw.handleRequest(Body=request))
        header, body = response.split('\r\n\r\n')
        self.assertEqual(
            ['1'],
            xpath(XML(body),
                  '//srw:searchRetrieveResponse/srw:numberOfRecords/text()'))

    def testNonSRUArguments(self):
        """Arguments that are invalid in any SRU implementation"""
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % """<SRW:illegalParameter>value</SRW:illegalParameter>"""

        response = "".join(self.srw.handleRequest(Body=request))

        self.assertEqualsWS(
            httpResponse % soapEnvelope %
            """<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
<srw:version>1.1</srw:version><srw:numberOfRecords>0</srw:numberOfRecords><srw:diagnostics><diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">
        <uri>info://srw/diagnostics/1/8</uri>
        <details>illegalParameter</details>
        <message>Unsupported Parameter</message>
    </diagnostic></srw:diagnostics></srw:searchRetrieveResponse>""" %
            namespaces, response)

    def testNonSRWArguments(self):
        """Arguments that are part of SRU, but not of SRW (operation (done), stylesheet)
        """
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % """<SRW:stylesheet>http://example.org/style.xsl</SRW:stylesheet>"""

        response = "".join(self.srw.handleRequest(Body=request))

        self.assertEqualsWS(
            httpResponse % soapEnvelope %
            """<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
<srw:version>1.1</srw:version><srw:numberOfRecords>0</srw:numberOfRecords><srw:diagnostics><diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">
        <uri>info://srw/diagnostics/1/8</uri>
        <details>stylesheet</details>
        <message>Unsupported Parameter</message>
    </diagnostic></srw:diagnostics></srw:searchRetrieveResponse>""" %
            namespaces, response)

    def testOperationIsIllegal(self):
        request = soapEnvelope % SRW_REQUEST % """<SRW:version>1.1</SRW:version><SRW:operation>explain</SRW:operation>"""

        response = "".join(self.srw.handleRequest(Body=request))

        self.assertEqualsWS(
            httpResponse % soapEnvelope %
            """<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
<srw:version>1.1</srw:version><srw:numberOfRecords>0</srw:numberOfRecords><srw:diagnostics><diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">
        <uri>info://srw/diagnostics/1/4</uri>
        <details>explain</details>
        <message>Unsupported Operation</message>
    </diagnostic></srw:diagnostics></srw:searchRetrieveResponse>""" %
            namespaces, response)

    def testContentType(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ''
        response = asString(self.srw.handleRequest(Body=request))
        self.assertTrue('text/xml; charset=utf-8' in response, response)

    def testNormalOperation(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""
        self.response = Response(total=1, hits=[Hit('recordId')])
        del self.observer.methods['retrieveData']

        def retrieveData(identifier, name):
            return "<DATA>%s-%s</DATA>" % (identifier, name)
            yield

        self.observer.methods['retrieveData'] = retrieveData

        result = "".join(compose(self.srw.handleRequest(Body=request)))

        self.assertEqualsWS(
            httpResponse % soapEnvelope % wrappedMockAnswer %
            ('recordId', 'dc.author = "jones" and  dc.title = "smith"'),
            result)

    def testEmptySortKeys(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % "<SRW:sortKeys/>"
        self.response = Response(total=0, hits=[])

        result = "".join(compose(self.srw.handleRequest(Body=request)))

        executeQueryKwargs = self.observer.calledMethods[0].kwargs
        self.assertFalse("sortKeys" in executeQueryKwargs, executeQueryKwargs)

    def testArgumentsAreNotUnicodeStrings(self):
        """JJ/TJ: unicode strings somehow paralyse server requests.
        So ensure every argument is a str!"""
        """KvS d.d. 2007/11/15 - is this true in the Component-context too?"""
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""
        component = Srw()
        arguments = component._soapXmlToArguments(request)
        for key in arguments:
            self.assertTrue(type(key) == str)

    def testExampleFromLibraryOffCongressSite(self):
        """testExampleFromLibraryOffCongressSite - Integration test based on http://www.loc.gov/standards/sru/srw/index.html
        spelling error ("recordSchema") corrected
        """
        request = """<SOAP:Envelope xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/">
  <SOAP:Body>
    <SRW:searchRetrieveRequest xmlns:SRW="http://www.loc.gov/zing/srw/">
      <SRW:version>1.1</SRW:version>
      <SRW:query>dc.author = "jones" and  dc.title = "smith"</SRW:query>
      <SRW:startRecord>1</SRW:startRecord>
      <SRW:maximumRecords>10</SRW:maximumRecords>
      <SRW:recordSchema>info:srw/schema/1/mods-v3.0</SRW:recordSchema>
    </SRW:searchRetrieveRequest>
  </SOAP:Body>
</SOAP:Envelope>"""

        self.response = Response(total=1, hits=[Hit('recordId')])
        del self.observer.methods['retrieveData']

        def retrieveData(identifier, name):
            return "<DATA>%s-%s</DATA>" % (identifier, name)
            yield

        self.observer.methods['retrieveData'] = retrieveData
        response = "".join(compose(self.srw.handleRequest(Body=request)))

        echoRequest = """<srw:echoedSearchRetrieveRequest>
<srw:version>1.1</srw:version>
<srw:query>dc.author = "jones" and  dc.title = "smith"</srw:query>
<srw:startRecord>1</srw:startRecord>
<srw:maximumRecords>10</srw:maximumRecords>
<srw:recordPacking>xml</srw:recordPacking>
<srw:recordSchema>info:srw/schema/1/mods-v3.0</srw:recordSchema>
</srw:echoedSearchRetrieveRequest>"""

        self.assertEqualsWS(
            httpResponse % soapEnvelope % searchRetrieveResponse %
            (1,
             '<srw:records><srw:record><srw:recordSchema>info:srw/schema/1/mods-v3.0</srw:recordSchema><srw:recordPacking>xml</srw:recordPacking><srw:recordData><DATA>recordId-info:srw/schema/1/mods-v3.0</DATA></srw:recordData></srw:record></srw:records>'
             + echoRequest), response)

    @stderr_replaced
    def testConstructorVariablesAreUsed(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""
        srw = Srw(defaultRecordSchema="DEFAULT_RECORD_SCHEMA",
                  defaultRecordPacking="DEFAULT_RECORD_PACKING")
        sruParser = SruParser()
        srw.addObserver(sruParser)
        sruParser.addObserver(self.sruHandler)
        response = Response(total=1, hits=[Hit(1)])

        def executeQuery(**kwargs):
            return response
            yield

        @asyncnoreturnvalue
        def methodAsGenerator(**kwargs):
            pass

        observer = CallTrace(methods={
            'executeQuery': executeQuery,
            'extraResponseData': methodAsGenerator,
            'echoedExtraRequestData': methodAsGenerator,
        },
                             emptyGeneratorMethods=[
                                 'additionalDiagnosticDetails',
                                 'extraRecordData',
                             ])

        self.sruHandler.addObserver(observer)
        response = "".join(compose(srw.handleRequest(Body=request)))
        self.assertTrue("DEFAULT_RECORD_SCHEMA" in response, response)
        self.assertTrue("DEFAULT_RECORD_PACKING" in response, response)
Example #31
0
class SrwTest(SeecrTestCase):
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.srw = Srw()
        self.sruParser = SruParser()
        self.sruHandler = SruHandler()

        self.srw.addObserver(self.sruParser)
        self.sruParser.addObserver(self.sruHandler)
        self.response = StopIteration(Response(total=1, hits=[Hit('0')]))
        def executeQuery(**kwargs):
            raise self.response
            yield
        def retrieveData(**kwargs):
            raise StopIteration('data')
            yield
        self.observer = CallTrace(
            methods={
                'executeQuery': executeQuery,
                'retrieveData': retrieveData
            },
            emptyGeneratorMethods=[
                'extraResponseData',
                'echoedExtraRequestData',
                'additionalDiagnosticDetails',
                'extraRecordData'
            ])
        self.sruHandler.addObserver(self.observer)

    def testNonSoap(self):
        """Wrong Soap envelope or body"""
        invalidSoapEnvelope = '<?xml version="1.0"?><SOAP:Envelope xmlns:SOAP="http://wrong.example.org/soap/envelope/"><SOAP:Body>%s</SOAP:Body></SOAP:Envelope>'
        request = invalidSoapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""

        response = "".join(list(Srw().handleRequest(Body=request)))
        self.assertEqualsWS("""HTTP/1.0 500 Internal Server Error
Content-Type: text/xml; charset=utf-8

<SOAP:Envelope xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/"><SOAP:Body><SOAP:Fault><faultcode>SOAP:VersionMismatch</faultcode><faultstring>The processing party found an invalid namespace for the SOAP Envelope element</faultstring></SOAP:Fault></SOAP:Body></SOAP:Envelope>""", response)

    def testMalformedXML(self):
        """Stuff that is not even XML"""
        request = 'This is not even XML'

        response = "".join(self.srw.handleRequest(Body=request))
        self.assertTrue('<faultcode>SOAP:Server.userException</faultcode>' in response)

    def testBadSrwRequest(self):
        request = soapEnvelope % """<srw:searchRetrieveRequest xmlns:srw="http://wrong.example.org/srw">
        <srw:version>1.2</srw:version>
        <srw:query>query</srw:query>
    </srw:searchRetrieveRequest>"""
        response = asString(self.srw.handleRequest(Body=request))
        header, body = response.split('\r\n\r\n')
        self.assertEquals(['1'], xpath(XML(body), '//srw:searchRetrieveResponse/srw:numberOfRecords/text()'))

    def testNonSRUArguments(self):
        """Arguments that are invalid in any SRU implementation"""
        request =  soapEnvelope % SRW_REQUEST % argumentsWithMandatory % """<SRW:illegalParameter>value</SRW:illegalParameter>"""

        response = "".join(self.srw.handleRequest(Body=request))

        self.assertEqualsWS(httpResponse % soapEnvelope % """<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
<srw:version>1.1</srw:version><srw:numberOfRecords>0</srw:numberOfRecords><srw:diagnostics><diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">
        <uri>info://srw/diagnostics/1/8</uri>
        <details>illegalParameter</details>
        <message>Unsupported Parameter</message>
    </diagnostic></srw:diagnostics></srw:searchRetrieveResponse>""" % namespaces, response)

    def testNonSRWArguments(self):
        """Arguments that are part of SRU, but not of SRW (operation (done), stylesheet)
        """
        request =  soapEnvelope % SRW_REQUEST % argumentsWithMandatory % """<SRW:stylesheet>http://example.org/style.xsl</SRW:stylesheet>"""

        response = "".join(self.srw.handleRequest(Body=request))

        self.assertEqualsWS(httpResponse % soapEnvelope % """<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
<srw:version>1.1</srw:version><srw:numberOfRecords>0</srw:numberOfRecords><srw:diagnostics><diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">
        <uri>info://srw/diagnostics/1/8</uri>
        <details>stylesheet</details>
        <message>Unsupported Parameter</message>
    </diagnostic></srw:diagnostics></srw:searchRetrieveResponse>""" % namespaces, response)

    def testOperationIsIllegal(self):
        request = soapEnvelope % SRW_REQUEST % """<SRW:version>1.1</SRW:version><SRW:operation>explain</SRW:operation>"""

        response = "".join(self.srw.handleRequest(Body=request))

        self.assertEqualsWS(httpResponse % soapEnvelope % """<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
<srw:version>1.1</srw:version><srw:numberOfRecords>0</srw:numberOfRecords><srw:diagnostics><diagnostic xmlns="http://www.loc.gov/zing/srw/diagnostic/">
        <uri>info://srw/diagnostics/1/4</uri>
        <details>explain</details>
        <message>Unsupported Operation</message>
    </diagnostic></srw:diagnostics></srw:searchRetrieveResponse>""" % namespaces, response)

    def testContentType(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ''
        response = asString(self.srw.handleRequest(Body=request))
        self.assertTrue('text/xml; charset=utf-8' in response, response)

    def testNormalOperation(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""
        self.response = StopIteration(Response(total=1, hits=[Hit('recordId')]))
        del self.observer.methods['retrieveData']
        def retrieveData(identifier, name):
            raise StopIteration("<DATA>%s-%s</DATA>" % (identifier, name))
            yield
        self.observer.methods['retrieveData'] = retrieveData

        result = "".join(compose(self.srw.handleRequest(Body=request)))

        self.assertEqualsWS(httpResponse % soapEnvelope % wrappedMockAnswer % ('recordId', 'dc.author = "jones" and  dc.title = "smith"'), result)

    def testEmptySortKeys(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % "<SRW:sortKeys/>"
        self.response = StopIteration(Response(total=0, hits=[]))

        result = "".join(compose(self.srw.handleRequest(Body=request)))

        executeQueryKwargs = self.observer.calledMethods[0].kwargs
        self.assertFalse("sortKeys" in executeQueryKwargs, executeQueryKwargs)

    def testArgumentsAreNotUnicodeStrings(self):
        """JJ/TJ: unicode strings somehow paralyse server requests.
        So ensure every argument is a str!"""
        """KvS d.d. 2007/11/15 - is this true in the Component-context too?"""
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""
        component = Srw()
        arguments = component._soapXmlToArguments(request)
        for key in arguments:
            self.assertTrue(type(key) == str)

    def testExampleFromLibraryOffCongressSite(self):
        """testExampleFromLibraryOffCongressSite - Integration test based on http://www.loc.gov/standards/sru/srw/index.html
        spelling error ("recordSchema") corrected
        """
        request = """<SOAP:Envelope xmlns:SOAP="http://schemas.xmlsoap.org/soap/envelope/">
  <SOAP:Body>
    <SRW:searchRetrieveRequest xmlns:SRW="http://www.loc.gov/zing/srw/">
      <SRW:version>1.1</SRW:version>
      <SRW:query>dc.author = "jones" and  dc.title = "smith"</SRW:query>
      <SRW:startRecord>1</SRW:startRecord>
      <SRW:maximumRecords>10</SRW:maximumRecords>
      <SRW:recordSchema>info:srw/schema/1/mods-v3.0</SRW:recordSchema>
    </SRW:searchRetrieveRequest>
  </SOAP:Body>
</SOAP:Envelope>"""

        self.response = StopIteration(Response(total=1, hits=[Hit('recordId')]))
        del self.observer.methods['retrieveData']
        def retrieveData(identifier, name):
            raise StopIteration("<DATA>%s-%s</DATA>" % (identifier, name))
            yield
        self.observer.methods['retrieveData'] = retrieveData
        response = "".join(compose(self.srw.handleRequest(Body=request)))

        echoRequest = """<srw:echoedSearchRetrieveRequest>
<srw:version>1.1</srw:version>
<srw:query>dc.author = "jones" and  dc.title = "smith"</srw:query>
<srw:startRecord>1</srw:startRecord>
<srw:maximumRecords>10</srw:maximumRecords>
<srw:recordPacking>xml</srw:recordPacking>
<srw:recordSchema>info:srw/schema/1/mods-v3.0</srw:recordSchema>
</srw:echoedSearchRetrieveRequest>"""

        self.assertEqualsWS(httpResponse % soapEnvelope % searchRetrieveResponse % (1, '<srw:records><srw:record><srw:recordSchema>info:srw/schema/1/mods-v3.0</srw:recordSchema><srw:recordPacking>xml</srw:recordPacking><srw:recordData><DATA>recordId-info:srw/schema/1/mods-v3.0</DATA></srw:recordData></srw:record></srw:records>' +echoRequest), response)

    @stderr_replaced
    def testConstructorVariablesAreUsed(self):
        request = soapEnvelope % SRW_REQUEST % argumentsWithMandatory % ""
        srw = Srw(
            defaultRecordSchema="DEFAULT_RECORD_SCHEMA",
            defaultRecordPacking="DEFAULT_RECORD_PACKING")
        sruParser = SruParser()
        srw.addObserver(sruParser)
        sruParser.addObserver(self.sruHandler)
        response = Response(total=1, hits=[Hit(1)])
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        @asyncnoreturnvalue
        def methodAsGenerator(**kwargs):
            pass
        observer = CallTrace(
            methods={
                'executeQuery': executeQuery,
                'extraResponseData': methodAsGenerator,
                'echoedExtraRequestData': methodAsGenerator,
            },
            emptyGeneratorMethods=[
                'additionalDiagnosticDetails',
                'extraRecordData',
            ])

        self.sruHandler.addObserver(observer)
        response = "".join(compose(srw.handleRequest(Body=request)))
        self.assertTrue("DEFAULT_RECORD_SCHEMA" in response, response)
        self.assertTrue("DEFAULT_RECORD_PACKING" in response, response)
        kwargs = sruHandlerKwargs(x_term_drilldown='field0:1')
        observer.calledMethods.reset()
        try:
            ''.join(compose(sruHandler.searchRetrieve(**kwargs)))
        except KeyboardInterrupt, e:
            self.assertEquals('Ok', str(e))
        else:
            self.fail('Should not come here')

        self.assertEquals(['executeQuery'], observer.calledMethodNames())
        self.assertEquals([dict(fieldname='field0', maxTerms=1, sortBy=DRILLDOWN_SORTBY_COUNT)], observer.calledMethods[0].kwargs['facets'])

        # Too high
        kwargs = sruHandlerKwargs(x_term_drilldown='field0:4')

        sruHandler = SruHandler(drilldownMaximumMaximumResults=drilldownMaximumMaximumResults)
        observer = CallTrace('observer')
        sruHandler.addObserver(observer)
        def executeQuery(**kwargs):
            raise KeyboardInterrupt('Should have failed before triggering this exception!')
            yield
        observer.methods['executeQuery'] = executeQuery

        try:
            ''.join(compose(sruHandler.searchRetrieve(**kwargs)))
        except SruException, e:
            self.assertEquals('field0; drilldown with maximumResults > 3', str(e))
        except KeyboardInterrupt, e:
            self.fail(str(e))
        else:
            self.fail('Should not come here')
Example #33
0
def main(reactor, port, databasePath):
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.fieldHier', hierarchical=True)
    ]

    fieldRegistry = FieldRegistry(drilldownFields)
    luceneSettings = LuceneSettings(fieldRegistry=fieldRegistry,
                                    commitCount=30,
                                    commitTimeout=1,
                                    analyzer=MerescoDutchStemmingAnalyzer())
    lucene = Lucene(path=join(databasePath, 'lucene'),
                    reactor=reactor,
                    name='main',
                    settings=luceneSettings)

    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry,
                                     commitTimeout=0.1)
    lucene2 = Lucene(path=join(databasePath, 'lucene2'),
                     reactor=reactor,
                     name='main2',
                     settings=lucene2Settings)

    termNumerator = TermNumerator(path=join(databasePath, 'termNumerator'))

    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    multiLuceneHelix = (
        MultiLucene(defaultCore='main'),
        (Lucene(path=join(databasePath, 'lucene-empty'),
                reactor=reactor,
                name='empty-core',
                settings=emptyLuceneSettings), ),
        (lucene, ),
        (lucene2, ),
    )
    storageComponent = StorageComponent(
        directory=join(databasePath, 'storage'))

    return \
    (Observable(),
        (ObservableHttpServer(reactor=reactor, port=port),
            (BasicHttpHandler(),
                (ApacheLogger(outputStream=stdout),
                    (PathFilter("/info", excluding=[
                            '/info/version',
                            '/info/name',
                            '/update',
                            '/sru',
                            '/remote',
                            '/via-remote-sru',
                        ]),
                        (DynamicHtml(
                                [dynamicPath],
                                reactor=reactor,
                                indexPage='/info',
                                additionalGlobals={
                                    'VERSION': version,
                                }
                            ),
                        )
                    ),
                    (PathFilter("/info/version"),
                        (StringServer(version, ContentTypePlainText), )
                    ),
                    (PathFilter("/info/name"),
                        (StringServer('Meresco Lucene', ContentTypePlainText),)
                    ),
                    (PathFilter("/static"),
                        (PathRename(lambda path: path[len('/static'):]),
                            (FileServer(staticPath),)
                        )
                    ),
                    (PathFilter("/update_main", excluding=['/update_main2']),
                        uploadHelix(lucene, termNumerator, storageComponent, drilldownFields, fieldRegistry=luceneSettings.fieldRegistry),
                    ),
                    (PathFilter("/update_main2"),
                        uploadHelix(lucene2, termNumerator, storageComponent, drilldownFields, fieldRegistry=lucene2Settings.fieldRegistry),
                    ),
                    (PathFilter('/sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (MultiCqlToLuceneQuery(
                                    defaultCore='main',
                                    coreToCqlLuceneQueries={
                                        "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                                        "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                                        "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
                                    }),
                                    multiLuceneHelix,
                                ),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    (PathFilter('/via-remote-sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    (PathFilter('/remote'),
                        (LuceneRemoteService(reactor=reactor),
                            (MultiCqlToLuceneQuery(
                                    defaultCore='main',
                                    coreToCqlLuceneQueries={
                                        "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                                        "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                                        "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
                                    }),
                                multiLuceneHelix,
                            )
                        )
                    ),
                    (PathFilter('/autocomplete'),
                        (Autocomplete('localhost', port, '/autocomplete', '__all__', '?', 5, '?', '?'),
                            (lucene,),
                        )
                    )
                )
            )
        )
    )
    def testSearchRetrieveVersion11(self):
        queryArguments = {'version':'1.1', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2}
        sruArguments = {'version':'1.1', 'operation':'searchRetrieve',  'recordSchema':'schema', 'recordPacking':'xml', 'query':'field=value', 'startRecord':1, 'maximumRecords':2, 'x-recordSchema':['extra', 'evenmore']}

        observer = CallTrace()
        response = Response(total=100, hits=hitsRange(11, 13))
        def executeQuery(**kwargs):
            raise StopIteration(response)
            yield
        observer.methods['executeQuery'] = executeQuery

        retrieveDataCalls = []
        def retrieveData(identifier, name):
            retrieveDataCalls.append(1)
            raise StopIteration("<MOCKED_WRITTEN_DATA>%s-%s</MOCKED_WRITTEN_DATA>" % (xmlEscape(identifier), name))
            yield
        observer.retrieveData = retrieveData

        observer.methods['extraResponseData'] = lambda *a, **kw: (x for x in 'extraResponseData')
        observer.methods['echoedExtraRequestData'] = lambda *a, **kw: (x for x in 'echoedExtraRequestData')
        observer.methods['extraRecordData'] = lambda hit: (f for f in [])

        component = SruHandler()
        component.addObserver(observer)

        result = "".join(compose(component.searchRetrieve(sruArguments=sruArguments, **queryArguments)))

        self.assertEqualsWS("""
<srw:searchRetrieveResponse %(xmlns_srw)s %(xmlns_diag)s %(xmlns_xcql)s %(xmlns_dc)s %(xmlns_meresco_srw)s>
    <srw:version>1.1</srw:version>
    <srw:numberOfRecords>100</srw:numberOfRecords>
    <srw:records>
        <srw:record>
            <srw:recordSchema>schema</srw:recordSchema>
            <srw:recordPacking>xml</srw:recordPacking>
            <srw:recordData>
                <MOCKED_WRITTEN_DATA>11-schema</MOCKED_WRITTEN_DATA>
            </srw:recordData>
            <srw:extraRecordData>
                <srw:record>
                    <srw:recordSchema>extra</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>11-extra</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
                <srw:record>
                    <srw:recordSchema>evenmore</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>11-evenmore</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
            </srw:extraRecordData>
        </srw:record>
        <srw:record>
            <srw:recordSchema>schema</srw:recordSchema>
            <srw:recordPacking>xml</srw:recordPacking>
            <srw:recordData>
                <MOCKED_WRITTEN_DATA>12-schema</MOCKED_WRITTEN_DATA>
            </srw:recordData>
            <srw:extraRecordData>
                <srw:record>
                    <srw:recordSchema>extra</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>12-extra</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
                <srw:record>
                    <srw:recordSchema>evenmore</srw:recordSchema>
                    <srw:recordPacking>xml</srw:recordPacking>
                    <srw:recordData>
                    <MOCKED_WRITTEN_DATA>12-evenmore</MOCKED_WRITTEN_DATA>
                    </srw:recordData>
                </srw:record>
            </srw:extraRecordData>
        </srw:record>
    </srw:records>
    <srw:nextRecordPosition>3</srw:nextRecordPosition>
    <srw:echoedSearchRetrieveRequest>
        <srw:version>1.1</srw:version>
        <srw:query>field=value</srw:query>
        <srw:startRecord>1</srw:startRecord>
        <srw:maximumRecords>2</srw:maximumRecords>
        <srw:recordPacking>xml</srw:recordPacking>
        <srw:recordSchema>schema</srw:recordSchema>
        <srw:x-recordSchema>extra</srw:x-recordSchema>
        <srw:x-recordSchema>evenmore</srw:x-recordSchema>
        <srw:extraRequestData>echoedExtraRequestData</srw:extraRequestData>
    </srw:echoedSearchRetrieveRequest>
    <srw:extraResponseData>extraResponseData</srw:extraResponseData>
</srw:searchRetrieveResponse>
""" % namespaces, result)