def testReindex(self):
    """Call /reindex twice for one session and check the reported batch progress.

    The first response must be exactly "#\\n=batches: 1". The second response
    must end with '=batches left: 0' and contain the escaped resolve URL line
    for this server's port.
    """
    header, body = getRequest(self.portNumber, '/reindex', {'session': 'newReindex'}, parse=False)
    self.assertEqual("#\n=batches: 1", body)

    header, body = getRequest(self.portNumber, '/reindex', {'session': 'newReindex'}, parse=False)
    lines = body.split('\n')
    self.assertEqual('=batches left: 0', lines[-1])
    expectedLine = '+http:%2F%2Flocalhost:' + str(self.portNumber) + '%2Fresolve%2Furn%253Aex%253AAnno'
    self.assertTrue(expectedLine in lines, lines)
def testAdmin(self):
    """Log in, change the password through the web form and log in again.

    Steps: fetch /login for a session cookie, post the login form, check the
    login bar on /index, check the /changepassword form, post the password
    change, then post the login form twice with a fresh cookie. The first of
    those two attempts must redirect back to '/login', the second to '/'.

    NOTE(review): the credential literals are masked ("******") in this copy,
    so the last two login posts look identical. Presumably the first used the
    old password and the second the new one -- confirm against version control.
    """
    headers, body = getRequest(self.portNumber, "/login", parse='lxml')
    cookie = parseHeaders(headers)['Set-Cookie']

    headers, body = postRequest(
        self.portNumber,
        '/login.action',
        urlencode(dict(username="******", password="******", formUrl='/login')),
        parse='lxml',
        additionalHeaders={'Cookie': cookie})
    self.assertTrue('302' in headers, headers)
    self.assertEqual('/', parseHeaders(headers)['Location'])

    headers, body = getRequest(self.portNumber, "/index", parse='lxml', additionalHeaders={'Cookie': cookie})
    self.assertEqual(
        ['Logged in as: admin | ', ' | ', ' | ', ' | '],
        xpath(body, '//div[@id="loginbar"]/p/text()'))

    headers, body = getRequest(self.portNumber, "/changepassword", parse='lxml', additionalHeaders={'Cookie': cookie})
    self.assertEqual(
        ['admin'],
        xpath(body, '/html/body/div[@id="content"]/div[@id="login"]/form/input[@type="hidden" and @name="username"]/@value'),
        tostring(body))
    self.assertEqual(
        ['oldPassword', 'newPassword', 'retypedPassword'],
        xpath(body, '/html/body/div[@id="content"]/div[@id="login"]/form/dl/dd/input[@type="password"]/@name'),
        tostring(body))
    self.assertEqual(
        ['/login.action/changepassword'],
        xpath(body, '/html/body/div[@id="content"]/div[@id="login"]/form/@action'))

    headers, body = postRequest(
        self.portNumber,
        '/login.action/changepassword',
        urlencode(dict(username="******", oldPassword="******", newPassword="******", retypedPassword="******", formUrl="/changepassword")),
        parse='lxml',
        additionalHeaders={'Cookie': cookie})
    self.assertTrue('302' in headers, headers)
    self.assertEqual('/', parseHeaders(headers)['Location'])

    # Test new password
    headers, body = getRequest(self.portNumber, "/login", parse='lxml')
    newcookie = parseHeaders(headers)['Set-Cookie']

    headers, body = postRequest(
        self.portNumber,
        '/login.action',
        urlencode(dict(username="******", password="******", formUrl='/login')),
        parse='lxml',
        additionalHeaders={'Cookie': newcookie})
    self.assertTrue('302' in headers, headers)
    # A redirect back to the login form means this attempt was rejected.
    self.assertEqual('/login', parseHeaders(headers)['Location'])

    headers, body = postRequest(
        self.portNumber,
        '/login.action',
        urlencode(dict(username="******", password="******", formUrl='/login')),
        parse='lxml',
        additionalHeaders={'Cookie': newcookie})
    self.assertTrue('302' in headers, headers)
    self.assertEqual('/', parseHeaders(headers)['Location'])
def testClear(self):
    """Harvest, save the repository with action 'clear', harvest again and check the result.

    After the second run the dump directory must hold 18 files, the last 8
    (sorted by name) being delete requests, and GetStatus must report total 0.
    """
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(BATCHSIZE, self.sizeDumpDir())
    _, statusBefore = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        dict(verb='GetStatus', domainId=DOMAIN, repositoryId=REPOSITORY))
    self.assertEqual(8, statusBefore['response']['GetStatus'][0]['total'])

    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(18, self.sizeDumpDir())

    dumpedFiles = sorted(listdir(self.dumpDir))
    for dumpedFile in dumpedFiles[-8:]:
        self.assertTrue('_delete.updateRequest' in dumpedFile, dumpedFile)

    _, statusAfter = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        dict(verb='GetStatus', domainId=DOMAIN, repositoryId=REPOSITORY))
    self.assertEqual(0, statusAfter['response']['GetStatus'][0]['total'])
def testIndexingState(self):
    """Check /indexingState before and after requesting the n-gram index.

    Before the request the body must be "{}". After it, the JSON body must
    contain the keys "started" and "count".
    """
    def fetchIndexingState():
        # Same request is issued before and after triggering the index build.
        return getRequest(self.suggestionServerPort, '/indexingState', parse=False)

    header, body = fetchIndexingState()
    self.assertTrue("200 OK" in header.upper(), header + body)
    self.assertTrue("Content-Type: application/json" in header, header)
    self.assertEqual("{}", body)

    postRequest(self.suggestionServerPort, '/createSuggestionNGramIndex', data=None, parse=False)

    header, body = fetchIndexingState()
    self.assertTrue("200 OK" in header.upper(), header + body)
    self.assertTrue("Content-Type: application/json" in header, header)
    self.assertNotEqual("{}", body)
    for expectedKey in ("started", "count"):
        self.assertTrue(expectedKey in loads(body), body)
def testOacBodiesStored(self):
    """Follow the rdf:about URL of an annotation body and check the stored RDF.

    The body URL is taken from the SRU result for query "IamUnique42"; only
    its path component is requested from this server.
    """
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.1", operation="searchRetrieve", query="IamUnique42"),
        parse='lxml')
    oacBody = xpath(body, "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF/oa:Annotation/oa:hasBody/oa:Body")[0]
    about = getAttrib(oacBody, "rdf:about")
    # Only the path is needed; scheme, host, query and fragment are ignored.
    path = urlsplit(about).path

    headers, body = getRequest(self.portNumber, path, parse=False)
    self.assertTrue('200' in headers, headers)
    lines = body.split('\n')
    self.assertEqual('<?xml version="1.0" encoding="utf-8"?>', lines[0])
    self.assertEqual('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">', lines[1])
    self.assertEqual('</rdf:RDF>', lines[-1])
def testAutocompleteWithSuggestionIndexComponent(self):
    """Add two suggestion records, build the n-gram index and query /suggestion.

    The unfiltered query for 'ha' must return both values; with the filter
    "type=uri:book" only "harry" remains. Both records are deleted and the
    change committed afterwards, also when an assertion fails.
    """
    # (add path, delete path, JSON payload) per record.
    records = [
        ('/add?identifier=id1', '/delete?identifier=id1',
            """{ "key": 1, "values": ["harry"], "types": ["uri:book"], "creators": ["rowling"] }"""),
        ('/add?identifier=id2', '/delete?identifier=id2',
            """{ "key": 2, "values": ["hallo"], "types": ["uri:ebook"], "creators": [null] }"""),
    ]
    for addPath, _, data in records:
        postRequest(self.suggestionServerPort, addPath, data=data, parse=False)

    try:
        postRequest(self.suggestionServerPort, '/createSuggestionNGramIndex?wait=True', data=None, parse=False)

        header, body = getRequest(port=self.httpPort, path='/suggestion', arguments={'value': 'ha'}, parse=False)
        self.assertEqual(["ha", ["harry", "hallo"]], loads(body))

        filteredArguments = {'value': 'ha', "filter": "type=uri:book"}
        header, body = getRequest(port=self.httpPort, path='/suggestion', arguments=filteredArguments, parse=False)
        self.assertEqual(["ha", ["harry"]], loads(body))
    finally:
        for _, deletePath, _ in records:
            postRequest(self.suggestionServerPort, deletePath, data=None, parse=False)
        postRequest(self.suggestionServerPort, '/commit', data=None, parse=False)
def testGetDomains(self):
    """GetDomainIds must return exactly one domain id: 'adomain'."""
    _, responseBody = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        {'verb': 'GetDomainIds'},
        parse=False)
    parsed = loads(responseBody)
    self.assertEqual(1, len(parsed['response']['GetDomainIds']))
    self.assertEqual(['adomain'], parsed['response']['GetDomainIds'])
def testRssForHarvesterStatus(self):
    """Check the /rss feed for 'integrationtest' after a harvest with no invalid records.

    Verifies the channel title, description, link and ttl, and the title,
    description counters and link of the first item.
    """
    self.controlHelper(action="noneInvalid")
    self.startHarvester(repository=REPOSITORY)
    _, rss = getRequest(
        self.harvesterInternalServerPortNumber,
        '/rss',
        {'domainId': 'adomain', 'repositoryId': 'integrationtest'},
        parse='lxml')

    self.assertEqual(
        "Harvester status voor integrationtest",
        xpath(rss, "/rss/channel/title/text()")[0])
    self.assertEqual(
        "Recente repository harvest status voor integrationtest in adomain",
        xpath(rss, "/rss/channel/description/text()")[0])
    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
        xpath(rss, "/rss/channel/link/text()")[0])
    self.assertEqual(str(60 * 6), xpath(rss, "/rss/channel/ttl/text()")[0])

    self.assertEqual(
        "Harvester status voor integrationtest",
        xpath(rss, "/rss/channel/item[1]/title/text()")[0])
    itemDescription = xpath(rss, "/rss/channel/item[1]/description/text()")[0]
    expectedFragments = (
        "Last harvest date: ",
        "Total records: 8",
        "Harvested records: 10",
        "Uploaded records: 8",
        "Deleted records: 2",
        "Validation errors: 0",
        "Errors: 0",
    )
    for fragment in expectedFragments:
        self.assertTrue(fragment in itemDescription, itemDescription)
    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
        xpath(rss, "/rss/channel/item[1]/link/text()")[0])
def testRssForStatusChangesOk(self):
    """Check the /running.rss feed of 'adomain' after a harvest.

    The feed must hold one item titled "integrationtest: Ok" whose description
    and guid carry today's UTC date.
    """
    self.startHarvester(repository=REPOSITORY)
    _, rss = getRequest(
        self.harvesterInternalServerPortNumber,
        '/running.rss',
        {'domainId': 'adomain'},
        parse='lxml')

    self.assertEqual(
        "Harvest status changes for domain 'adomain'",
        xpath(rss, "/rss/channel/title/text()")[0])
    self.assertEqual(
        "Status changes per repository for domain 'adomain'",
        xpath(rss, "/rss/channel/description/text()")[0])
    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain",
        xpath(rss, "/rss/channel/link/text()")[0])
    self.assertEqual(str(60 * 6), xpath(rss, "/rss/channel/ttl/text()")[0])

    today = strftime("%Y-%m-%d", gmtime())
    items = xpath(rss, "/rss/channel/item")
    self.assertEqual(1, len(items))
    onlyItem = items[0]
    self.assertEqual("integrationtest: Ok", ''.join(xpath(onlyItem, "title/text()")))
    description = ''.join(xpath(onlyItem, "description/text()"))
    self.assertTrue(description.startswith("Harvest time: %s" % today), description)
    # Everything before the first 'T' of the guid is compared.
    guid = ''.join(xpath(onlyItem, "guid/text()"))
    self.assertEqual('integrationtest:%s' % today, guid.split('T')[0])
    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
        xpath(onlyItem, "link/text()")[0])
def testGetStatusForDomain(self):
    """GetStatus for 'adomain' must return two status entries and echo the domainId."""
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        {'verb': 'GetStatus', 'domainId': 'adomain'},
        parse=False)
    data = JsonDict.loads(result)
    self.assertEqual(2, len(data['response']['GetStatus']))
    self.assertEqual("adomain", data['request']['domainId'])
# Send an SRU searchRetrieve request (version 1.2) to self.httpPort and return the response body.
# maximumRecords, startRecord and sortKeys are only added to the request when they are not None;
# facet, when not None, is sent as "x-term-drilldown".
# NOTE(review): this copy has lost its line breaks, so it cannot be seen here whether
# 'x-drilldown-format' is set unconditionally or only together with facet -- confirm
# against version control before reformatting this method.
def doSruQuery(self, query, maximumRecords=None, startRecord=None, sortKeys=None, facet=None, path='/sru', drilldownFormat='xml'): arguments = { 'version': '1.2', 'operation': 'searchRetrieve', 'query': query, } if maximumRecords is not None: arguments['maximumRecords'] = maximumRecords if startRecord is not None: arguments['startRecord'] = startRecord if sortKeys is not None: arguments["sortKeys"] = sortKeys if facet is not None: arguments["x-term-drilldown"] = facet arguments['x-drilldown-format'] = drilldownFormat header, body = getRequest(port=self.httpPort, path=path, arguments=arguments) return body
def testViewInvalidRecord(self):
    """Check the /invalidRecord page for record 'oai:record:01'.

    The page must show the repository/record heading, a link back to the
    invalid-records page and the validation message "Invalid data".
    """
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/invalidRecord',
        {'domainId': 'adomain', 'repositoryId': 'integrationtest', 'recordId': 'oai:record:01'},
        parse='lxml')
    self.assertEqual(
        "Repository integrationtest - Record oai:record:01",
        result.xpath("//h3/text()")[0])
    self.assertEqual(
        "/page/invalid/?domainId=adomain&repositoryId=integrationtest",
        result.xpath("/div/p/a/@href")[0])
    self.assertEqual(["Invalid data"], result.xpath("/div/pre/text()"))
def testOaixInfo(self):
    """The /oaix/info/index page must be served as HTML and mention 'normdoc'."""
    expectedHeader = 'HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8'
    header, page = getRequest(self.gatewayPort, '/oaix/info/index')
    self.assertEqual(expectedHeader, header)
    serialized = etree.tostring(page)
    self.assertTrue('normdoc' in serialized)
def testRemoteInfoCore(self):
    """Check the field and drilldown-field lists on /remote/info/core for core 'main'.

    The first <ul> of the page must list the 19 expected index fields in
    order; the second <ul> must hold the three drilldown fields (any order).
    """
    header, body = getRequest(
        port=self.httpPort,
        path='/remote/info/core',
        arguments=dict(name='main'),
        parse=False)
    self.assertFalse('Traceback' in body, body)  # only tested for MultiLucene situation for now!
    bodyLxml = HTML(body)
    lists = bodyLxml.xpath('//ul')

    fieldList = lists[0]
    fields = fieldList.xpath('li/a/text()')
    self.assertEqual(19, len(fields))
    self.assertEqual([
        '$facets',
        '__id__',
        '__key__.field',
        'copy',
        'field1',
        'field2',
        'field3',
        'field4',
        'field5',
        'field_missing',
        'intfield1',
        'intfield2',
        'intfield3',
        'intfield_missing',
        'sorted.field2',
        'sorted.field4',
        'sorted.intfield1',
        'sorted.intfield_missing',
        'untokenized.field3',
    ], fields)

    drilldownFieldList = lists[1]
    drilldownFields = drilldownFieldList.xpath('li/a/text()')
    self.assertEqual(
        set(['untokenized.field2', 'untokenized.fieldHier', 'untokenized.field2.copy']),
        set(drilldownFields))
def testRemoteInfoField(self):
    """The /remote/info/field page for '__id__' must contain ': 1' exactly 50 times."""
    header, body = getRequest(
        port=self.httpPort,
        path='/remote/info/field',
        arguments=dict(fieldname='__id__', name='main'),
        parse=False)
    self.assertEqual(50, body.count(': 1'), body)
def assertQuery(self, query, count):
    """Assert that an SRU searchRetrieve for *query* reports *count* records.

    On a mismatch the full response is printed first, to help diagnose the
    failure, and then the assertion fails.
    """
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.1", operation="searchRetrieve", query=query),
        parse='lxml')
    recordCount = int(xpath(body, '/srw:searchRetrieveResponse/srw:numberOfRecords/text()')[0])
    if recordCount != count:
        # Parenthesised single-argument form prints the same on Python 2 and 3.
        print(tostring(body))
    self.assertEqual(count, recordCount)
def testProvenanceMetaDataNamespace(self):
    """ListRecords for prefix 'metadata' must return 18 records with 'didl' provenance namespaces."""
    # GMH21 OK
    oaiArguments = dict(verb="ListRecords", metadataPrefix='metadata')
    header, body = getRequest(self.apiPort, '/oai', oaiArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    records = xpath(body, "//oai:ListRecords/oai:record")
    self.assertEqual(18, len(records))

    provenanceNamespaces = testNamespaces.xpath(
        body, "//oaiprov:originDescription/oaiprov:metadataNamespace/text()")
    for namespace in provenanceNamespaces:
        self.assertTrue('didl' in namespace)
def testOaiIdentify(self):
    """Check repository name, admin e-mail and branding title of the OAI Identify response."""
    # GMH21 OK
    header, identify = getRequest(self.apiPort, '/oai', dict(verb="Identify"))
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    repositoryName = xpathFirst(identify, '//oai:Identify/oai:repositoryName/text()')
    self.assertEqual('Gemeenschappelijke Metadata Harvester DANS-KB', repositoryName)

    adminEmail = xpathFirst(identify, '//oai:Identify/oai:adminEmail/text()')
    self.assertEqual('*****@*****.**', adminEmail)

    brandingTitle = testNamespaces.xpathFirst(
        identify,
        '//oai:Identify/oai:description/oaibrand:branding/oaibrand:collectionIcon/oaibrand:title/text()')
    self.assertEqual('Gemeenschappelijke Metadata Harvester (GMH) van DANS en de KB', brandingTitle)
def _doQuery(self, query, path=None, additionalHeaders=None, statusCode='200'):
    """Run an SRU 1.2 searchRetrieve request and return the response parsed with XML().

    path falls back to '/sru' when it is falsy. The test fails when
    statusCode does not occur in the first line of the response header.
    """
    if not path:
        path = '/sru'
    sruArguments = dict(query=query, version='1.2', operation='searchRetrieve')
    header, body = getRequest(
        self.erfGeoEnrichmentPort,
        path,
        sruArguments,
        parse=False,
        additionalHeaders=additionalHeaders)
    statusLine = header.split('\r\n', 1)[0]
    self.assertTrue(statusCode in statusLine)
    return XML(body)
def testListInvalidRecordsForOneRepository(self):
    """Check the /invalid page of 'integrationtest' after harvesting with all records invalid.

    The page must list the six invalid record ids in the expected order,
    link the first one to its invalidRecord page and link back to the
    harvester status page.
    """
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/invalid',
        {'domainId': 'adomain', 'repositoryId': 'integrationtest'},
        parse='lxml')
    self.assertEqual(
        ['oai:record:08', 'oai:record:07', 'oai:record:05', 'oai:record:04', 'oai:record:02/&gkn', 'oai:record:01'],
        result.xpath("/div/table/tr/td[@class='link']/a/text()"))
    self.assertEqual(
        "/page/invalidRecord/?recordId=oai%3Arecord%3A08&domainId=adomain&repositoryId=integrationtest",
        result.xpath("/div/table/tr/td[@class='link']/a")[0].attrib['href'])
    self.assertEqual(
        "/page/showHarvesterStatus/show?domainId=adomain&repositoryId=integrationtest",
        result.xpath("/div/p/a/@href")[0])
# Requests ListRecords with metadataPrefix 'erfGeoEnrichment', maps each annotation by its
# oa:hasTarget resource and checks two of them: one for which no ErfGeo query could be built
# (no body, no source) and one that resolves to the place 'Soestdijk' with a WKT point.
# NOTE(review): in this copy the string literal that starts with 'http://' in the
# 'NIOD_BBWO2:niod:3366459' section is masked and has swallowed the rest of its statement
# ('http://*****:*****@rdf:resource'), so the line is not valid Python as shown.
# Restore the original assertion from version control before editing this method.
def testOaiListRecords(self): header, body = getRequest(self.erfGeoEnrichmentPort, '/oai', {'verb': 'ListRecords', 'metadataPrefix': 'erfGeoEnrichment'}, parse=False) bodyLxml = XML(body) self.assertEquals(4, len(xpath(bodyLxml, '/oai:OAI-PMH/oai:ListRecords/oai:record'))) d = dict(zip( xpath(bodyLxml, '/oai:OAI-PMH/oai:ListRecords/oai:record/oai:metadata/rdf:RDF/oa:Annotation/oa:hasTarget/@rdf:resource'), xpath(bodyLxml, '/oai:OAI-PMH/oai:ListRecords/oai:record/oai:metadata/rdf:RDF/oa:Annotation'))) self.assertEquals(set(['NIOD_BBWO2:niod:3366459', 'geluidVanNl:geluid_van_nederland:47954146', 'NIOD_BBWO2:niod:3441263', 'limburgs_erfgoed:oai:le:RooyNet:37']), set(d.keys())) # contains no location information to even construct a ErfGeo search API query from annotation = d['NIOD_BBWO2:niod:3441263'] self.assertEquals(None, xpathFirst(annotation, 'oa:hasBody')) self.assertEquals('No ErfGeo search API query could be constructed from target record', xpathFirst(annotation, 'dcterms:description/text()')) self.assertEquals(None, xpathFirst(annotation, 'dcterms:source/@rdf:resource')) annotation = d['NIOD_BBWO2:niod:3366459'] self.assertEquals('http://data.digitalecollectie.nl/annotation/erfGeoEnrichment#TklPRF9CQldPMjpuaW9kOjMzNjY0NTk=', xpathFirst(annotation, '@rdf:about')) self.assertEquals('http://*****:*****@rdf:resource')) self.assertEquals('NIOD_BBWO2:niod:3366459', xpathFirst(annotation, 'oa:hasTarget/@rdf:resource')) annotationBody = xpathFirst(annotation, 'oa:hasBody/rdf:Description') placeInTime = xpathFirst(annotationBody, 'dcterms:spatial/hg:PlaceInTime') self.assertEquals('http://erfgeo.nl/hg/geonames/2747032', xpathFirst(placeInTime, '@rdf:about')) self.assertEquals('Soestdijk', xpathFirst(placeInTime, 'rdfs:label/text()')) geometryWKT = xpathFirst(placeInTime, 'geos:hasGeometry/rdf:Description/geos:asWKT/text()') self.assertEquals('POINT(5.28472 52.19083)', geometryWKT)
def testAutocomplete(self):
    """/autocomplete for prefix 'va' on field2 must return value0..value4, with 'value1' last."""
    header, body = getRequest(
        port=self.httpPort,
        path='/autocomplete',
        arguments={'field': 'field2', 'prefix': 'va'},
        parse=False)
    prefix, completions = loads(body)
    self.assertEqual("va", prefix)
    # Only the position of the last completion is checked; the rest is compared as a set.
    self.assertEqual(set(["value0", "value2", "value3", "value4", "value1"]), set(completions))
    self.assertEqual('value1', completions[-1])
def testRecursionStopsOnAnnotation(self):
    """The record for 'urn:test:a:0' must hold targets urn:test:a:1 and urn:test:a:2, in that order."""
    query = "urn:test:a:0"
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.2", operation="searchRetrieve", query=query),
        parse='lxml')
    a_0 = xpath(body, "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF")[0]
    self.assertEqual(
        ["urn:test:a:1", "urn:test:a:2"],
        xpath(a_0, "oa:Annotation/oa:hasTarget/@rdf:resource"))
def testClear(self):
    """Harvest, save the repository with action 'clear', harvest again and check the result.

    After the second run the dump directory must hold 18 files, the last 8
    (sorted by name) being delete requests, and GetStatus must report total 0.
    """
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(BATCHSIZE, self.sizeDumpDir())
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        {'verb': 'GetStatus', 'domainId': DOMAIN, 'repositoryId': REPOSITORY},
        parse=False)
    data = JsonDict.loads(result)
    self.assertEqual(8, data['response']['GetStatus'][0]['total'])

    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='clear')
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(18, self.sizeDumpDir())
    for filename in sorted(listdir(self.dumpDir))[-8:]:
        self.assertTrue('_delete.updateRequest' in filename, filename)

    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        {'verb': 'GetStatus', 'domainId': DOMAIN, 'repositoryId': REPOSITORY},
        parse=False)
    self.assertEqual(0, JsonDict.loads(result)['response']['GetStatus'][0]['total'])
def testRSS(self):
    """Check the /rss feed for query '*' sorted on untokenized.dateissued.

    The feed must hold 13 items, the first link must end with 'Language/nl',
    the pubDates must come back in the listed order and the channel title
    must equal the requested querylabel.
    """
    header, body = getRequest(
        self.sruslavePort,
        '/rss',
        dict(query="*", querylabel='MyWorkerLabel', sortKeys='untokenized.dateissued,,1'))
    items = xpath(body, "/rss/channel/item")
    self.assertEqual(13, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    self.assertEqual(
        ['1993-01-01', '2004-06-30', '2009-11-24', '2013', '2014', '2016', '2016-01-31', '2016-05-05', '2019-11-06'],
        xpath(body, "//item/pubDate/text()"))
    self.assertEqual('MyWorkerLabel', xpathFirst(body, '//channel/title/text()'))
def testOaiListMetadataFormats(self):
    """ListMetadataFormats must return three formats in a fixed order."""
    # GMH21 OK
    header, body = getRequest(self.apiPort, '/oai', dict(verb="ListMetadataFormats"))
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    formats = xpath(body, "//oai:ListMetadataFormats/oai:metadataFormat")
    self.assertEqual(3, len(formats))

    # (expected prefix, xpath selecting the prefix at that position)
    expectedPrefixes = [
        ('metadata', "//oai:ListMetadataFormats/oai:metadataFormat[1]/oai:metadataPrefix/text()"),
        ('nl_didl_combined', "//oai:ListMetadataFormats/oai:metadataFormat[2]/oai:metadataPrefix/text()"),
        ('nl_didl_norm', "//oai:ListMetadataFormats/oai:metadataFormat[3]/oai:metadataPrefix/text()"),
    ]
    for expectedPrefix, prefixXPath in expectedPrefixes:
        self.assertEqual(expectedPrefix, xpath(body, prefixXPath)[0])
def testRemoteInfoFieldWithPrefix(self):
    """The field info page for field2 with prefix 'value8' must show 'value8: 10'."""
    fieldArguments = dict(fieldname='field2', name='main', prefix='value8')
    _, page = getRequest(
        port=self.httpPort,
        path='/remote/info/field',
        arguments=fieldArguments,
        parse=False)
    expectedFragment = "<pre>value8: 10</pre>"
    self.assertTrue(expectedFragment in page, page)
def testSparqlQueryAcceptHttpHeader(self):
    """A /sparql request with a JSON Accept header must get the matching Content-Type header line."""
    acceptJson = {'Accept': 'application/sparql-results+json'}
    sparqlArguments = {'query': 'select ?x ?y ?z where {?x ?y ?z}'}
    headers, _ = getRequest(
        self.portNumber,
        "/sparql",
        arguments=sparqlArguments,
        additionalHeaders=acceptJson,
        parse=False)
    # The Content-Type must match a complete header line, not a substring of the header block.
    headerLines = headers.split("\r\n")
    self.assertTrue("Content-Type: application/sparql-results+json" in headerLines, headers)
def testDocumentationPage(self):
    """The /documentation page must link every file in self.publicDocumentationPath.

    Links are expected in sorted file-name order, each with a 'target'
    attribute and an href of the form '/public/<filename>'.
    """
    header, body = getRequest(self.portNumber, '/documentation', {}, parse='lxml')
    nodes = xpath(body, '/html/body/div/div[@id="filelist"]/ul/li/a')
    expected = sorted(listdir(self.publicDocumentationPath))
    # Guard against a vacuous pass on an (almost) empty documentation directory.
    self.assertTrue(len(expected) > 1)
    self.assertEqual(expected, [node.text for node in nodes])
    self.assertTrue(all('target' in node.attrib for node in nodes))
    self.assertEqual(
        ['/public/%s' % f for f in expected],
        [node.attrib['href'] for node in nodes])
def testImageAnnotations_ia1(self):
    """The record for 'urn:id:ia:1' must reference the image body and include the Canvas1 title."""
    query = "urn:id:ia:1"
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.2", operation="searchRetrieve", query=query),
        parse='lxml')
    ia_1 = xpath(body, "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF")[0]
    self.assertEqual(
        ['http://catchplus.nl/annotation/imageScan1.jpg'],
        xpath(ia_1, 'oa:Annotation/oa:hasBody/@rdf:resource'))
    self.assertEqual(
        ['Canvas for imageScan1.jpg'],
        xpath(ia_1, 'oa:Annotation/oa:hasTarget/dms:Canvas[@rdf:about="http://catchplus.nl/annotation/Canvas1"]/dc:title/text()'))
def testOai(self):
    """ListRecords on /oaix must return 14 records, exactly one of them marked deleted."""
    listRecordsArguments = dict(verb='ListRecords', metadataPrefix=NORMALISED_DOC_NAME)
    header, response = getRequest(self.gatewayPort, '/oaix', arguments=listRecordsArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    allRecords = xpath(response, '//oai:record')
    self.assertEqual(14, len(allRecords))

    deletedRecords = xpath(response, '//oai:record[oai:header/@status = "deleted"]')
    self.assertEqual(1, len(deletedRecords))
def testRSS(self):
    """Check the /rss feed for query '*' starting at record 4.

    The feed must hold 10 items, the first link must end with 'Language/nl',
    the item titles and descriptions must match the expected sets (order is
    not checked) and the channel title must equal the requested querylabel.
    """
    header, body = getRequest(
        self.apiPort,
        '/rss',
        dict(query="*", querylabel='MyLabel', sortKeys='untokenized.dateissued,,0', startRecord='4'))
    items = xpath(body, "/rss/channel/item")
    self.assertEqual(10, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))

    expectedTitles = {
        'Paden en stromingen---a historical survey',
        'Preface to special issue (Fast reaction - slow diffusion scenarios: PDE approximations and free boundaries)',
        'Conditiebepaling PVC',
        'Appositie en de interne struktuur van de NP',
        'Wetenschapswinkel',
        'Late-type Giants in the Inner Galaxy',
        'H.J. Bennis',
        'Locatie [Matthijs Tinxgracht 16] te Edam, gemeente Edam-Volendam. Een archeologische opgraving.',
        'Example Program 2',
        u'\u042d\u043a\u043e\u043b\u043e\u0433\u043e-\u0440\u0435\u043a\u0440\u0435\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0439 \u043a\u043e\u0440\u0438\u0434\u043e\u0440 \u0432 \u0433\u043e\u0440\u043d\u043e\u043c \u0437\u0430\u043f\u043e\u0432\u0435\u0434\u043d\u0438\u043a\u0435 \u0411\u043e\u0433\u043e\u0442\u044b',
    }
    self.assertEqual(expectedTitles, set(xpath(body, "//item/title/text()")))

    expectedDescriptions = {
        'FransHeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeellllllang',
        'Microvariatie; (Generatieve) Syntaxis; Morphosyntaxis; Syntaxis-Semantiek Interface; Dialectologie',
        'Samenvatting',
        'Projectomschrijving<br>Ontwikkeling van betrouwbare methoden, procedures\n en extrapolatiemodellen om de conditie en restlevensduur van in gebruik zijnde\n PVC-leidingen te bepalen.<br>Beoogde projectopbrengsten<br>- uitwerking van\n huidige kennis en inzichten m.b.t.',
        'The present thesis describes the issue of\n "neonatal glucocorticoid treatment and predisposition to\n cardiovascular disease in rats".',
        'Abstract van dit document',
        'This is an example program about Programming with Meresco',
        'Abstract',
    }
    self.assertEqual(expectedDescriptions, set(xpath(body, "//item/description/text()")))

    self.assertEqual('MyLabel', xpathFirst(body, '//channel/title/text()'))
def testTextAnnotations_ta0(self):
    """The record for 'urn:id:ta:0' must include the text body urn:id:ib:0 and the Canvas1 title."""
    query = "urn:id:ta:0"
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.2", operation="searchRetrieve", query=query),
        parse='lxml')
    ta_0 = xpath(body, "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF")[0]
    self.assertEqual(
        ['Dit is een beschrijving van Den Haag. En dit is een tweede zin.'],
        xpath(ta_0, 'oa:Annotation/oa:hasBody/cnt:ContentAsText[@rdf:about="urn:id:ib:0"]/cnt:chars/text()'))
    self.assertEqual(
        ['Canvas for imageScan1.jpg'],
        xpath(ta_0, 'oa:Annotation/oa:hasTarget/dms:Canvas[@rdf:about="http://catchplus.nl/annotation/Canvas1"]/dc:title/text()'))
def testOaiIdentify(self):
    """The Identify response on /oaix must be XML and carry the expected admin e-mail."""
    expectedHeader = 'HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8'
    header, identify = getRequest(self.gatewayPort, '/oaix', arguments=dict(verb='Identify'))
    self.assertEqual(expectedHeader, header)
    adminEmails = xpath(identify, '//oai:Identify/oai:adminEmail/text()')
    self.assertEqual("*****@*****.**", adminEmails[0])
def testGetStatusForDomainAndRepositoryId(self):
    """GetStatus for one repository must echo the request and report 6 invalid records."""
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        {'verb': 'GetStatus', 'domainId': 'adomain', 'repositoryId': 'integrationtest'},
        parse=False)
    data = JsonDict.loads(result)
    self.assertEqual("GetStatus", data['request']['verb'])
    self.assertEqual("adomain", data['request']['domainId'])
    self.assertEqual("integrationtest", data['request']['repositoryId'])
    self.assertEqual("IntegrationTest", data['response']['GetStatus'][0]['repositoryGroupId'])
    self.assertEqual(6, data['response']['GetStatus'][0]['invalid'])
def testAddRdfUsingClient(self):
    """Add RDF through HttpClient and check that a SPARQL query returns its literal."""
    rdf = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description> <rdf:type>testAddRdfUsingClient</rdf:type> </rdf:Description> </rdf:RDF>"""
    client = HttpClient(host='localhost', port=self.jenaPort, synchronous=True)
    # add() is wrapped in compose() and drained with list() so that it runs to completion.
    addCall = compose(client.add('uri:identifier', rdf))
    list(addCall)

    sparqlArguments = dict(query="SELECT ?o WHERE {?s ?p ?o}")
    _, queryResult = getRequest(port=self.jenaPort, path='/ds/query', arguments=sparqlArguments, parse=False)
    self.assertTrue("<literal>testAddRdfUsingClient</literal>" in queryResult, queryResult)
def testRssForNeverHarvestedRepository(self):
    """The /rss feed for 'repository2' must have the status title and no items."""
    feedArguments = {'domainId': 'adomain', 'repositoryId': 'repository2'}
    _, rss = getRequest(self.harvesterInternalServerPortNumber, '/rss', feedArguments, parse='lxml')
    channelTitle = xpath(rss, "/rss/channel/title/text()")[0]
    self.assertEqual("Harvester status voor repository2", channelTitle)
    feedItems = xpath(rss, "/rss/channel/item")
    self.assertEqual(0, len(feedItems))
# Queries SRU for 'urn:id:ta:2' and checks the annotation's oa:SpecificResource source text
# and the title of Canvas1 reached through oa:hasTarget/oa:SpecificResource/oa:hasSource.
# NOTE(review): in this copy the specificResourceUrl string literal is masked
# ("http://*****:*****@rdf:about=...") and has swallowed the end of the assignment and the
# start of the first assertion, so the line is not valid Python as shown.
# Restore the original statements from version control before editing this method.
def testTextAnnotations_ta2(self): query = "urn:id:ta:2" headers, body = getRequest(self.portNumber, "/sru", arguments=dict( version="1.2", operation="searchRetrieve", query=query), parse='lxml') ta_2 = xpath(body, "/srw:searchRetrieveResponse/srw:records/srw:record/srw:recordData/rdf:RDF")[0] specificResourceUrl = "http://*****:*****@rdf:about="%s"]/oa:hasSource/cnt:ContentAsText[@rdf:about="urn:id:ib:0"]/cnt:chars/text()' % specificResourceUrl)) self.assertEquals(['Canvas for imageScan1.jpg'], xpath(ta_2, 'oa:Annotation/oa:hasTarget/oa:SpecificResource[dc:identifier/text()="urn:id:ct:2"]/oa:hasSource/dms:Canvas[@rdf:about="http://catchplus.nl/annotation/Canvas1"]/dc:title/text()'))
def testOne(self):
    """The SRU result for the given uuid must contain two cnt:chars elements under oa:hasSource/oa:Body."""
    uuid = "urn:uuid:8ab4ee28-651a-45bd-9206-59763f9e5487"
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.1", operation="searchRetrieve", query=uuid),
        parse='lxml')
    self.assertEqual(2, len(xpath(body, '//oa:hasSource/oa:Body/cnt:chars')))
def testBadOai(self):
    """The /badoai/responsedate helper must hand out 'resume0' and then 'resume1' as resumption tokens."""
    tokenXPath = '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()'
    # (request arguments, resumption token expected in the response), in request order.
    steps = [
        (dict(verb='ListRecords', metadataPrefix='prefix'), 'resume0'),
        (dict(verb='ListRecords', resumptionToken='resume0'), 'resume1'),
    ]
    for requestArguments, expectedToken in steps:
        header, data = getRequest(
            port=self.helperServerPortNumber,
            path='/badoai/responsedate',
            arguments=requestArguments)
        self.assertEqual(expectedToken, xpathFirst(data, tokenXPath))
def testRemoteInfoCore(self):
    """Check /remote/info/core for core 'main': 12 fields and two drilldown fields.

    The first <ul> of the page is taken as the field list, the second as the
    drilldown field list (compared in order).
    """
    header, body = getRequest(
        port=self.httpPort,
        path='/remote/info/core',
        arguments=dict(name='main'),
        parse=False)
    bodyLxml = HTML(body)
    lists = bodyLxml.xpath('//ul')

    fieldList = lists[0]
    fields = fieldList.xpath('li/a/text()')
    self.assertEqual(12, len(fields))

    drilldownFieldList = lists[1]
    drilldownFields = drilldownFieldList.xpath('li/a/text()')
    self.assertEqual(['untokenized.field2', 'untokenized.fieldHier'], drilldownFields)
def testOne(self):
    """Searching on the Canvas1 target must return four records with the expected resolve URLs."""
    query = 'oa:hasTarget="http://catchplus.nl/annotation/Canvas1"'
    headers, body = getRequest(
        self.portNumber,
        "/sru",
        arguments=dict(version="1.2", operation="searchRetrieve", query=query),
        parse='lxml')
    self.assertEqual(["4"], xpath(body, '//srw:numberOfRecords/text()'))
    # '%%' keeps a literal '%' in the percent-encoded identifiers after formatting.
    self.assertEqual([
        'http://localhost:%s/resolve/urn%%3Aid%%3Aia%%3A1' % self.portNumber,
        'http://localhost:%s/resolve/urn%%3Aid%%3Ata%%3A0' % self.portNumber,
        'http://localhost:%s/resolve/urn%%3Aid%%3Ata%%3A1' % self.portNumber,
        'http://localhost:%s/resolve/urn%%3Aid%%3Ata%%3A2' % self.portNumber,
    ], xpath(body, '//srw:records/srw:record/srw:recordIdentifier/text()'))
def testAddRdf(self):
    """PUT RDF into graph 'uri:record' and check that a SPARQL query returns its literal."""
    rdf = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description> <rdf:type>testAddRdf</rdf:type> </rdf:Description> </rdf:RDF>"""
    uploadHeaders = {'Content-Type': 'application/rdf+xml', 'Content-length': len(rdf)}
    putRequest(
        port=self.jenaPort,
        path="/ds/data?graph=uri:record",
        additionalHeaders=uploadHeaders,
        data=rdf,
        parse=False)

    sparqlArguments = dict(query="SELECT ?o WHERE {?s ?p ?o}")
    _, queryResult = getRequest(port=self.jenaPort, path='/ds/query', arguments=sparqlArguments, parse=False)
    self.assertTrue("<literal>testAddRdf</literal>" in queryResult, queryResult)
def testAbout(self):
    """Check the erfGeoEnrichment annotation returned by /about for one NIOD record.

    Verifies the annotation URI, its target and the enriched place
    ('Soestdijk') including its geonames URI and WKT point.
    """
    header, body = getRequest(
        self.erfGeoEnrichmentPort,
        '/about',
        {'uri': 'NIOD_BBWO2:niod:3366459', 'profile': 'erfGeoEnrichment'},
        parse=False)
    bodyLxml = parse(StringIO(body))
    rdf = xpathFirst(bodyLxml, '/rdf:RDF')
    self.assertEqual(
        'http://data.digitalecollectie.nl/annotation/erfGeoEnrichment#TklPRF9CQldPMjpuaW9kOjMzNjY0NTk=',
        xpathFirst(rdf, 'oa:Annotation/@rdf:about'))
    self.assertEqual(
        'NIOD_BBWO2:niod:3366459',
        xpathFirst(rdf, 'oa:Annotation/oa:hasTarget/@rdf:resource'))

    annotationBody = xpathFirst(rdf, 'oa:Annotation/oa:hasBody/rdf:Description')
    placeInTime = xpathFirst(annotationBody, 'dcterms:spatial/hg:PlaceInTime')
    self.assertEqual('http://erfgeo.nl/hg/geonames/2747032', xpathFirst(placeInTime, '@rdf:about'))
    self.assertEqual('Soestdijk', xpathFirst(placeInTime, 'rdfs:label/text()'))
    geometryWKT = xpathFirst(placeInTime, 'geos:hasGeometry/rdf:Description/geos:asWKT/text()')
    self.assertEqual('POINT(5.28472 52.19083)', geometryWKT)
def testAdd(self):
    """Add one suggestion record and check the record and suggestion totals.

    After the commit /totalRecords must be "1" and /totalSuggestions "0".
    The record is deleted and the change committed afterwards, also when an
    assertion fails.
    """
    data = """{ "key": 1, "values": ["harry"], "types": ["uri:book"], "creators": ["rowling"] }"""
    server = self.suggestionServerPort
    try:
        header, body = postRequest(server, '/add?identifier=id1', data=data, parse=False)
        self.assertTrue("200 OK" in header.upper(), header + body)
        header, body = postRequest(server, '/commit', data=None, parse=False)

        # (path, expected body) for the two counters, checked in this order.
        for counterPath, expectedBody in (('/totalRecords', "1"), ('/totalSuggestions', "0")):
            header, body = getRequest(server, counterPath, parse=False)
            self.assertTrue("200 OK" in header.upper(), header + body)
            self.assertEqual(expectedBody, body)
    finally:
        postRequest(server, '/delete?identifier=id1', data=None, parse=False)
        postRequest(server, '/commit', data=None, parse=False)
def testGetStatusForDomain(self):
    """GetStatus for 'adomain' must return two status entries and echo the domainId."""
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    statusArguments = dict(verb='GetStatus', domainId='adomain')
    _, responseBody = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        statusArguments,
        parse=False)
    status = JsonDict.loads(responseBody)
    self.assertEqual(2, len(status['response']['GetStatus']))
    self.assertEqual("adomain", status['request']['domainId'])
def testGetRepositoriesForDomain(self):
    """GetRepositories for 'adomain' must return 'integrationtest' and 'repository2', in that order."""
    _, responseBody = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        dict(verb='GetRepositories', domainId='adomain'),
        parse=False)
    parsed = loads(responseBody)
    self.assertEqual(2, len(parsed['response']['GetRepositories']))
    identifiers = [repository['identifier'] for repository in parsed['response']['GetRepositories']]
    self.assertEqual(['integrationtest', 'repository2'], identifiers)
def testGetRepository(self):
    """GetRepository for 'integrationtest' must report repositoryGroupId 'IntegrationTest'."""
    repositoryArguments = dict(verb='GetRepository', domainId='adomain', identifier='integrationtest')
    _, responseBody = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        repositoryArguments,
        parse=False)
    repository = JsonDict.loads(responseBody)['response']['GetRepository']
    self.assertEqual("IntegrationTest", repository['repositoryGroupId'])
def testOaiIdentify(self):
    """A request to the resolver root with an identifier must return the plain 'Resolver Server' page."""
    resolveArguments = dict(identifier='urn:nbn:nl:ui:39-ae86436a9031f6f287b2fdc6f54e3fe6')
    header, page = getRequest(self.resolverPort, '/', arguments=resolveArguments)
    expectedHeader = 'HTTP/1.0 200 OK\r\nContent-Type: text/plain; charset=utf-8'
    self.assertEqual(expectedHeader, header)
    serialized = etree.tostring(page)
    self.assertEqual('<html><body><p>Resolver Server</p></body></html>', serialized)
def testRemoteInfoDrilldownValues(self):
    """The drilldown values page for 'untokenized.field2' must list the eleven expected values (any order)."""
    header, body = getRequest(
        port=self.httpPort,
        path='/remote/info/drilldownvalues',
        arguments=dict(path='untokenized.field2', name='main'),
        parse=False)
    self.assertFalse('Traceback' in body, body)
    bodyLxml = HTML(body)
    self.assertEqual(
        set([
            'value1',
            'value0',
            'value9',
            'value8',
            'value7',
            'value6',
            'value5',
            'value4',
            'value3',
            'othervalue2',
            'value2',
        ]),
        set(bodyLxml.xpath('//ul/li/a/text()')))
def testInfo(self):
    """After numerating 'id0' and 'id1', /info must report '{"total": 2}'."""
    for identifier in ('id0', 'id1'):
        header, body = postRequest(self.numerateServerPort, '/numerate', data=identifier, parse=False)

    header, info = getRequest(self.numerateServerPort, '/info', parse=False)
    self.assertTrue("200 OK" in header.upper(), header)
    self.assertEqual('{"total": 2}', info)
def doOaiListRecord(port):
    """Send a ListRecords request with x-wait to *port* and record the response.

    The (header, body) tuple is appended to ``responses``. ``clientId`` and
    ``responses`` are not defined here; they come from the enclosing scope.
    """
    header, body = getRequest(
        # Use the argument; the original ignored it in favour of an outer 'portNumber'.
        port=port,
        path="/",
        arguments={
            "verb": "ListRecords",
            "metadataPrefix": "prefix",
            "x-wait": "True",
        },
        additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
        parse=False)
    responses.append((header, body))
def testViewInvalidRecord(self):
    """Check the /invalidRecord page for record 'oai:record:01'.

    The page must show the repository/record heading, a link back to the
    invalid-records page and the validation message "Invalid data".
    """
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    recordArguments = dict(domainId='adomain', repositoryId='integrationtest', recordId='oai:record:01')
    _, page = getRequest(
        self.harvesterInternalServerPortNumber,
        '/invalidRecord',
        recordArguments,
        parse=True)

    heading = page.xpath("//h3/text()")[0]
    self.assertEqual("Repository integrationtest - Record oai:record:01", heading)

    backLink = page.xpath("/html/body/div/p/a/@href")[0]
    self.assertEqual("/page/invalid/?domainId=adomain&repositoryId=integrationtest", backLink)

    self.assertEqual(["Invalid data"], page.xpath("/html/body/div/pre/text()"))
def testGetStatusForDomainAndRepositoryId(self):
    """GetStatus for one repository must echo the request and report 6 invalid records."""
    self.controlHelper(action='allInvalid')
    self.startHarvester(repository=REPOSITORY)
    statusArguments = dict(verb='GetStatus', domainId='adomain', repositoryId='integrationtest')
    _, responseBody = getRequest(
        self.harvesterInternalServerPortNumber,
        '/get',
        statusArguments,
        parse=False)
    status = JsonDict.loads(responseBody)

    # The request section must echo what was asked for.
    self.assertEqual("GetStatus", status['request']['verb'])
    self.assertEqual("adomain", status['request']['domainId'])
    self.assertEqual("integrationtest", status['request']['repositoryId'])

    self.assertEqual("IntegrationTest", status['response']['GetStatus'][0]['repositoryGroupId'])
    self.assertEqual(6, status['response']['GetStatus'][0]['invalid'])