def testRssForNeverHarvestedRepository(self):
    """The /rss feed of 'repository2' carries its title but no items."""
    query = {'domainId': 'adomain', 'repositoryId': 'repository2'}
    _header, feed = getRequest(
        self.harvesterInternalServerPortNumber, '/rss', query, parse='lxml')

    channelTitles = xpath(feed, "/rss/channel/title/text()")
    self.assertEqual("Harvester status voor repository2", channelTitles[0])

    # No harvester run is started in this test, so no items are expected.
    channelItems = xpath(feed, "/rss/channel/item")
    self.assertEqual(0, len(channelItems))
def testRssForStatusChangesOk(self):
    """After a harvester run, /running.rss lists one 'Ok' item for the repository."""
    self.startHarvester(repository=REPOSITORY)
    _header, feed = getRequest(
        self.harvesterInternalServerPortNumber,
        '/running.rss',
        {'domainId': 'adomain'},
        parse='lxml')

    # Channel-level metadata, checked in document order.
    expectedChannelValues = (
        ("/rss/channel/title/text()",
            "Harvest status changes for domain 'adomain'"),
        ("/rss/channel/description/text()",
            "Status changes per repository for domain 'adomain'"),
        ("/rss/channel/link/text()",
            "http://localhost:9999/showHarvesterStatus?domainId=adomain"),
        ("/rss/channel/ttl/text()", str(60 * 6)),
    )
    for path, expected in expectedChannelValues:
        self.assertEqual(expected, xpath(feed, path)[0])

    today = strftime("%Y-%m-%d", gmtime())
    items = xpath(feed, "/rss/channel/item")
    self.assertEqual(1, len(items))
    item = items[0]

    title = ''.join(xpath(item, "title/text()"))
    self.assertEqual("integrationtest: Ok", title)

    description = ''.join(xpath(item, "description/text()"))
    self.assertTrue(description.startswith("Harvest time: %s" % today), description)

    # Only the part of the guid before the first 'T' is compared.
    guid = ''.join(xpath(item, "guid/text()"))
    self.assertEqual('integrationtest:%s' % today, guid.split('T')[0])

    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
        xpath(item, "link/text()")[0])
def testRssForStatusChangesError(self):
    """After a harvest that hits an exception, /running.rss lists one 'Error' item.

    The control helper is told to raise on record 01 of REPOSITORY before the
    harvester is started.
    """
    self.controlHelper(action="raiseExceptionOnIds", id=['%s:oai:record:01' % REPOSITORY])
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber, '/running.rss', {'domainId': 'adomain'}, parse='lxml')
    TODAY = strftime("%Y-%m-%d", gmtime())
    items = xpath(result, "/rss/channel/item")
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(1, len(items))
    self.assertEqual("integrationtest: Error", ''.join(xpath(items[0], "title/text()")))
    description = ''.join(xpath(items[0], "description/text()"))
    self.assertTrue(description.startswith("Harvest time: %s" % TODAY), description)
    self.assertTrue("Exception: ERROR" in description, description)
    # Only the part of the guid before the first 'T' is compared.
    self.assertEqual(
        'integrationtest:%s' % TODAY,
        ''.join(xpath(items[0], "guid/text()")).split('T')[0])
    self.assertEqual(
        "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
        xpath(items[0], "link/text()")[0])
def testOne(self):
    """send() emits a 'replace' update request with two parts; delete() emits a 'delete'."""
    self.uploader.send(self.upload)
    self.assertEqual(1, len(self.sentData))

    replaceRequest = _parse(self.sentData[0])
    recordId = xpathFirst(replaceRequest, 'ucp:recordIdentifier/text()')
    self.assertEqual('some:id', recordId)
    action = xpathFirst(replaceRequest, 'ucp:action/text()')
    self.assertEqual('info:srw/action/1/replace', action)

    documentParts = xpath(
        replaceRequest,
        'srw:record/srw:recordData/document:document/document:part')
    self.assertEqual(2, len(documentParts))
    metaPart, otherPart = documentParts
    self.assertEqual('meta', metaPart.attrib['name'])
    self.assertEqual('<meta>....</meta>', metaPart.text)
    self.assertEqual('otherdata', otherPart.attrib['name'])
    self.assertEqual('<stupidXML>ßabcdefgh', otherPart.text)

    # The delete request is the second entry appended to sentData.
    self.uploader.delete(self.upload)
    deleteRequest = _parse(self.sentData[1])
    recordId = xpathFirst(deleteRequest, 'ucp:recordIdentifier/text()')
    self.assertEqual('some:id', recordId)
    action = xpathFirst(deleteRequest, 'ucp:action/text()')
    self.assertEqual('info:srw/action/1/delete', action)
def testConcurrentHarvestToSruUpdate(self):
    """With concurrency=3, records of different repositories arrive interleaved.

    Every logged request in the dump directory is read in sorted filename
    order and reduced to the repository id prefix of its record identifier.
    """
    self.startHarvester(concurrency=3)
    requestsLogged = sorted(listdir(self.dumpDir))
    repositoryIds = []
    for f in requestsLogged:
        # Close each dump file deterministically instead of leaking the handle.
        with open(join(self.dumpDir, f)) as fp:
            tree = parse(fp)
        repositoryIds.append(
            xpath(tree, '//ucp:recordIdentifier/text()')[0].split(':', 1)[0])
    repositoryIdsSet = set(repositoryIds)
    self.assertEqual(
        {'repository2', 'integrationtest', 'harvestertestrepository'},
        repositoryIdsSet)

    # Each time the repository id changes it is removed from the set. If an id
    # shows up again after another one, remove() raises KeyError, which is the
    # evidence that the uploads were interleaved.
    lastSeenRepoId = None
    try:
        for repo in repositoryIds:
            if repo != lastSeenRepoId:
                repositoryIdsSet.remove(repo)
                lastSeenRepoId = repo
    except KeyError:
        pass
    else:
        self.fail('Records should have been inserted out-of-order.')
def testRssForHarvesterStatus(self):
    """The /rss feed of 'integrationtest' reports the harvest counters after a run."""
    self.controlHelper(action="noneInvalid")
    self.startHarvester(repository=REPOSITORY)
    query = {'domainId': 'adomain', 'repositoryId': 'integrationtest'}
    _header, feed = getRequest(
        self.harvesterInternalServerPortNumber, '/rss', query, parse='lxml')

    # Channel metadata followed by the title of the first item.
    expectedTexts = (
        ("/rss/channel/title/text()",
            "Harvester status voor integrationtest"),
        ("/rss/channel/description/text()",
            "Recente repository harvest status voor integrationtest in adomain"),
        ("/rss/channel/link/text()",
            "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest"),
        ("/rss/channel/ttl/text()", str(60 * 6)),
        ("/rss/channel/item[1]/title/text()",
            "Harvester status voor integrationtest"),
    )
    for path, expected in expectedTexts:
        self.assertEqual(expected, xpath(feed, path)[0])

    description = xpath(feed, "/rss/channel/item[1]/description/text()")[0]
    expectedFragments = (
        "Last harvest date: ",
        "Total records: 8",
        "Harvested records: 10",
        "Uploaded records: 8",
        "Deleted records: 2",
        "Validation errors: 0",
        "Errors: 0",
    )
    for fragment in expectedFragments:
        self.assertTrue(fragment in description, description)

    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
        xpath(feed, "/rss/channel/item[1]/link/text()")[0])
def testRssForHarvesterStatus(self):
    """The /rss feed of 'integrationtest' reports the harvest counters after a run.

    Checks the channel metadata, the first item's title and link, and the
    counter lines inside the first item's description.
    """
    self.controlHelper(action="noneInvalid")
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/rss',
        {'domainId': 'adomain', 'repositoryId': 'integrationtest'},
        parse='lxml')
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(
        "Harvester status voor integrationtest",
        xpath(result, "/rss/channel/title/text()")[0])
    self.assertEqual(
        "Recente repository harvest status voor integrationtest in adomain",
        xpath(result, "/rss/channel/description/text()")[0])
    self.assertEqual(
        "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
        xpath(result, "/rss/channel/link/text()")[0])
    self.assertEqual(str(60 * 6), xpath(result, "/rss/channel/ttl/text()")[0])
    self.assertEqual(
        "Harvester status voor integrationtest",
        xpath(result, "/rss/channel/item[1]/title/text()")[0])
    description = xpath(result, "/rss/channel/item[1]/description/text()")[0]
    # The description is passed as the failure message so it shows up on a mismatch.
    self.assertTrue("Last harvest date: " in description, description)
    self.assertTrue("Total records: 8" in description, description)
    self.assertTrue("Harvested records: 10" in description, description)
    self.assertTrue("Uploaded records: 8" in description, description)
    self.assertTrue("Deleted records: 2" in description, description)
    self.assertTrue("Validation errors: 0" in description, description)
    self.assertTrue("Errors: 0" in description, description)
    self.assertEqual(
        "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
        xpath(result, "/rss/channel/item[1]/link/text()")[0])
def __init__(self, response):
    """Extract records, resumption token and response date from an OAI-PMH response.

    :param response: parsed response that is queried through xpath/xpathFirst.
    """
    self.response = response
    self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')

    token = xpathFirst(response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
    self.resumptionToken = token or ''

    self.responseDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
    # should be there, happens to be absent for some repositories
    if self.responseDate is not None:
        self.responseDate = self.responseDate.strip()
    # A missing or blank date is replaced by whatever self._zulu() returns.
    self.responseDate = self.responseDate or self._zulu()

    self.selectRecord(xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record'))
def testSendWithMultipleAbout(self):
    """Sending an upload with two <about> elements writes both to the record file."""
    ABOUT = '<about xmlns="%(oai)s">about_1</about><about xmlns="%(oai)s">about_2</about>' % namespaces
    recordFile = self.tempdir + '/group/repo/id.record'
    # Force the uploader to write to a known location.
    self.uploader._filenameFor = lambda *args: recordFile
    upload = createUpload(about=ABOUT)
    self.uploader.send(upload)
    self.assertTrue(isfile(recordFile))
    # Read the record inside a context manager so the handle is closed again.
    with open(recordFile) as fp:
        aboutElements = xpath(parse(fp), '//oai:about')
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(ABOUT, ''.join(lxmltostring(x) for x in aboutElements))
def testRssForStatusChangesError(self):
    """After a harvest that hits an exception, /running.rss lists one 'Error' item."""
    failingRecordId = '%s:oai:record:01' % REPOSITORY
    self.controlHelper(action="raiseExceptionOnIds", id=[failingRecordId])
    self.startHarvester(repository=REPOSITORY)
    _header, feed = getRequest(
        self.harvesterInternalServerPortNumber,
        '/running.rss',
        {'domainId': 'adomain'},
        parse='lxml')

    today = strftime("%Y-%m-%d", gmtime())
    items = xpath(feed, "/rss/channel/item")
    self.assertEqual(1, len(items))
    item = items[0]

    title = ''.join(xpath(item, "title/text()"))
    self.assertEqual("integrationtest: Error", title)

    description = ''.join(xpath(item, "description/text()"))
    self.assertTrue(description.startswith("Harvest time: %s" % today), description)
    self.assertTrue("Exception: ERROR" in description, description)

    # Only the part of the guid before the first 'T' is compared.
    guid = ''.join(xpath(item, "guid/text()"))
    self.assertEqual('integrationtest:%s' % today, guid.split('T')[0])

    self.assertEqual(
        "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
        xpath(item, "link/text()")[0])
def testConcurrentHarvestToSruUpdateBUG(self):
    """With concurrency=1, the expected number of requests is logged per repository.

    REPOSITORY is saved with complete=True before the harvester is started.
    """
    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, complete=True)
    self.startHarvester(concurrency=1)
    requestsLogged = sorted(listdir(self.dumpDir))
    repositoryIds = []
    for f in requestsLogged:
        # Close each dump file deterministically instead of leaking the handle.
        with open(join(self.dumpDir, f)) as fp:
            tree = parse(fp)
        # The repository id is the part of the record identifier before the first ':'.
        repositoryIds.append(xpath(tree, '//ucp:recordIdentifier/text()')[0].split(':', 1)[0])
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(15, repositoryIds.count(REPOSITORY))
    self.assertEqual(10, repositoryIds.count('repository2'))
    self.assertEqual(10, repositoryIds.count('integrationtest'))
def testSendWithMultipleAbout(self):
    """Sending an upload with two <about> elements writes both to the record file."""
    ABOUT = '<about xmlns="%(oai)s">about_1</about><about xmlns="%(oai)s">about_2</about>' % namespaces
    targetPath = self.tempdir + '/group/repo/id.record'
    # Force the uploader to write to a known location.
    self.uploader._filenameFor = lambda *args: targetPath
    self.uploader.send(createUpload(about=ABOUT))
    self.assertTrue(isfile(targetPath))

    with open(targetPath) as recordStream:
        aboutNodes = xpath(parse(recordStream), '//oai:about')
        serialized = [lxmltostring(node) for node in aboutNodes]
        self.assertEqual(ABOUT, ''.join(serialized))
def testOne(self):
    """send() emits a 'replace' update request with two parts; delete() emits a 'delete'."""
    self.uploader.send(self.upload)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(1, len(self.sentData))

    updateRequest = XML(self.sentData[0])
    self.assertEqual('some:id', xpathFirst(updateRequest, 'ucp:recordIdentifier/text()'))
    self.assertEqual('info:srw/action/1/replace', xpathFirst(updateRequest, 'ucp:action/text()'))
    documentParts = xpath(updateRequest, 'srw:record/srw:recordData/document:document/document:part')
    self.assertEqual(2, len(documentParts))

    # The delete request is the second entry appended to sentData.
    self.uploader.delete(self.upload)
    updateRequest = XML(self.sentData[1])
    self.assertEqual('some:id', xpathFirst(updateRequest, 'ucp:recordIdentifier/text()'))
    self.assertEqual('info:srw/action/1/delete', xpathFirst(updateRequest, 'ucp:action/text()'))
def testConcurrentHarvestToSruUpdateBUG(self):
    """With concurrency=1, the expected number of requests is logged per repository.

    REPOSITORY is saved with complete=True before the harvester is started.
    """
    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, complete=True)
    self.startHarvester(concurrency=1)
    requestsLogged = sorted(listdir(self.dumpDir))
    repositoryIds = []
    for f in requestsLogged:
        # Close each dump file deterministically instead of leaking the handle.
        with open(join(self.dumpDir, f)) as fp:
            tree = parse(fp)
        # The repository id is the part of the record identifier before the first ':'.
        repositoryIds.append(
            xpath(tree, '//ucp:recordIdentifier/text()')[0].split(':', 1)[0])
    self.assertEqual(15, repositoryIds.count(REPOSITORY))
    self.assertEqual(10, repositoryIds.count('repository2'))
    self.assertEqual(10, repositoryIds.count('integrationtest'))
def __init__(self, response):
    """Extract records, resumption token and response date from an OAI-PMH response.

    :param response: parsed response that is queried through xpath/xpathFirst.
    """
    self.response = response
    self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')
    self.resumptionToken = (
        xpathFirst(response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
        or '')

    # should be there, happens to be absent for some repositories
    rawDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
    self.responseDate = rawDate if rawDate is None else rawDate.strip()
    if not self.responseDate:
        # A missing or blank date is replaced by whatever self._zulu() returns.
        self.responseDate = self._zulu()

    firstRecord = xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record')
    self.selectRecord(firstRecord)
def _parseMessage(self, message):
    """Pull version, operation status and the first diagnostic out of an update response.

    :param message: parsed srw:updateResponse that is queried through xpath.
    :return: tuple ``(version, operationStatus, diagresult)``; ``diagresult`` is
        ``None`` when no diagnostic is present, otherwise a
        ``(uri, details, message)`` tuple built from the first diagnostic.
    """
    version = xpath(message, "/srw:updateResponse/srw:version/text()")[0]
    operationStatus = xpath(message, "/srw:updateResponse/ucp:operationStatus/text()")[0]

    diagnostics = xpath(message, "/srw:updateResponse/srw:diagnostics/diag:diagnostic")
    if len(diagnostics) == 0:
        return version, operationStatus, None

    # Only the first diagnostic is reported; any further ones are ignored.
    first = diagnostics[0]
    uri = xpath(first, "diag:uri/text()")[0]
    details = ''.join(xpath(first, "diag:details/text()"))
    text = ''.join(xpath(first, "diag:message/text()"))
    return version, operationStatus, (uri, details, text)
def testRssForStatusChangesOk(self):
    """After a harvester run, /running.rss lists one 'Ok' item for the repository."""
    self.startHarvester(repository=REPOSITORY)
    header, result = getRequest(
        self.harvesterInternalServerPortNumber, '/running.rss', {'domainId': 'adomain'}, parse='lxml')
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(
        "Harvest status changes for domain 'adomain'",
        xpath(result, "/rss/channel/title/text()")[0])
    self.assertEqual(
        "Status changes per repository for domain 'adomain'",
        xpath(result, "/rss/channel/description/text()")[0])
    self.assertEqual(
        "http://localhost:9999/harvesterStatus.page?domainId=adomain",
        xpath(result, "/rss/channel/link/text()")[0])
    self.assertEqual(str(60 * 6), xpath(result, "/rss/channel/ttl/text()")[0])
    TODAY = strftime("%Y-%m-%d", gmtime())
    items = xpath(result, "/rss/channel/item")
    self.assertEqual(1, len(items))
    self.assertEqual("integrationtest: Ok", ''.join(xpath(items[0], "title/text()")))
    description = ''.join(xpath(items[0], "description/text()"))
    self.assertTrue(description.startswith("Harvest time: %s" % TODAY), description)
    # Only the part of the guid before the first 'T' is compared.
    self.assertEqual(
        'integrationtest:%s' % TODAY,
        ''.join(xpath(items[0], "guid/text()")).split('T')[0])
    self.assertEqual(
        "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
        xpath(items[0], "link/text()")[0])
def _parseMessage(self, message):
    """Pull version, operation status and the first diagnostic out of an update response.

    :param message: parsed srw:updateResponse that is queried through xpath.
    :return: tuple ``(version, operationStatus, diagresult)``; ``diagresult`` is
        ``None`` when no diagnostic is present, otherwise a
        ``(uri, details, message)`` tuple built from the first diagnostic.
    """
    def joinedText(node, path):
        # Concatenate all text nodes matched by path (empty string if none).
        return ''.join(xpath(node, path))

    version = xpath(message, "/srw:updateResponse/srw:version/text()")[0]
    operationStatus = xpath(
        message, "/srw:updateResponse/ucp:operationStatus/text()")[0]

    diagresult = None
    diagnostics = xpath(
        message, "/srw:updateResponse/srw:diagnostics/diag:diagnostic")
    if len(diagnostics) > 0:
        # Only the first diagnostic is reported.
        diagnostic = diagnostics[0]
        diagresult = (
            xpath(diagnostic, "diag:uri/text()")[0],
            joinedText(diagnostic, "diag:details/text()"),
            joinedText(diagnostic, "diag:message/text()"),
        )
    return version, operationStatus, diagresult
def testConcurrentHarvestToSruUpdate(self):
    """With concurrency=3, records of different repositories arrive interleaved.

    Every logged request in the dump directory is read in sorted filename
    order and reduced to the repository id prefix of its record identifier.
    """
    self.startHarvester(concurrency=3)
    requestsLogged = sorted(listdir(self.dumpDir))
    repositoryIds = []
    for f in requestsLogged:
        # Close each dump file deterministically instead of leaking the handle.
        with open(join(self.dumpDir, f)) as fp:
            tree = parse(fp)
        repositoryIds.append(xpath(tree, '//ucp:recordIdentifier/text()')[0].split(':', 1)[0])
    repositoryIdsSet = set(repositoryIds)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(
        {'repository2', 'integrationtest', 'harvestertestrepository'},
        repositoryIdsSet)

    # Each time the repository id changes it is removed from the set. If an id
    # shows up again after another one, remove() raises KeyError, which is the
    # evidence that the uploads were interleaved.
    lastSeenRepoId = None
    try:
        for repo in repositoryIds:
            if repo != lastSeenRepoId:
                repositoryIdsSet.remove(repo)
                lastSeenRepoId = repo
    except KeyError:
        pass
    else:
        self.fail('Records should have been inserted out-of-order.')
def __init__(self, response):
    """Extract records, resumption token and response date from an OAI-PMH response.

    :param response: parsed response that is queried through xpath/xpathFirst.
    """
    self.response = response
    self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')
    self.resumptionToken = xpathFirst(
        response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()') or ''
    self.responseDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
    # responseDate should be there, but calling .strip() unconditionally fails
    # when xpathFirst finds nothing; guard against that.
    if self.responseDate is not None:
        self.responseDate = self.responseDate.strip()
    if not self.responseDate:
        # NOTE(review): falls back to self._zulu(), as the other revisions of this
        # constructor in this file do - confirm the helper exists on this class.
        self.responseDate = self._zulu()
    self.selectRecord(xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record'))
def testRssForNeverHarvestedRepository(self):
    """The /rss feed of 'repository2' carries its title but no items."""
    header, result = getRequest(
        self.harvesterInternalServerPortNumber,
        '/rss',
        {'domainId': 'adomain', 'repositoryId': 'repository2'},
        parse='lxml')
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(
        "Harvester status voor repository2",
        xpath(result, "/rss/channel/title/text()")[0])
    # No harvester run is started in this test, so no items are expected.
    self.assertEqual(0, len(xpath(result, "/rss/channel/item")))