Exemple #1
0
 def testRssForNeverHarvestedRepository(self):
     """Request /rss for repository2 and expect a titled channel without items."""
     query = {'domainId': 'adomain', 'repositoryId': 'repository2'}
     _header, feed = getRequest(self.harvesterInternalServerPortNumber,
                                '/rss', query, parse='lxml')
     channelTitles = xpath(feed, "/rss/channel/title/text()")
     self.assertEqual("Harvester status voor repository2", channelTitles[0])
     feedItems = xpath(feed, "/rss/channel/item")
     self.assertEqual(0, len(feedItems))
Exemple #2
0
 def testRssForStatusChangesOk(self):
     """Check /running.rss for 'adomain' after running the harvester on REPOSITORY.

     Verifies the channel title, description, link and ttl, and that the
     feed holds exactly one item titled "integrationtest: Ok" dated today (UTC).
     """
     self.startHarvester(repository=REPOSITORY)
     _header, feed = getRequest(
         self.harvesterInternalServerPortNumber,
         '/running.rss',
         {'domainId': 'adomain'},
         parse='lxml')

     # Channel-level metadata, checked in the order title, description, link, ttl.
     channelExpectations = [
         ("Harvest status changes for domain 'adomain'",
          "/rss/channel/title/text()"),
         ("Status changes per repository for domain 'adomain'",
          "/rss/channel/description/text()"),
         ("http://localhost:9999/showHarvesterStatus?domainId=adomain",
          "/rss/channel/link/text()"),
         (str(60 * 6),
          "/rss/channel/ttl/text()"),
     ]
     for expected, path in channelExpectations:
         self.assertEqual(expected, xpath(feed, path)[0])

     today = strftime("%Y-%m-%d", gmtime())
     feedItems = xpath(feed, "/rss/channel/item")
     self.assertEqual(1, len(feedItems))
     item = feedItems[0]

     title = ''.join(xpath(item, "title/text()"))
     self.assertEqual("integrationtest: Ok", title)

     description = ''.join(xpath(item, "description/text()"))
     self.assertTrue(description.startswith("Harvest time: %s" % today),
                     description)

     # Only the part of the guid before the first 'T' is compared.
     guid = ''.join(xpath(item, "guid/text()"))
     self.assertEqual('integrationtest:%s' % today, guid.split('T')[0])

     itemLink = xpath(item, "link/text()")[0]
     self.assertEqual(
         "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
         itemLink)
Exemple #3
0
 def testRssForStatusChangesError(self):
     """Check that /running.rss reports an Error item for a failing harvest.

     The control helper is invoked with action "raiseExceptionOnIds" for
     record 01 of REPOSITORY before the harvester is started; the feed is
     then expected to hold one item titled "integrationtest: Error".
     """
     self.controlHelper(action="raiseExceptionOnIds",
                        id=['%s:oai:record:01' % REPOSITORY])
     self.startHarvester(repository=REPOSITORY)
     header, result = getRequest(self.harvesterInternalServerPortNumber,
                                 '/running.rss', {'domainId': 'adomain'},
                                 parse='lxml')
     TODAY = strftime("%Y-%m-%d", gmtime())
     items = xpath(result, "/rss/channel/item")
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual(1, len(items))
     self.assertEqual("integrationtest: Error",
                      ''.join(xpath(items[0], "title/text()")))
     description = ''.join(xpath(items[0], "description/text()"))
     self.assertTrue(description.startswith("Harvest time: %s" % TODAY),
                     description)
     self.assertTrue("Exception: ERROR" in description, description)
     # Only the part of the guid before the first 'T' is compared.
     self.assertEqual('integrationtest:%s' % TODAY,
                      ''.join(xpath(items[0], "guid/text()")).split('T')[0])
     self.assertEqual(
         "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
         xpath(items[0], "link/text()")[0])
    def testOne(self):
        """Send and then delete self.upload, inspecting each captured update request."""
        self.uploader.send(self.upload)
        self.assertEqual(1, len(self.sentData))

        # First captured request: a 'replace' carrying two document parts.
        sendRequest = _parse(self.sentData[0])
        sentIdentifier = xpathFirst(sendRequest, 'ucp:recordIdentifier/text()')
        self.assertEqual('some:id', sentIdentifier)
        sentAction = xpathFirst(sendRequest, 'ucp:action/text()')
        self.assertEqual('info:srw/action/1/replace', sentAction)
        parts = xpath(
            sendRequest,
            'srw:record/srw:recordData/document:document/document:part')
        self.assertEqual(2, len(parts))
        expectedParts = [
            ('meta', '<meta>....</meta>'),
            ('otherdata', '<stupidXML>ßabcdefgh'),
        ]
        for part, (expectedName, expectedText) in zip(parts, expectedParts):
            self.assertEqual(expectedName, part.attrib['name'])
            self.assertEqual(expectedText, part.text)

        # Second captured request: a 'delete' for the same identifier.
        self.uploader.delete(self.upload)
        deleteRequest = _parse(self.sentData[1])
        deletedIdentifier = xpathFirst(deleteRequest,
                                       'ucp:recordIdentifier/text()')
        self.assertEqual('some:id', deletedIdentifier)
        deleteAction = xpathFirst(deleteRequest, 'ucp:action/text()')
        self.assertEqual('info:srw/action/1/delete', deleteAction)
Exemple #5
0
    def testConcurrentHarvestToSruUpdate(self):
        """Run the harvester with concurrency=3 and expect interleaved uploads.

        Every file in self.dumpDir is parsed (in sorted filename order) and the
        repository prefix of its record identifier is collected. The test
        passes only when some repository id shows up again after a different
        repository was seen in between.
        """
        self.startHarvester(concurrency=3)

        requestsLogged = sorted(listdir(self.dumpDir))

        repositoryIds = []
        for f in requestsLogged:
            # Close each dump file right after parsing instead of leaking the handle.
            with open(join(self.dumpDir, f)) as fp:
                tree = parse(fp)
            identifier = xpath(tree, '//ucp:recordIdentifier/text()')[0]
            repositoryIds.append(identifier.split(':', 1)[0])

        repositoryIdsSet = set(repositoryIds)
        self.assertEqual(
            {'repository2', 'integrationtest', 'harvestertestrepository'},
            repositoryIdsSet)

        lastSeenRepoId = None
        try:
            for repo in repositoryIds:
                if repo != lastSeenRepoId:
                    # Each id is removed the first time a run of it starts; a
                    # second run of the same id makes remove() raise KeyError.
                    repositoryIdsSet.remove(repo)
                    lastSeenRepoId = repo
        except KeyError:
            pass
        else:
            self.fail('Records should have been inserted out-of-order.')
Exemple #6
0
    def testRssForHarvesterStatus(self):
        """Check the /rss feed for repository 'integrationtest' in 'adomain'.

        After a "noneInvalid" control action and a harvest of REPOSITORY, the
        channel metadata and the first item's title, description counters and
        link are verified.
        """
        self.controlHelper(action="noneInvalid")
        self.startHarvester(repository=REPOSITORY)
        query = {'domainId': 'adomain', 'repositoryId': 'integrationtest'}
        _header, feed = getRequest(self.harvesterInternalServerPortNumber,
                                   '/rss', query, parse='lxml')

        channelTitle = xpath(feed, "/rss/channel/title/text()")[0]
        self.assertEqual("Harvester status voor integrationtest", channelTitle)
        channelDescription = xpath(feed, "/rss/channel/description/text()")[0]
        self.assertEqual(
            "Recente repository harvest status voor integrationtest in adomain",
            channelDescription)
        channelLink = xpath(feed, "/rss/channel/link/text()")[0]
        self.assertEqual(
            "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
            channelLink)
        channelTtl = xpath(feed, "/rss/channel/ttl/text()")[0]
        self.assertEqual(str(60 * 6), channelTtl)

        itemTitle = xpath(feed, "/rss/channel/item[1]/title/text()")[0]
        self.assertEqual("Harvester status voor integrationtest", itemTitle)
        description = xpath(feed, "/rss/channel/item[1]/description/text()")[0]
        # Each fragment must occur somewhere in the first item's description.
        expectedFragments = (
            "Last harvest date: ",
            "Total records: 8",
            "Harvested records: 10",
            "Uploaded records: 8",
            "Deleted records: 2",
            "Validation errors: 0",
            "Errors: 0",
        )
        for fragment in expectedFragments:
            self.assertTrue(fragment in description, description)
        itemLink = xpath(feed, "/rss/channel/item[1]/link/text()")[0]
        self.assertEqual(
            "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
            itemLink)
Exemple #7
0
    def testRssForHarvesterStatus(self):
        """Check the /rss feed for repository 'integrationtest' in 'adomain'.

        After a "noneInvalid" control action and a harvest of REPOSITORY, the
        channel metadata and the first item's title, description counters and
        link are verified.
        """
        self.controlHelper(action="noneInvalid")
        self.startHarvester(repository=REPOSITORY)
        header, result = getRequest(
            self.harvesterInternalServerPortNumber,
            '/rss',
            {'domainId': 'adomain', 'repositoryId': 'integrationtest'},
            parse='lxml')
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual("Harvester status voor integrationtest",
                         xpath(result, "/rss/channel/title/text()")[0])
        self.assertEqual(
            "Recente repository harvest status voor integrationtest in adomain",
            xpath(result, "/rss/channel/description/text()")[0])
        self.assertEqual(
            "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
            xpath(result, "/rss/channel/link/text()")[0])
        self.assertEqual(str(60 * 6),
                         xpath(result, "/rss/channel/ttl/text()")[0])

        self.assertEqual("Harvester status voor integrationtest",
                         xpath(result, "/rss/channel/item[1]/title/text()")[0])
        description = xpath(result,
                            "/rss/channel/item[1]/description/text()")[0]
        # Each fragment must occur somewhere in the first item's description.
        expectedFragments = (
            "Last harvest date: ",
            "Total records: 8",
            "Harvested records: 10",
            "Uploaded records: 8",
            "Deleted records: 2",
            "Validation errors: 0",
            "Errors: 0",
        )
        for fragment in expectedFragments:
            self.assertTrue(fragment in description, description)
        self.assertEqual(
            "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
            xpath(result, "/rss/channel/item[1]/link/text()")[0])
 def __init__(self, response):
     """Extract records, resumption token and response date from *response*.

     The response date is stripped when present; when it is missing or
     empty, the value of self._zulu() is used instead. Finally the first
     record found under any OAI-PMH verb element is passed to selectRecord.
     """
     self.response = response
     self.records = xpath(response,
                          '/oai:OAI-PMH/oai:ListRecords/oai:record')
     token = xpathFirst(
         response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
     self.resumptionToken = token or ''

     # The response date should be there, but is absent for some repositories.
     responseDate = xpathFirst(response,
                               '/oai:OAI-PMH/oai:responseDate/text()')
     if responseDate is not None:
         responseDate = responseDate.strip()
     self.responseDate = responseDate
     if not self.responseDate:
         self.responseDate = self._zulu()

     firstRecord = xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record')
     self.selectRecord(firstRecord)
    def testSendWithMultipleAbout(self):
        """Send an upload with two <about> elements and expect both in the record file."""
        ABOUT = '<about xmlns="%(oai)s">about_1</about><about xmlns="%(oai)s">about_2</about>' % namespaces

        recordFile = self.tempdir + '/group/repo/id.record'
        # Force the uploader to write to a known location.
        self.uploader._filenameFor = lambda *args: recordFile

        upload = createUpload(about=ABOUT)
        self.uploader.send(upload)

        self.assertTrue(isfile(recordFile))
        # Parse inside a with-block so the record file is closed again.
        with open(recordFile) as fp:
            aboutNodes = xpath(parse(fp), '//oai:about')
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(ABOUT, ''.join(lxmltostring(x) for x in aboutNodes))
Exemple #10
0
 def testRssForStatusChangesError(self):
     """Check that /running.rss reports an Error item for a failing harvest.

     The control helper is invoked with action "raiseExceptionOnIds" for
     record 01 of REPOSITORY before the harvester is started; the feed is
     then expected to hold one item titled "integrationtest: Error".
     """
     failingIds = ['%s:oai:record:01' % REPOSITORY]
     self.controlHelper(action="raiseExceptionOnIds", id=failingIds)
     self.startHarvester(repository=REPOSITORY)
     _header, feed = getRequest(
         self.harvesterInternalServerPortNumber,
         '/running.rss',
         {'domainId': 'adomain'},
         parse='lxml')
     today = strftime("%Y-%m-%d", gmtime())

     feedItems = xpath(feed, "/rss/channel/item")
     self.assertEqual(1, len(feedItems))
     item = feedItems[0]

     title = ''.join(xpath(item, "title/text()"))
     self.assertEqual("integrationtest: Error", title)

     description = ''.join(xpath(item, "description/text()"))
     self.assertTrue(description.startswith("Harvest time: %s" % today),
                     description)
     self.assertTrue("Exception: ERROR" in description, description)

     # Only the part of the guid before the first 'T' is compared.
     guid = ''.join(xpath(item, "guid/text()"))
     self.assertEqual('integrationtest:%s' % today, guid.split('T')[0])

     itemLink = xpath(item, "link/text()")[0]
     self.assertEqual(
         "http://localhost:9999/showHarvesterStatus?domainId=adomain&repositoryId=integrationtest",
         itemLink)
Exemple #11
0
    def testConcurrentHarvestToSruUpdateBUG(self):
        """Harvest with concurrency=1 and count the dumped requests per repository.

        REPOSITORY is first saved with complete=True; afterwards every file in
        self.dumpDir is parsed and the repository prefix of its record
        identifier is tallied.
        """
        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, complete=True)

        self.startHarvester(concurrency=1)

        requestsLogged = sorted(listdir(self.dumpDir))
        repositoryIds = []
        for f in requestsLogged:
            # Close each dump file right after parsing instead of leaking the handle.
            with open(join(self.dumpDir, f)) as fp:
                tree = parse(fp)
            identifier = xpath(tree, '//ucp:recordIdentifier/text()')[0]
            repositoryIds.append(identifier.split(':', 1)[0])
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(15, repositoryIds.count(REPOSITORY))
        self.assertEqual(10, repositoryIds.count('repository2'))
        self.assertEqual(10, repositoryIds.count('integrationtest'))
    def testSendWithMultipleAbout(self):
        """Send an upload with two <about> elements and expect both in the record file."""
        ABOUT = '<about xmlns="%(oai)s">about_1</about><about xmlns="%(oai)s">about_2</about>' % namespaces

        targetPath = self.tempdir + '/group/repo/id.record'
        # Force the uploader to write to a known location.
        self.uploader._filenameFor = lambda *args: targetPath

        self.uploader.send(createUpload(about=ABOUT))

        self.assertTrue(isfile(targetPath))
        with open(targetPath) as recordStream:
            aboutNodes = xpath(parse(recordStream), '//oai:about')
            serialized = ''.join(lxmltostring(node) for node in aboutNodes)
            self.assertEqual(ABOUT, serialized)
    def testOne(self):
        """Send and then delete self.upload, inspecting each captured update request."""
        self.uploader.send(self.upload)
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(1, len(self.sentData))

        # First captured request: a 'replace' carrying two document parts.
        updateRequest = XML(self.sentData[0])
        self.assertEqual(
            'some:id', xpathFirst(updateRequest,
                                  'ucp:recordIdentifier/text()'))
        self.assertEqual('info:srw/action/1/replace',
                         xpathFirst(updateRequest, 'ucp:action/text()'))
        documentParts = xpath(
            updateRequest,
            'srw:record/srw:recordData/document:document/document:part')
        self.assertEqual(2, len(documentParts))

        # Second captured request: a 'delete' for the same identifier.
        self.uploader.delete(self.upload)
        updateRequest = XML(self.sentData[1])
        self.assertEqual(
            'some:id', xpathFirst(updateRequest,
                                  'ucp:recordIdentifier/text()'))
        self.assertEqual('info:srw/action/1/delete',
                         xpathFirst(updateRequest, 'ucp:action/text()'))
Exemple #14
0
    def testConcurrentHarvestToSruUpdateBUG(self):
        """Harvest with concurrency=1 and count the dumped requests per repository.

        REPOSITORY is first saved with complete=True; afterwards every file in
        self.dumpDir is parsed and the repository prefix of its record
        identifier is tallied.
        """
        self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, complete=True)

        self.startHarvester(concurrency=1)

        requestsLogged = sorted(listdir(self.dumpDir))
        repositoryIds = []
        for f in requestsLogged:
            # Close each dump file right after parsing instead of leaking the handle.
            with open(join(self.dumpDir, f)) as fp:
                tree = parse(fp)
            identifier = xpath(tree, '//ucp:recordIdentifier/text()')[0]
            repositoryIds.append(identifier.split(':', 1)[0])
        self.assertEqual(15, repositoryIds.count(REPOSITORY))
        self.assertEqual(10, repositoryIds.count('repository2'))
        self.assertEqual(10, repositoryIds.count('integrationtest'))
Exemple #15
0
 def __init__(self, response):
     """Extract records, resumption token and response date from *response*.

     The response date is stripped when present; when it is missing or
     empty, the value of self._zulu() is used instead. Finally the first
     record found under any OAI-PMH verb element is passed to selectRecord.
     """
     self.response = response
     self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')
     token = xpathFirst(
         response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
     self.resumptionToken = token or ''

     # The response date should be there, but is absent for some repositories.
     responseDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
     if responseDate is not None:
         responseDate = responseDate.strip()
     self.responseDate = responseDate
     if not self.responseDate:
         self.responseDate = self._zulu()

     firstRecord = xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record')
     self.selectRecord(firstRecord)
 def _parseMessage(self, message):
     """Pull version, operation status and first diagnostic out of *message*.

     Returns a tuple (version, operationStatus, diagresult) where diagresult
     is None when the update response holds no diagnostics, and otherwise a
     tuple (uri, details, message) taken from the first diagnostic.
     Indexing with [0] raises IndexError when version, operation status or
     the diagnostic uri is missing.
     """
     version = xpath(message, "/srw:updateResponse/srw:version/text()")[0]
     operationStatus = xpath(
         message, "/srw:updateResponse/ucp:operationStatus/text()")[0]
     diagnostics = xpath(
         message, "/srw:updateResponse/srw:diagnostics/diag:diagnostic")
     if not diagnostics:
         return version, operationStatus, None

     # Only the first diagnostic is reported.
     first = diagnostics[0]
     uri = xpath(first, "diag:uri/text()")[0]
     details = ''.join(xpath(first, "diag:details/text()"))
     text = ''.join(xpath(first, "diag:message/text()"))
     return version, operationStatus, (uri, details, text)
Exemple #17
0
 def testRssForStatusChangesOk(self):
     """Check /running.rss for 'adomain' after running the harvester on REPOSITORY.

     Verifies the channel title, description, link and ttl, and that the
     feed holds exactly one item titled "integrationtest: Ok" dated today (UTC).
     """
     self.startHarvester(repository=REPOSITORY)
     header, result = getRequest(self.harvesterInternalServerPortNumber,
                                 '/running.rss', {'domainId': 'adomain'},
                                 parse='lxml')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual("Harvest status changes for domain 'adomain'",
                      xpath(result, "/rss/channel/title/text()")[0])
     self.assertEqual("Status changes per repository for domain 'adomain'",
                      xpath(result, "/rss/channel/description/text()")[0])
     self.assertEqual(
         "http://localhost:9999/harvesterStatus.page?domainId=adomain",
         xpath(result, "/rss/channel/link/text()")[0])
     self.assertEqual(str(60 * 6),
                      xpath(result, "/rss/channel/ttl/text()")[0])
     TODAY = strftime("%Y-%m-%d", gmtime())
     items = xpath(result, "/rss/channel/item")
     self.assertEqual(1, len(items))
     self.assertEqual("integrationtest: Ok",
                      ''.join(xpath(items[0], "title/text()")))
     description = ''.join(xpath(items[0], "description/text()"))
     self.assertTrue(description.startswith("Harvest time: %s" % TODAY),
                     description)
     # Only the part of the guid before the first 'T' is compared.
     self.assertEqual('integrationtest:%s' % TODAY,
                      ''.join(xpath(items[0], "guid/text()")).split('T')[0])
     self.assertEqual(
         "http://localhost:9999/harvesterStatus.page?domainId=adomain&repositoryId=integrationtest",
         xpath(items[0], "link/text()")[0])
 def _parseMessage(self, message):
     """Pull version, operation status and first diagnostic out of *message*.

     Returns a tuple (version, operationStatus, diagresult) where diagresult
     is None when the update response holds no diagnostics, and otherwise a
     tuple (uri, details, message) taken from the first diagnostic.
     Indexing with [0] raises IndexError when version, operation status or
     the diagnostic uri is missing.
     """
     versionTexts = xpath(message, "/srw:updateResponse/srw:version/text()")
     version = versionTexts[0]
     statusTexts = xpath(message, "/srw:updateResponse/ucp:operationStatus/text()")
     operationStatus = statusTexts[0]

     diagresult = None
     found = xpath(message, "/srw:updateResponse/srw:diagnostics/diag:diagnostic")
     if found:
         # Only the first diagnostic is reported.
         head = found[0]
         diagresult = (
             xpath(head, "diag:uri/text()")[0],
             ''.join(xpath(head, "diag:details/text()")),
             ''.join(xpath(head, "diag:message/text()")),
         )
     return version, operationStatus, diagresult
Exemple #19
0
    def testConcurrentHarvestToSruUpdate(self):
        """Run the harvester with concurrency=3 and expect interleaved uploads.

        Every file in self.dumpDir is parsed (in sorted filename order) and the
        repository prefix of its record identifier is collected. The test
        passes only when some repository id shows up again after a different
        repository was seen in between.
        """
        self.startHarvester(concurrency=3)

        requestsLogged = sorted(listdir(self.dumpDir))

        repositoryIds = []
        for f in requestsLogged:
            # Close each dump file right after parsing instead of leaking the handle.
            with open(join(self.dumpDir, f)) as fp:
                tree = parse(fp)
            identifier = xpath(tree, '//ucp:recordIdentifier/text()')[0]
            repositoryIds.append(identifier.split(':', 1)[0])

        repositoryIdsSet = set(repositoryIds)
        # assertEqual is used instead of the deprecated assertEquals alias.
        self.assertEqual(
            {'repository2', 'integrationtest', 'harvestertestrepository'},
            repositoryIdsSet)

        lastSeenRepoId = None
        try:
            for repo in repositoryIds:
                if repo != lastSeenRepoId:
                    # Each id is removed the first time a run of it starts; a
                    # second run of the same id makes remove() raise KeyError.
                    repositoryIdsSet.remove(repo)
                    lastSeenRepoId = repo
        except KeyError:
            pass
        else:
            self.fail('Records should have been inserted out-of-order.')
Exemple #20
0
 def __init__(self, response):
     """Extract records, resumption token and response date from *response*.

     The first record found under any OAI-PMH verb element is passed to
     selectRecord.
     """
     self.response = response
     self.records = xpath(response,
                          '/oai:OAI-PMH/oai:ListRecords/oai:record')
     token = xpathFirst(
         response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
     self.resumptionToken = token or ''
     # NOTE(review): presumably xpathFirst yields None when nothing matches
     # (the `or ''` above suggests so); in that case .strip() raises
     # AttributeError for responses without a responseDate - confirm.
     responseDate = xpathFirst(response,
                               '/oai:OAI-PMH/oai:responseDate/text()')
     self.responseDate = responseDate.strip()
     firstRecord = xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record')
     self.selectRecord(firstRecord)
Exemple #21
0
 def testRssForNeverHarvestedRepository(self):
     """Request /rss for repository2 and expect a titled channel without items."""
     header, result = getRequest(self.harvesterInternalServerPortNumber,
                                 '/rss', {
                                     'domainId': 'adomain',
                                     'repositoryId': 'repository2'
                                 },
                                 parse='lxml')
     # assertEqual is used instead of the deprecated assertEquals alias.
     self.assertEqual("Harvester status voor repository2",
                      xpath(result, "/rss/channel/title/text()")[0])
     self.assertEqual(0, len(xpath(result, "/rss/channel/item")))