Esempio n. 1
0
class XlsServer(object):
    """HTTP handler that serves a zipped Excel sheet of a repository's invalid records.

    The sheet has one row per invalid record: the record id (hyperlinked to an
    OAI-PMH GetRecord request built from the repository configuration) and the
    first diagnostic detail found for that record.
    """

    def __init__(self, name=None):
        """Remember the optional name and set up the status reader.

        name: optional identifier; only stored on the instance.
        """
        self._name = name
        self._repostatus = RepositoryStatus(
            '/var/log/meresco-harvester', '/var/lib/meresco-harvester/state')

    def handleRequest(self,
                      path,
                      port=None,
                      Client=None,
                      Method=None,
                      Headers=None,
                      **kwargs):
        """Yield the HTTP response for a report download request.

        The repository id is the first 'rid' value in kwargs['arguments'].
        When that argument is missing, or no report can be built for it, a
        404 page is yielded; otherwise the stream of the generated zip file.

        path: requested URL path, echoed (escaped) in the 404 page.
        port, Client, Method, Headers: accepted for interface compatibility,
            not used here.
        """
        # Function-scope import so this class does not depend on a new
        # file-level import.
        from xml.sax.saxutils import escape

        # A request without arguments / without 'rid' used to raise
        # TypeError / KeyError; treat it as "nothing to serve" instead.
        arguments = kwargs.get("arguments") or {}
        rids = arguments.get('rid') or []
        resolvedFileOrDir = self._createXLS(rids[0]) if rids else None
        if resolvedFileOrDir is None:
            # presumably the HTTP 404 status line and headers - confirm in httputils
            yield httputils.notFoundHtml
            yield '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\n'
            yield "<html><head>\n"
            yield "<title>404 Not Found</title>\n"
            yield "</head><body>\n"
            yield "<h1>Not Found</h1>\n"
            # The path comes straight from the request: escape it before
            # putting it into HTML.
            yield "<p>The requested URL %s was not found on this server.</p>\n" % escape(
                "%s" % (path, ))
            yield "</body></html>\n"
            return
        yield resolvedFileOrDir.stream()

    def _createXLS(self, repositoryId):
        """Build the zipped .xlsx report for repositoryId.

        Returns a File for the zip archive, or None when the repository has
        no readable configuration or no invalid records.
        """
        domainId = 'EduStandaard'
        # Materialise the ids: the emptiness check below is meaningless on an
        # iterator (always truthy). NOTE(review): invalidRecords appears to
        # return an iterable rather than a list - confirm in RepositoryStatus.
        invalidrecordIds = list(
            self._repostatus.invalidRecords(domainId, repositoryId))

        config = None
        repofilepath = self._getRepositoryJson(domainId, repositoryId)
        if repofilepath is not None and isfile(repofilepath):
            with open(repofilepath, 'r') as repojsonfile:
                config = json.load(repojsonfile)

        if config is None or not invalidrecordIds:
            return None

        # Create workbook
        wb = Workbook()
        ws = wb.worksheets[0]
        ws.title = (
            '%s Invald OAI-PMH records' %
            repositoryId)[:30]  # max. string length for worksheet title...

        # Add content: one row per invalid record.
        max_col = 0
        for row, recId in enumerate(invalidrecordIds, 1):
            # Keep only the part after the first ':' (whole id if there is none).
            recordId = recId.split(":", 1)[-1]
            max_col = max(max_col, len(recordId))
            etree = self._repostatus.getInvalidRecord(
                domainId, str(repositoryId), str(recordId))
            diagnostic = etree.xpath(
                '//diag:diagnostic/diag:details/text()',
                namespaces={
                    'diag': 'http://www.loc.gov/zing/srw/diagnostic/'
                })
            firstcell = ws.cell(column=1, row=row, value=recordId)
            firstcell.hyperlink = "%(baseurl)s?verb=GetRecord&identifier=%(oai_id)s&metadataPrefix=%(mdpf)s" % {
                'baseurl': config['baseurl'],
                'oai_id': recordId,
                'mdpf': config['metadataPrefix']
            }
            # A record without diagnostic details gets an empty cell instead
            # of aborting the whole report with an IndexError.
            ws.cell(column=2,
                    row=row,
                    value=diagnostic[0] if diagnostic else None)

        # Widen the id column to the longest record id.
        if max_col > 0:
            ws.column_dimensions['A'].width = max_col

        # Create tempdir to save to and archive to.
        # NOTE(review): nothing visible here removes this directory again;
        # it has to outlive this call because the returned File is streamed
        # by the caller.
        temp_dir = tempfile.mkdtemp()
        xlsName = "%s.xlsx" % repositoryId
        xlsPath = join(temp_dir, xlsName)
        zipPath = join(temp_dir, "%s.zip" % repositoryId)

        wb.save(xlsPath)
        with zipfile.ZipFile(zipPath, mode='w') as zf:
            zf.write(xlsPath, xlsName)

        return File(zipPath)

    def _getRepositoryJson(self, domainId, repositoryId):
        """Return the path of the repository's JSON config file, or None if absent."""
        repojsonfile = join(
            '/var/lib/meresco-harvester/data',
            escapeFilename("%s.%s.repository" % (domainId, repositoryId)))
        if not isfile(repojsonfile):
            return None
        return repojsonfile
class RepositoryStatusTest(SeecrTestCase):
    # Tests RepositoryStatus against a log dir and a state dir created under
    # the test's tempdir, with a CallTrace observer standing in for the
    # harvester data.

    @staticmethod
    def _expectedStatus(repositoryId, repositoryGroupId, **overrides):
        # Expected getStatus() entry for a repository without any events or
        # invalid records; keyword arguments replace individual fields.
        expected = {
            "repositoryId": repositoryId,
            "repositoryGroupId": repositoryGroupId,
            "lastHarvestDate": None,
            "harvested": 0,
            "uploaded": 0,
            "deleted": 0,
            "total": 0,
            "totalerrors": 0,
            "recenterrors": [],
            "invalid": 0,
            "recentinvalids": [],
            "lastHarvestAttempt": None,
        }
        expected.update(overrides)
        return expected

    @staticmethod
    def _eventLine(timestamp, kind, message):
        # One tab-separated events-file line for repoId1.
        return '\t'.join(['[%s]' % timestamp, kind, 'repoId1', message])

    def _parseRepoId1Events(self, *logLines):
        # Writes the given lines as repoId1's events file and parses it.
        _writeFile(self.logDir,
                   self.domainId,
                   'repoId1.events',
                   data="\n".join(logLines))
        return self.status._parseEventsFile(domainId=self.domainId,
                                            repositoryId='repoId1')

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.domainId = "adomain"
        self.stateDir = mkdir(self.tempdir, "state")
        mkdir(self.stateDir, self.domainId)
        self.logDir = mkdir(self.tempdir, "log")
        invalidDirRepo1 = mkdir(self.logDir, self.domainId, "invalid",
                                "repoId1")
        invalidDirRepo2 = mkdir(self.logDir, self.domainId, "invalid",
                                escapeFilename("repoId/2"))

        # One diagnostic file per invalid record.
        diagnostics = [
            (invalidDirRepo1, "invalidId1", "<diagnostic>ERROR1</diagnostic>"),
            (invalidDirRepo1, "invalidId&2", "<diagnostic>ERROR2</diagnostic>"),
            (invalidDirRepo2, escapeFilename("invalidId/3"),
             "<diagnostic>ERROR3</diagnostic>"),
        ]
        for directory, filename, xml in diagnostics:
            _writeFile(directory, filename, data=xml)

        # Per repository the list of invalid ids (repoId3 has an empty list).
        invalidIdFiles = [
            ("repoId1_invalid.ids", "invalidId1\ninvalidId&2"),
            (escapeFilename("repoId/2_invalid.ids"), "invalidId/3"),
            ("repoId3_invalid.ids", ""),
        ]
        for filename, ids in invalidIdFiles:
            _writeFile(self.stateDir, self.domainId, filename, data=ids)

        self.status = RepositoryStatus(self.logDir, self.stateDir)

        def getRepositoryIds(domainId, repositoryGroupId):
            if repositoryGroupId == "repoGroupId1":
                return ["repoId1", "repoId/2"]
            return ["repoId3", "anotherRepoId"]

        def getRepositoryGroupId(domainId, repositoryId):
            if repositoryId in ['repoId1', 'repoId/2']:
                return 'repoGroupId1'
            return 'repoGroupId2'

        harvesterData = CallTrace("HarvesterData")
        harvesterData.returnValues["getRepositoryGroupIds"] = [
            "repoGroupId1", "repoGroupId2"
        ]
        harvesterData.methods["getRepositoryIds"] = getRepositoryIds
        harvesterData.methods["getRepositoryGroupId"] = getRepositoryGroupId
        self.status.addObserver(harvesterData)

    def testGetRunningStatesForDomain(self):
        def runningState(changedate, status, message):
            return {
                'changedate': changedate,
                'status': status,
                'message': message
            }

        written = [
            ("repoId1", runningState("2012-08-14 12:00:00", "Ok", "")),
            ("repoId3",
             runningState("2012-08-13 12:00:00", "Error", "an error message")),
            ("anotherRepoId", runningState("2012-08-16 12:00:00", "Ok", "")),
        ]
        for repositoryId, state in written:
            runningFile = join(self.stateDir, self.domainId,
                               repositoryId + ".running")
            with open(runningFile, 'w') as fp:
                jsonDump(state, fp)

        # Expected order is the one asserted by the original test:
        # anotherRepoId, repoId1, repoId3.
        states = dict(written)
        expected = [
            dict(states[repositoryId], repositoryId=repositoryId)
            for repositoryId in ('anotherRepoId', 'repoId1', 'repoId3')
        ]
        self.assertEqual(expected,
                         self.status.getRunningStatesForDomain(self.domainId))

    def testGetStatusForRepoIdAndDomainId(self):
        statusOfRepoId1 = self.status.getStatus(domainId=self.domainId,
                                                repositoryId="repoId1")
        self.assertEqual([
            self._expectedStatus("repoId1",
                                 "repoGroupId1",
                                 invalid=2,
                                 recentinvalids=["invalidId&2", "invalidId1"])
        ], statusOfRepoId1)

        statusOfAnotherRepo = self.status.getStatus(
            domainId=self.domainId, repositoryId="anotherRepoId")
        self.assertEqual(
            [self._expectedStatus("anotherRepoId", "repoGroupId2")],
            statusOfAnotherRepo)

    def testGetStatusForDomainIdAndRepositoryGroupId(self):
        expected = [
            self._expectedStatus("repoId1",
                                 "repoGroupId1",
                                 invalid=2,
                                 recentinvalids=["invalidId&2", "invalidId1"]),
            self._expectedStatus("repoId/2",
                                 "repoGroupId1",
                                 invalid=1,
                                 recentinvalids=['invalidId/3']),
        ]
        self.assertEqual(
            expected,
            self.status.getStatus(domainId=self.domainId,
                                  repositoryGroupId='repoGroupId1'))

    def testGetStatusForDomainId(self):
        expected = [
            self._expectedStatus("repoId1",
                                 "repoGroupId1",
                                 invalid=2,
                                 recentinvalids=["invalidId&2", "invalidId1"]),
            self._expectedStatus("repoId/2",
                                 "repoGroupId1",
                                 invalid=1,
                                 recentinvalids=['invalidId/3']),
            self._expectedStatus("repoId3", "repoGroupId2"),
            self._expectedStatus("anotherRepoId", "repoGroupId2"),
        ]
        self.assertEqual(expected,
                         self.status.getStatus(domainId=self.domainId))

    def testGetAllInvalidRecords(self):
        expectedPerRepository = [
            ("repoId1", ["invalidId&2", "invalidId1"]),
            ("repoId/2", ["invalidId/3"]),
            ("repoId3", []),
            ("repoId4", []),
        ]
        for repoId, expectedIds in expectedPerRepository:
            self.assertEqual(
                expectedIds,
                list(self.status.invalidRecords(self.domainId, repoId)))

    def testGetInvalidRecord(self):
        expectedPerRecord = [
            ("repoId1", "invalidId1", "<diagnostic>ERROR1</diagnostic>"),
            ("repoId1", "invalidId&2", "<diagnostic>ERROR2</diagnostic>"),
            ("repoId/2", "invalidId/3", "<diagnostic>ERROR3</diagnostic>"),
        ]
        for repoId, recordId, expectedXml in expectedPerRecord:
            record = self.status.getInvalidRecord(self.domainId, repoId,
                                                  recordId)
            self.assertEqual(expectedXml, tostring(record, encoding=str))

    def testRecentInvalidsOnlyGives10InCaseOfManyMoreInvalids(self):
        idsFile = join(self.stateDir, self.domainId, "repoId1_invalid.ids")
        with open(idsFile, 'w') as f:
            f.writelines("invalidId%d\n" % i for i in range(20))
        repoStatus = self.status.getStatus(domainId=self.domainId,
                                           repositoryId="repoId1")[0]
        self.assertEqual(20, repoStatus['invalid'])
        self.assertEqual(10, len(repoStatus['recentinvalids']))

    def testSucces(self):
        state = self._parseRepoId1Events(
            self._eventLine(
                '2006-03-13 12:13:14', 'SUCCES',
                'Harvested/Uploaded/Deleted/Total: 200/199/1/1542, ResumptionToken: None'
            ))
        self.assertEqual('2006-03-13T12:13:14Z', state["lastHarvestDate"])
        self.assertEqual('200', state["harvested"])
        self.assertEqual('199', state["uploaded"])
        self.assertEqual('1', state["deleted"])
        self.assertEqual('1542', state["total"])
        self.assertEqual(0, state["totalerrors"])
        self.assertEqual([], state["recenterrors"])

    def testOnlyErrors(self):
        state = self._parseRepoId1Events(
            self._eventLine('2006-03-11 12:13:14', 'ERROR',
                            'Sorry, but the VM has crashed.'))
        # Without a successful harvest none of the harvest fields are set.
        for key in ("lastHarvestDate", "harvested", "uploaded", "deleted",
                    "total"):
            self.assertTrue(key not in state, list(state.keys()))
        self.assertEqual(1, state["totalerrors"])
        self.assertEqual("2006-03-11T12:13:14Z", state["lastHarvestAttempt"])
        self.assertEqual(
            [('2006-03-11T12:13:14Z', 'Sorry, but the VM has crashed.')],
            state["recenterrors"])

    def testTwoErrors(self):
        state = self._parseRepoId1Events(
            self._eventLine('2006-03-11 12:13:14', 'ERROR',
                            'Sorry, but the VM has crashed.'),
            self._eventLine('2006-03-11 12:14:14', 'ERROR',
                            'java.lang.NullPointerException.'))
        self.assertEqual(2, state["totalerrors"])
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestAttempt"])
        # Most recent error first.
        self.assertEqual(
            [('2006-03-11T12:14:14Z', 'java.lang.NullPointerException.'),
             ('2006-03-11T12:13:14Z', 'Sorry, but the VM has crashed.')],
            state["recenterrors"])

    def testErrorAfterSucces(self):
        state = self._parseRepoId1Events(
            self._eventLine(
                '2006-03-11 12:13:14', 'SUCCES',
                'Harvested/Uploaded/Deleted/Total: 200/199/1/1542, ResumptionToken: abcdef'
            ),
            self._eventLine('2006-03-11 12:14:14', 'ERROR',
                            'java.lang.NullPointerException.'))
        self.assertEqual("2006-03-11T12:13:14Z", state["lastHarvestDate"])
        self.assertEqual("200", state["harvested"])
        self.assertEqual("199", state["uploaded"])
        self.assertEqual("1", state["deleted"])
        self.assertEqual("1542", state["total"])
        self.assertEqual(1, state["totalerrors"])
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestAttempt"])
        self.assertEqual(
            [('2006-03-11T12:14:14Z', 'java.lang.NullPointerException.')],
            state["recenterrors"])

    def testErrorBeforeSucces(self):
        state = self._parseRepoId1Events(
            self._eventLine('2006-03-11 12:13:14', 'ERROR',
                            'java.lang.NullPointerException.'),
            self._eventLine(
                '2006-03-11 12:14:14', 'SUCCES',
                'Harvested/Uploaded/Deleted/Total: 200/199/1/1542, ResumptionToken: abcdef'
            ))
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestDate"])
        self.assertEqual("200", state["harvested"])
        self.assertEqual("199", state["uploaded"])
        self.assertEqual("1", state["deleted"])
        self.assertEqual("1542", state["total"])
        self.assertEqual(0, state["totalerrors"])
        self.assertEqual([], state["recenterrors"])
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestAttempt"])

    def testLotOfErrors(self):
        eventsFile = join(self.logDir, self.domainId, 'repoId1.events')
        with open(eventsFile, 'w') as f:
            for minute in range(20):
                f.write(
                    self._eventLine('2006-03-11 12:%.2d:14' % minute, 'ERROR',
                                    'Error %d, Crash' % minute) + "\n")
        state = self.status._parseEventsFile(domainId=self.domainId,
                                             repositoryId='repoId1')
        self.assertEqual(20, state["totalerrors"])
        self.assertEqual(10, len(state["recenterrors"]))
        # The ten latest errors (minutes 19 down to 10), newest first.
        self.assertEqual(
            [('2006-03-11T12:%.2d:14Z' % minute, 'Error %d, Crash' % minute)
             for minute in range(19, 9, -1)], state["recenterrors"])

    def testIntegration(self):
        _writeFile(
            self.logDir,
            self.domainId,
            'repoId1.events',
            data="""[2005-08-20 20:00:00.456]\tERROR\t[repositoryId]\tError 1
[2005-08-21 20:00:00.456]\tSUCCES\t[repositoryId]\tHarvested/Uploaded/Deleted/Total: 4/3/2/10
[2005-08-22 00:00:00.456]\tSUCCES\t[repositoryId]\tHarvested/Uploaded/Deleted/Total: 8/4/3/16
[2005-08-22 20:00:00.456]\tERROR\t[repositoryId]\tError 2
[2005-08-23 20:00:00.456]\tERROR\t[repositoryId]\tError 3
[2005-08-23 20:00:01.456]\tERROR\t[repositoryId]\tError 4
[2005-08-23 20:00:02.456]\tERROR\t[repositoryId]\tError 5
[2005-08-24 00:00:00.456]\tSUCCES\t[repositoryId]\tHarvested/Uploaded/Deleted/Total: 8/4/3/20
[2005-08-24 20:00:00.456]\tERROR\t[repositoryId]\tError With Scary Characters < & > " '
""")
        expected = self._expectedStatus(
            "repoId1",
            "repoGroupId1",
            lastHarvestDate="2005-08-24T00:00:00Z",
            harvested=8,
            uploaded=4,
            deleted=3,
            total=20,
            totalerrors=1,
            recenterrors=[
                dict(date="2005-08-24T20:00:00Z",
                     error='Error With Scary Characters < & > " \'')
            ],
            invalid=2,
            recentinvalids=['invalidId&2', 'invalidId1'],
            lastHarvestAttempt="2005-08-24T20:00:00Z")
        self.assertEqual([expected],
                         self.status.getStatus(domainId=self.domainId,
                                               repositoryId='repoId1'))
class RepositoryStatusTest(SeecrTestCase):
    def setUp(self):
        # Creates a state dir and a log dir under the test tempdir, fills them
        # with invalid-record fixtures for repoId1, repoId/2 and repoId3, and
        # attaches a CallTrace observer that answers the repository lookups.
        SeecrTestCase.setUp(self)
        self.stateDir = join(self.tempdir, "state")
        self.logDir = join(self.tempdir, "log")
        self.domainId = "adomain"
        makedirs(join(self.stateDir, self.domainId))
        repoId1LogDir = join(self.logDir, self.domainId, "invalid", "repoId1")
        repoId2LogDir = join(self.logDir, self.domainId, "invalid", escapeFilename("repoId/2"))
        makedirs(repoId1LogDir)
        makedirs(repoId2LogDir)

        def write(path, data):
            # Close every fixture file explicitly instead of relying on
            # garbage collection to flush and release the handle.
            with open(path, 'w') as f:
                f.write(data)

        write(join(repoId1LogDir, "invalidId1"), "<diagnostic>ERROR1</diagnostic>")
        write(join(repoId1LogDir, "invalidId&2"), "<diagnostic>ERROR2</diagnostic>")
        write(join(repoId2LogDir, escapeFilename("invalidId/3")), "<diagnostic>ERROR3</diagnostic>")
        write(join(self.stateDir, self.domainId, "repoId1_invalid.ids"), "invalidId1\ninvalidId&2")
        write(join(self.stateDir, self.domainId, escapeFilename("repoId/2_invalid.ids")), "invalidId/3")
        write(join(self.stateDir, self.domainId, "repoId3_invalid.ids"), "")
        self.status = RepositoryStatus(self.logDir, self.stateDir)
        observer = CallTrace("HarvesterData")
        observer.returnValues["getRepositoryGroupIds"] = ["repoGroupId1", "repoGroupId2"]
        def getRepositoryIds(domainId, repositoryGroupId):
            if repositoryGroupId == "repoGroupId1":
                return ["repoId1", "repoId/2"]
            return ["repoId3", "anotherRepoId"]
        observer.methods["getRepositoryIds"] = getRepositoryIds
        def getRepositoryGroupId(domainId, repositoryId):
            return 'repoGroupId1' if repositoryId in ['repoId1', 'repoId/2'] else 'repoGroupId2'
        observer.methods["getRepositoryGroupId"] = getRepositoryGroupId
        self.status.addObserver(observer)

    def testGetRunningStatesForDomain(self):
        # Writes three "<repositoryId>.running" JSON files and checks the
        # list returned for the domain.
        def dumpRunning(repositoryId, state):
            # Close the file deterministically so the JSON is on disk before
            # it is read back.
            with open(join(self.stateDir, self.domainId, "%s.running" % repositoryId), 'w') as fp:
                jsonDump(state, fp)

        dumpRunning("repoId1", {'changedate': "2012-08-14 12:00:00",'status': "Ok", 'message': ""})
        dumpRunning("repoId3", {'changedate': "2012-08-13 12:00:00",'status': "Error", 'message': "an error message"})
        dumpRunning("anotherRepoId", {'changedate': "2012-08-16 12:00:00",'status': "Ok", 'message': ""})

        expected = [
            {'repositoryId': 'anotherRepoId', 'changedate': "2012-08-16 12:00:00",'status': "Ok", 'message': ""},
            {'repositoryId': 'repoId1', 'changedate': "2012-08-14 12:00:00",'status': "Ok", 'message': ""},
            {'repositoryId': 'repoId3', 'changedate': "2012-08-13 12:00:00",'status': "Error", 'message': "an error message"},
        ]
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(expected, self.status.getRunningStatesForDomain(self.domainId))


    def testGetStatusForRepoIdAndDomainId(self):
        # Status of a single repository: one with invalid records (repoId1)
        # and one without any fixture data (anotherRepoId).
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual([{
                        "repositoryId": "repoId1",
                        "repositoryGroupId": "repoGroupId1",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 2,
                        "recentinvalids": [
                            "invalidId&2",
                            "invalidId1"
                        ],
                        "lastHarvestAttempt": None
                    }], self.status.getStatus(domainId=self.domainId, repositoryId="repoId1"))
        self.assertEqual([{
                        "repositoryId": "anotherRepoId",
                        "repositoryGroupId": "repoGroupId2",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 0,
                        "recentinvalids": [],
                        "lastHarvestAttempt": None
                    }], self.status.getStatus(domainId=self.domainId, repositoryId="anotherRepoId"))

    def testGetStatusForDomainIdAndRepositoryGroupId(self):
        # Status of every repository in repoGroupId1.
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual([{
                        "repositoryId": "repoId1",
                        "repositoryGroupId": "repoGroupId1",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 2,
                        "recentinvalids": [
                            "invalidId&2",
                            "invalidId1"
                        ],
                        "lastHarvestAttempt": None
                    }, {
                        "repositoryId": "repoId/2",
                        "repositoryGroupId": "repoGroupId1",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 1,
                        "recentinvalids": ['invalidId/3'],
                        "lastHarvestAttempt": None
                    }], self.status.getStatus(domainId=self.domainId, repositoryGroupId='repoGroupId1'))

    def testGetStatusForDomainId(self):
        # Status of every repository in the domain, across both groups.
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual([{
                        "repositoryId": "repoId1",
                        "repositoryGroupId": "repoGroupId1",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 2,
                        "recentinvalids": [
                            "invalidId&2",
                            "invalidId1"
                        ],
                        "lastHarvestAttempt": None
                    }, {
                        "repositoryId": "repoId/2",
                        "repositoryGroupId": "repoGroupId1",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 1,
                        "recentinvalids": ['invalidId/3'],
                        "lastHarvestAttempt": None
                    }, {
                        "repositoryId": "repoId3",
                        "repositoryGroupId": "repoGroupId2",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 0,
                        "recentinvalids": [],
                        "lastHarvestAttempt": None
                    }, {
                        "repositoryId": "anotherRepoId",
                        "repositoryGroupId": "repoGroupId2",
                        "lastHarvestDate": None,
                        "harvested": 0,
                        "uploaded": 0,
                        "deleted": 0,
                        "total": 0,
                        "totalerrors": 0,
                        "recenterrors": [],
                        "invalid": 0,
                        "recentinvalids": [],
                        "lastHarvestAttempt": None
                    }], self.status.getStatus(domainId=self.domainId))

    def testGetAllInvalidRecords(self):
        # Invalid record ids per repository; repoId3 has an empty ids file
        # and repoId4 has no ids file at all.
        def invalidRecords(repoId):
            return list(self.status.invalidRecords(self.domainId, repoId))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(["invalidId&2", "invalidId1"], invalidRecords("repoId1"))
        self.assertEqual(["invalidId/3"], invalidRecords("repoId/2"))
        self.assertEqual([], invalidRecords("repoId3"))
        self.assertEqual([], invalidRecords("repoId4"))

    def testGetInvalidRecord(self):
        # The stored diagnostic XML is returned per record, including ids
        # that need filename escaping ('&' and '/').
        def getInvalidRecord(repoId, recordId):
            return tostring(self.status.getInvalidRecord(self.domainId, repoId, recordId))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual("<diagnostic>ERROR1</diagnostic>", getInvalidRecord("repoId1", "invalidId1"))
        self.assertEqual("<diagnostic>ERROR2</diagnostic>", getInvalidRecord("repoId1", "invalidId&2"))
        self.assertEqual("<diagnostic>ERROR3</diagnostic>", getInvalidRecord("repoId/2", "invalidId/3"))

    def testRecentInvalidsOnlyGives10InCaseOfManyMoreInvalids(self):
        # With 20 invalid ids on file, 'invalid' counts all of them while
        # 'recentinvalids' holds only 10.
        with open(join(self.stateDir, self.domainId, "repoId1_invalid.ids"), 'w') as f:
            for i in range(20):
                f.write("invalidId%d\n" % i)
        status = self.status.getStatus(domainId=self.domainId, repositoryId="repoId1")
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(20, status[0]['invalid'])
        self.assertEqual(10, len(status[0]['recentinvalids']))

    def testSucces(self):
        # A single SUCCES event yields the harvest counters and no errors.
        logLine = '\t'.join(['[2006-03-13 12:13:14]', 'SUCCES', 'repoId1', 'Harvested/Uploaded/Deleted/Total: 200/199/1/1542, ResumptionToken: None'])
        # Close the events file before it is parsed.
        with open(join(self.logDir, self.domainId, 'repoId1.events'), 'w') as f:
            f.write(logLine)
        state = self.status._parseEventsFile(domainId=self.domainId, repositoryId='repoId1')
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual('2006-03-13T12:13:14Z', state["lastHarvestDate"])
        self.assertEqual('200', state["harvested"])
        self.assertEqual('199', state["uploaded"])
        self.assertEqual('1', state["deleted"])
        self.assertEqual('1542', state["total"])
        self.assertEqual(0, state["totalerrors"])
        self.assertEqual([], state["recenterrors"])

    def testOnlyErrors(self):
        """Check the parsed state when the events file holds a single ERROR.

        Without any SUCCES event none of the harvest counters may be present;
        the error itself must be counted, listed and used as the last harvest
        attempt.
        """
        logLine = '\t'.join(['[2006-03-11 12:13:14]', 'ERROR', 'repoId1', 'Sorry, but the VM has crashed.'])
        # Close the file before parsing so the content is guaranteed to be
        # flushed to disk, independent of garbage collection timing.
        with open(join(self.logDir, self.domainId, 'repoId1.events'), 'w') as f:
            f.write(logLine)

        state = self.status._parseEventsFile(domainId=self.domainId, repositoryId='repoId1')
        # assertNotIn reports both the key and the container on failure.
        for successOnlyKey in ("lastHarvestDate", "harvested", "uploaded", "deleted", "total"):
            self.assertNotIn(successOnlyKey, state)
        self.assertEqual(1, state["totalerrors"])
        self.assertEqual("2006-03-11T12:13:14Z", state["lastHarvestAttempt"])
        self.assertEqual([('2006-03-11T12:13:14Z','Sorry, but the VM has crashed.')], state["recenterrors"])

    def testTwoErrors(self):
        """Check that two consecutive ERROR events are both counted.

        The recent errors are expected newest first, and the last harvest
        attempt is the timestamp of the second (latest) error.
        """
        logLine1 = '\t'.join(['[2006-03-11 12:13:14]', 'ERROR', 'repoId1', 'Sorry, but the VM has crashed.'])
        logLine2 = '\t'.join(['[2006-03-11 12:14:14]', 'ERROR', 'repoId1', 'java.lang.NullPointerException.'])
        # Close the file before parsing so the content is guaranteed to be
        # flushed to disk, independent of garbage collection timing.
        with open(join(self.logDir, self.domainId, 'repoId1.events'), 'w') as f:
            f.write(logLine1 + "\n" + logLine2)

        state = self.status._parseEventsFile(domainId=self.domainId, repositoryId='repoId1')
        self.assertEqual(2, state["totalerrors"])
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestAttempt"])
        self.assertEqual([('2006-03-11T12:14:14Z', 'java.lang.NullPointerException.'), ('2006-03-11T12:13:14Z','Sorry, but the VM has crashed.')], state["recenterrors"])

    def testErrorAfterSucces(self):
        """Check the parsed state for a SUCCES event followed by an ERROR.

        The counters and harvest date of the success must be kept, while the
        later error is counted and becomes the last harvest attempt.
        """
        logLine1 = '\t'.join(['[2006-03-11 12:13:14]', 'SUCCES', 'repoId1', 'Harvested/Uploaded/Deleted/Total: 200/199/1/1542, ResumptionToken: abcdef'])
        logLine2 = '\t'.join(['[2006-03-11 12:14:14]', 'ERROR', 'repoId1', 'java.lang.NullPointerException.'])
        # Close the file before parsing so the content is guaranteed to be
        # flushed to disk, independent of garbage collection timing.
        with open(join(self.logDir, self.domainId, 'repoId1.events'), 'w') as f:
            f.write(logLine1 + "\n" + logLine2)

        state = self.status._parseEventsFile(domainId=self.domainId, repositoryId='repoId1')
        self.assertEqual("2006-03-11T12:13:14Z", state["lastHarvestDate"])
        self.assertEqual("200", state["harvested"])
        self.assertEqual("199", state["uploaded"])
        self.assertEqual("1", state["deleted"])
        self.assertEqual("1542", state["total"])
        self.assertEqual(1, state["totalerrors"])
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestAttempt"])
        self.assertEqual([('2006-03-11T12:14:14Z', 'java.lang.NullPointerException.')], state["recenterrors"])

    def testErrorBeforeSucces(self):
        """Check the parsed state for an ERROR event followed by a SUCCES.

        An error that precedes the success must no longer be reported: the
        error count is 0 and the recent errors are empty, while the success
        supplies the counters, the harvest date and the last harvest attempt.
        """
        logLine1 = '\t'.join(['[2006-03-11 12:13:14]', 'ERROR', 'repoId1', 'java.lang.NullPointerException.'])
        logLine2 = '\t'.join(['[2006-03-11 12:14:14]', 'SUCCES', 'repoId1', 'Harvested/Uploaded/Deleted/Total: 200/199/1/1542, ResumptionToken: abcdef'])
        # Close the file before parsing so the content is guaranteed to be
        # flushed to disk, independent of garbage collection timing.
        with open(join(self.logDir, self.domainId, 'repoId1.events'), 'w') as f:
            f.write(logLine1 + "\n" + logLine2)

        state = self.status._parseEventsFile(domainId=self.domainId, repositoryId='repoId1')
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestDate"])
        self.assertEqual("200", state["harvested"])
        self.assertEqual("199", state["uploaded"])
        self.assertEqual("1", state["deleted"])
        self.assertEqual("1542", state["total"])
        self.assertEqual(0, state["totalerrors"])
        self.assertEqual([], state["recenterrors"])
        self.assertEqual("2006-03-11T12:14:14Z", state["lastHarvestAttempt"])

    def testLotOfErrors(self):
        """Check that only the 10 most recent of 20 errors are listed.

        Twenty ERROR events are written, one per minute (12:00 .. 12:19).
        All twenty must be counted, but 'recenterrors' must hold only the
        last ten, newest first.
        """
        with open(join(self.logDir, self.domainId, 'repoId1.events'), 'w') as f:
            for i in range(20):
                logLine = '\t'.join(['[2006-03-11 12:%.2d:14]' % i, 'ERROR', 'repoId1', 'Error %d, Crash' % i])
                f.write(logLine + "\n")

        state = self.status._parseEventsFile(domainId=self.domainId, repositoryId='repoId1')
        self.assertEqual(20, state["totalerrors"])
        self.assertEqual(10, len(state["recenterrors"]))
        # Errors 19 down to 10, i.e. the ten latest in reverse chronological order.
        expectedRecentErrors = [
            ('2006-03-11T12:%.2d:14Z' % i, 'Error %d, Crash' % i)
            for i in range(19, 9, -1)
        ]
        self.assertEqual(expectedRecentErrors, state["recenterrors"])

    def testIntegration(self):
        open(join(self.logDir, self.domainId, 'repoId1.events'), 'w').write("""[2005-08-20 20:00:00.456]\tERROR\t[repositoryId]\tError 1
[2005-08-21 20:00:00.456]\tSUCCES\t[repositoryId]\tHarvested/Uploaded/Deleted/Total: 4/3/2/10
[2005-08-22 00:00:00.456]\tSUCCES\t[repositoryId]\tHarvested/Uploaded/Deleted/Total: 8/4/3/16
[2005-08-22 20:00:00.456]\tERROR\t[repositoryId]\tError 2
[2005-08-23 20:00:00.456]\tERROR\t[repositoryId]\tError 3
[2005-08-23 20:00:01.456]\tERROR\t[repositoryId]\tError 4
[2005-08-23 20:00:02.456]\tERROR\t[repositoryId]\tError 5
[2005-08-24 00:00:00.456]\tSUCCES\t[repositoryId]\tHarvested/Uploaded/Deleted/Total: 8/4/3/20
[2005-08-24 20:00:00.456]\tERROR\t[repositoryId]\tError With Scary Characters < & > " '
""")
        self.assertEquals([{
                "repositoryId": "repoId1",
                "repositoryGroupId": "repoGroupId1",
                "lastHarvestDate": "2005-08-24T00:00:00Z",
                "harvested": 8,
                "uploaded": 4,
                "deleted": 3,
                "total": 20,
                "totalerrors": 1,
                "recenterrors": [dict(date="2005-08-24T20:00:00Z", error='Error With Scary Characters < & > " \'')],
                "invalid": 2,
                "recentinvalids": ['invalidId&2', 'invalidId1'],
                "lastHarvestAttempt": "2005-08-24T20:00:00Z"
            }], self.status.getStatus(domainId=self.domainId, repositoryId='repoId1'))