Exemple #1
0
class HarvesterLog(object):
    """Bookkeeping for a single named harvest: ids, state, counters and events.

    The instance keeps two id collections (uploaded ids and invalid ids), a
    State object, an EventLogger writing to ``<logDir>/<name>.events`` and
    three per-run counters (harvested / uploaded / deleted).

    Upload ids are expected to have the form ``<repositoryId>:<recordId>``;
    the invalid-data helpers split on the first ``:``.
    """

    def __init__(self, stateDir, logDir, name):
        """Create both directories if needed and open ids, state and event log.

        stateDir -- directory handed to Ids and State.
        logDir   -- directory for the event log and invalid-data messages.
        name     -- name used for the ids/state files and as event id.
        """
        self._name = name
        self._logDir = logDir
        ensureDirectory(logDir)
        ensureDirectory(stateDir)
        self._ids = Ids(stateDir, name)
        self._invalidIds = Ids(stateDir, name + "_invalid")
        self._state = State(stateDir, name)
        self._eventlogger = EventLogger(logDir + '/' + name + '.events')
        self._resetCounts()

    def isCurrentDay(self, date):
        """Return True if the day part of `date` equals the day of the state's time.

        `date` is cut at 'T'; the state's time string is cut at whitespace.
        """
        return date.split('T')[0] == self._state.getTime().split()[0]

    def startRepository(self):
        """Reset the per-run counters and mark the state as started."""
        self._resetCounts()
        self._state.markStarted()

    def _resetCounts(self):
        """Set the harvested, uploaded and deleted counters back to zero."""
        self._harvestedCount = 0
        self._uploadedCount = 0
        self._deletedCount = 0

    def totalIds(self):
        """Return the number of ids currently stored."""
        return len(self._ids)

    def totalInvalidIds(self):
        """Return the number of invalid ids currently stored."""
        return len(self._invalidIds)

    def eventLogger(self):
        """Return the underlying EventLogger."""
        # Should be removed, but is still used in Harvester.
        return self._eventlogger

    def markDeleted(self):
        """Clear all ids, mark the state as deleted and log the event."""
        self._ids.clear()
        self._state.markDeleted()
        self._eventlogger.logSuccess('Harvested/Uploaded/Deleted/Total: 0/0/0/0, Done: Deleted all ids.', id=self._name)

    def endRepository(self, token, responseDate):
        """Record a successful harvest in the state and in the event log.

        token        -- resumption token passed on to the state and the log line.
        responseDate -- passed on to the state.
        """
        # Computed once so the state and the event log report the same numbers.
        summary = self.countsSummary()
        self._state.markHarvested(summary, token, responseDate)
        self._eventlogger.logSuccess('Harvested/Uploaded/Deleted/Total: %s, ResumptionToken: %s' % (summary, token), id=self._name)

    def endWithException(self, exType, exValue, exTb):
        """Record a failed harvest in the state and log the traceback as an error.

        The formatted traceback lines are stripped and joined with '|' so the
        whole traceback ends up in a single log message.
        """
        self._state.markException(exType, exValue, self.countsSummary())
        tracebackLines = traceback.format_exception(exType, exValue, exTb)
        error = '|'.join(line.strip() for line in tracebackLines)
        self._eventlogger.logError(error, id=self._name)

    def countsSummary(self):
        """Return 'harvested/uploaded/deleted/total' as a string."""
        return '%d/%d/%d/%d' % (self._harvestedCount, self._uploadedCount, self._deletedCount, self.totalIds())

    def close(self):
        """Close the event logger, both id collections and the state."""
        self._eventlogger.close()
        self._ids.close()
        self._invalidIds.close()
        self._state.close()

    def notifyHarvestedRecord(self, uploadid):
        """Count a harvested record and drop any invalid-data entry for it."""
        self._removeFromInvalidData(uploadid)
        self._harvestedCount += 1

    def uploadIdentifier(self, uploadid):
        """Add `uploadid` to the ids and count it as uploaded."""
        self._ids.add(uploadid)
        self._uploadedCount += 1

    def deleteIdentifier(self, uploadid):
        """Remove `uploadid` from the ids and count it as deleted."""
        self._ids.remove(uploadid)
        self._deletedCount += 1

    def logInvalidData(self, uploadid, message):
        """Register `uploadid` as invalid and store `message` in its message file."""
        self._invalidIds.add(uploadid)
        filePath = self._invalidDataMessageFilePath(uploadid)
        ensureDirectory(dirname(filePath))
        # Context manager guarantees the message is flushed and the handle
        # is closed, also when write() raises.
        with open(filePath, 'w') as messageFile:
            messageFile.write(message)

    def logIgnoredIdentifierWarning(self, uploadid):
        """Log an 'IGNORED' warning for `uploadid`."""
        self._eventlogger.logWarning('IGNORED', uploadid)

    def clearInvalidData(self, repositoryId):
        """Forget all invalid ids of `repositoryId` and delete its message files.

        Does nothing on disk when no message directory exists for the
        repository.
        """
        # Function-scope import: isdir is not known to be imported at file level.
        from os.path import isdir

        prefix = "%s:" % repositoryId
        # Iterate over a copy because entries are removed while looping.
        for invalidId in list(self._invalidIds):
            if invalidId.startswith(prefix):
                self._invalidIds.remove(invalidId)
        # Same escaping as _invalidDataMessageFilePath, so the directory that
        # is removed is the one the messages were written to.
        messagesDir = join(self._logDir, INVALID_DATA_MESSAGES_DIR, escapeFilename(repositoryId))
        if isdir(messagesDir):
            rmtree(messagesDir)

    def hasWork(self, continuousInterval=None):
        """Return a truthy value when a new harvest should be performed.

        With `continuousInterval` given: True when the state has no `from_`
        or when more than `continuousInterval` (compared against ZuluTime
        epoch values) has elapsed since `from_`. A date-only `from_` is
        treated as midnight Zulu time.

        Without it: truthy when the state holds a token, has no `from_`, or
        `from_` is not on the current day. Note that the token itself may be
        returned, so callers should only rely on truthiness.
        """
        if continuousInterval is not None:
            from_ = self._state.from_
            if from_ and 'T' not in from_:
                from_ += "T00:00:00Z"
            return from_ is None or ZuluTime().epoch - ZuluTime(from_).epoch > continuousInterval
        return self._state.token or self._state.from_ is None or not self.isCurrentDay(self._state.from_)

    def state(self):
        """Return the State object."""
        return self._state

    def invalidIds(self):
        """Return the invalid ids as a new list."""
        return list(self._invalidIds)

    def _removeFromInvalidData(self, uploadid):
        """Remove `uploadid` from the invalid ids and delete its message file, if any."""
        self._invalidIds.remove(uploadid)
        invalidDataMessageFilePath = self._invalidDataMessageFilePath(uploadid)
        if isfile(invalidDataMessageFilePath):
            remove(invalidDataMessageFilePath)

    def _invalidDataMessageFilePath(self, uploadid):
        """Return the message file path for `uploadid` ('<repositoryId>:<recordId>').

        Raises ValueError when `uploadid` contains no ':'.
        """
        repositoryId, recordId = uploadid.split(":", 1)
        return join(self._logDir, INVALID_DATA_MESSAGES_DIR, escapeFilename(repositoryId), escapeFilename(recordId))