def setup(self, parameters):
    """
    Set up the database connections (couchdb, wmbs) needed to gather
    fwjr information, plus the WMArchive client and batch-size limits.
    """
    jsmConf = self.config.JobStateMachine
    reporterConf = self.config.ArchiveDataReporter

    couchURL = jsmConf.couchurl
    fwjrDBName = "%s/fwjrs" % getattr(jsmConf, "couchDBName")
    self.fwjrAPI = FWJRDBAPI(couchURL, fwjrDBName)
    self.wmarchiver = WMArchive(reporterConf.WMArchiveURL)

    # Number of 'ready' docs fetched from couch per polling cycle,
    # and number of docs pushed to WMArchive per upload call.
    self.numDocsRetrievePerPolling = getattr(reporterConf, "numDocsRetrievePerPolling", 1000)
    self.numDocsUploadPerCall = getattr(reporterConf, "numDocsUploadPerCall", 200)
class ArchiveDataPoller(BaseWorkerThread):
    """
    Worker thread that gathers framework job report (fwjr) data from the
    local couch database and uploads it to WMArchive.
    """
    def __init__(self, config):
        """
        Initialize properties specified from config.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set db connection (couchdb, wmbs) to prepare to gather information.

        Builds the fwjr couch API and WMArchive clients, and reads the
        polling/upload batch sizes from the ArchiveDataReporter config.
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")
        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchive(self.config.ArchiveDataReporter.WMArchiveURL)
        # How many 'ready' documents to fetch from couch per polling cycle
        self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
        # How many documents to send to WMArchive per archiveData() call
        self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)

    @timeFunction
    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch: fetch fwjr
        documents in 'ready' archive state and upload them to WMArchive
        in slices of numDocsUploadPerCall.
        """
        try:
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.", len(data))

            for slicedData in grouper(data, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                for job in slicedData:
                    doc = createArchiverDoc(job)
                    archiveDocs.append(doc)
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)

                # Partial success is not allowed either all the insert is successful or none is
                if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                    archiveIDs = response[0]['ids']
                    # Mark every uploaded document as archived in the local couch db
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", archiveIDs)
                else:
                    # Documents stay in 'ready' state; the next polling cycle retries them
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    response[0]['status'], response[0]['reason'])
                    logging.debug("failed JobIds %s", jobIDs)
        except Exception as ex:
            # Broad catch keeps the poller thread alive across transient failures
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s", traceback.format_exc())
class ArchiveDataPoller(BaseWorkerThread):
    """
    Poll the local fwjr couch database and push not-yet-archived
    framework job reports to WMArchive.
    """

    def __init__(self, config):
        """Keep a reference to the component configuration."""
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """Create the couchdb and WMArchive clients used by each poll cycle."""
        jsm = self.config.JobStateMachine
        reporter = self.config.ArchiveDataReporter
        self.fwjrAPI = FWJRDBAPI(jsm.couchurl, "%s/fwjrs" % getattr(jsm, "couchDBName"))
        self.wmarchiver = WMArchive(reporter.WMArchiveURL)
        # Batch sizes: docs fetched per polling cycle / docs per upload call
        self.numDocsRetrievePerPolling = getattr(reporter, "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(reporter, "numDocsUploadPerCall", 200)

    def algorithm(self, parameters):
        """Fetch 'ready' fwjr docs and upload them to WMArchive in bulk slices."""
        try:
            rows = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.", len(rows))

            for chunk in grouper(rows, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                for job in chunk:
                    archiveDocs.append(createArchiverDoc(job))
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)
                result = response[0]

                # Partial success is not allowed either all the insert is successful or none is
                if result['status'] == "ok" and len(result['ids']) == len(jobIDs):
                    # Flag each uploaded doc as archived in the local couch db
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", result['ids'])
                else:
                    # Docs remain 'ready'; they are retried on the next cycle
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    result['status'], result['reason'])
                    logging.debug("failed JobIds %s", jobIDs)
        except Exception as ex:
            # Keep the worker thread alive across transient failures
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s", traceback.format_exc())
def main():
    """
    Main loop: watch NEW_FJR_DIR for framework job report JSON files,
    upload up to BULK_SIZE of them per iteration to WMArchive, and move
    successfully uploaded reports into PROCESSED_FJR_DIR.

    Runs until the QUIT flag is set by the SIGINT/SIGTERM handlers.
    Partial success is not allowed: either every doc in a bulk is
    accepted, or the whole bulk is retried on the next cycle.
    """
    signal.signal(signal.SIGINT, quit)
    signal.signal(signal.SIGTERM, quit)
    loadConf()
    addFileToLog()
    logger = logging.getLogger()
    logger.info("Starting main loop")
    # NOTE(review): both "key" and "cert" point to UPLOAD_KEY here; if a
    # separate certificate file exists, "cert" should use it — confirm.
    wmarchiver = WMArchive(WMARCHIVE_URL, {'pycurl': True, "key": UPLOAD_KEY, "cert": UPLOAD_KEY})
    while not QUIT:
        reports = os.listdir(os.path.join(BASE_DIR, NEW_FJR_DIR))
        currentReps = sorted(reports[:BULK_SIZE])
        logger.debug("Current reports are %s", currentReps)

        # BUGFIX: this previously read `docs = []` followed by `if docs:`,
        # so the upload branch was unreachable and the loop always slept.
        # The guard must be on the list of report files found on disk.
        if currentReps:
            docs = []
            for rep in currentReps:
                repFullname = os.path.join(BASE_DIR, NEW_FJR_DIR, rep)
                with open(repFullname) as fd:
                    docs.append(json.load(fd))

            response = wmarchiver.archiveData(docs)

            # Partial success is not allowed either all the insert is successful or none is
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(docs):
                logger.info("Successfully uploaded %d docs", len(docs))
                # Move each uploaded report out of the inbox so it is not re-sent
                for rep in currentReps:
                    repFullname = os.path.join(BASE_DIR, NEW_FJR_DIR, rep)
                    repDestName = os.path.join(BASE_DIR, PROCESSED_FJR_DIR, rep)
                    os.rename(repFullname, repDestName)
            else:
                # Reports stay in NEW_FJR_DIR and are retried next iteration
                logger.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                               response[0]['status'], response[0]['reason'])
        else:
            # Nothing to upload; avoid busy-looping on an empty directory
            time.sleep(60)
def run(self):
    """
    Main loop: watch self.newFjrDir for framework job report JSON files,
    upload up to self.bulksize of them per iteration to WMArchive, and
    move successfully uploaded reports into the processed directory.

    Runs until self.stopFlag is set (SIGTERM is wired to self.quit).
    Partial success is not allowed: either every doc in a bulk is
    accepted, or the whole bulk is retried on the next cycle.
    """
    signal.signal(signal.SIGTERM, self.quit)
    logger = logging.getLogger()
    logger.info("Starting main loop")
    wmarchiver = WMArchive(self.wmarchiveURL, {
        'pycurl': True,
        "key": self.uploadKey,
        "cert": self.uploadCert
    })
    while not self.stopFlag:
        reports = os.listdir(os.path.join(self.baseDir, self.newFjrDir))
        currentReps = sorted(reports[:self.bulksize])
        logger.debug("Current reports are %s", currentReps)
        docs = []
        if currentReps:
            for rep in currentReps:
                repFullname = os.path.join(self.baseDir, self.newFjrDir, rep)
                with open(repFullname) as fd:
                    tmpdoc = json.load(fd)
                # Some params have to be int, see https://github.com/dmwm/CRABServer/issues/5578
                # TODO Remove the following normalization once we are sure old tasks are not in the system
                for step in tmpdoc["steps"]:
                    cpu = step["performance"]["cpu"]
                    for key in ('NumberOfThreads', 'NumberOfStreams'):
                        if key in cpu:
                            cpu[key] = int(float(cpu[key]))
                    for key in ('TotalInitTime', 'TotalInitCPU'):
                        if key in cpu:
                            cpu[key] = float(cpu[key])
                docs.append(tmpdoc)
            try:
                response = wmarchiver.archiveData(docs)
            except (pycurl.error, HTTPException, WMException) as e:
                logger.error("Error uploading docs: %s", e)
                time.sleep(60)
                continue
            # Partial success is not allowed either all the insert is successful or none is
            if response and response[0]['status'] == "ok" and len(response[0]['ids']) == len(docs):
                logger.info("Successfully uploaded %d docs", len(docs))
                # Move each uploaded report out of the inbox so it is not re-sent
                for rep in currentReps:
                    repFullname = os.path.join(self.baseDir, self.newFjrDir, rep)
                    repDestName = os.path.join(self.baseDir, self.processedFjrDir,
                                               self.checkIfFolderExists(), rep)
                    os.rename(repFullname, repDestName)
            else:
                # BUGFIX: guard before indexing — the success branch already
                # checks `if response and ...`, but this branch indexed
                # response[0] unconditionally and raised IndexError on an
                # empty response instead of logging the failure.
                if response:
                    logger.warning(
                        "Upload failed and it will be retried in the next cycle: %s: %s.",
                        response[0]['status'], response[0]['reason'])
                else:
                    logger.warning(
                        "Upload failed (empty response) and it will be retried in the next cycle.")
                time.sleep(60)
        else:
            # Nothing to upload; avoid busy-looping on an empty directory
            time.sleep(60)
    logger.info("Exiting")