class ArchiveDataPoller(BaseWorkerThread):
    """
    Periodically gather framework job reports (FWJRs) from the local couch
    database and upload them to the central WMArchive service.
    """

    def __init__(self, config):
        """
        Keep the component configuration; connections are created in setup().

        :param config: WMAgent configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Create the couch fwjr-db API and the WMArchive client, and read the
        polling/upload batch sizes from the configuration.

        :param parameters: thread setup parameters (unused here)
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")
        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchive(self.config.ArchiveDataReporter.WMArchiveURL)
        # Batch sizes: docs fetched per polling cycle / docs pushed per archiveData call.
        self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter,
                                                 "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter,
                                            "numDocsUploadPerCall", 200)

    @timeFunction
    def algorithm(self, parameters):
        """
        Fetch fwjr docs in 'ready' archive state, convert them to archiver
        documents, upload them to WMArchive in slices and, on full success of
        a slice, flag each doc as uploaded in couch.

        :param parameters: polling cycle parameters (unused here)
        """
        try:
            rows = self.fwjrAPI.getFWJRByArchiveStatus('ready',
                                                       limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.",
                         len(rows))

            for batch in grouper(rows, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                # Single pass so `batch` may be any iterable from grouper().
                for job in batch:
                    archiveDocs.append(createArchiverDoc(job))
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)

                # All-or-nothing contract: only flag docs when every insert succeeded.
                uploadedAll = (response[0]['status'] == "ok" and
                               len(response[0]['ids']) == len(jobIDs))
                if not uploadedAll:
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    response[0]['status'], response[0]['reason'])
                    logging.debug("failed JobIds %s", jobIDs)
                    continue

                archiveIDs = response[0]['ids']
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
                logging.info("...successfully uploaded %d docs", len(jobIDs))
                logging.debug("JobIDs uploaded: %s", jobIDs)
                logging.debug("Archived IDs returned: %s", archiveIDs)
        except Exception as ex:
            # A failed cycle must never kill the worker thread; log and retry later.
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s", traceback.format_exc())
class ArchiveDataPoller(BaseWorkerThread):
    """
    Poll the local couch fwjr database and upload not-yet-archived framework
    job reports (FWJRs) to WMArchive.
    """

    def __init__(self, config):
        """
        Store the component configuration; connections are made in setup().

        :param config: WMAgent configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set up db connections (couch fwjr database and WMArchive service)
        before polling starts.

        :param parameters: thread setup parameters (unused here)
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")
        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchive(self.config.ArchiveDataReporter.WMArchiveURL)
        # Batch sizes: docs fetched per polling cycle / docs pushed per archiveData call.
        self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)

    def algorithm(self, parameters):
        """
        Fetch 'ready' fwjr docs from couch, convert them to archiver documents,
        upload them to WMArchive in slices and mark each doc 'uploaded' only
        when the whole slice was inserted successfully.

        :param parameters: polling cycle parameters (unused here)
        """
        try:
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.", len(data))

            for slicedData in grouper(data, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                for job in slicedData:
                    doc = createArchiverDoc(job)
                    archiveDocs.append(doc)
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)

                # Partial success is not allowed either all the insert is successful or none is
                if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                    archiveIDs = response[0]['ids']
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", archiveIDs)
                else:
                    # Docs stay in 'ready' state, so they are retried next cycle.
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    response[0]['status'], response[0]['reason'])
                    logging.debug("failed JobIds %s", jobIDs)
        except Exception as ex:
            # Broad catch on purpose: a failed cycle must not kill the worker thread.
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s", traceback.format_exc())
class FWJRDBAPITest(unittest.TestCase):
    """Unit tests for the FWJRDBAPI couch wrapper."""

    def setUp(self):
        """
        _setUp_

        Create a fresh couch database with the FWJRDump couchapp installed.
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        couchDbName = 'fwjrdump_t'
        self.testInit.setupCouch(couchDbName, *self.couchApps)
        self.fwjrAPI = FWJRDBAPI("%s/%s" % (self.testInit.couchUrl, couchDbName))
        self.fwjrAPI.defaultStale = {}

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """Round-trip one fwjr document through the archive status workflow."""
        jobID = 1
        retryCount = 0
        docId = "%s-%s" % (jobID, retryCount)
        testDoc = {"_id": docId,
                   "jobid": jobID,
                   "retrycount": retryCount,
                   "archivestatus": "ready",
                   "fwjr": SAMPLE_FWJR,
                   "jobtype": "Merge",
                   "type": "fwjr"}
        workflow = SAMPLE_FWJR['task'].split('/')[1]

        commitResult = self.fwjrAPI.couchDB.commitOne(testDoc)
        self.assertEqual(commitResult[0]['id'], docId)

        readyRows = self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows']
        self.assertEqual(readyRows[0]['id'], docId)
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), False)

        self.fwjrAPI.updateArchiveUploadedStatus(docId)
        uploadedRows = self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows']
        self.assertEqual(uploadedRows[0]['id'], docId)
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), True)

        skippedCount = self.fwjrAPI.getFWJRWithSkippedFiles()['rows'][0]['value']['skippedFiles']
        self.assertEqual(skippedCount, 2)
class FWJRDBAPITest(unittest.TestCase):
    """Unit tests for the FWJRDBAPI couch wrapper."""

    def setUp(self):
        """
        _setUp_

        Create a fresh couch database with the FWJRDump couchapp installed.
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp("FWJRAPITest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = "fwjrdump_t"
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        # disable stale view reads so the test sees its own writes immediately
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """Round-trip one fwjr document through the archive status workflow."""
        jobID = 1
        retryCount = 0
        fwjrDocument = {
            "_id": "%s-%s" % (jobID, retryCount),
            "jobid": jobID,
            "retrycount": retryCount,
            "archivestatus": "ready",
            "fwjr": SAMPLE_FWJR,
            "jobtype": "Merge",
            "type": "fwjr",
        }
        # workflow name is the first path element of the task string
        workflow = SAMPLE_FWJR["task"].split("/")[1]
        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")["rows"][0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), False)
        # flip the doc to 'uploaded' and verify both views reflect the change
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")["rows"][0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), True)
        self.assertEqual(self.fwjrAPI.getFWJRWithSkippedFiles()["rows"][0]["value"]["skippedFiles"], 2)
class ArchiveDataPoller(BaseWorkerThread):
    """
    Poll the local couch fwjr database and upload not-yet-archived framework
    job reports (FWJRs) to WMArchive.
    """

    def __init__(self, config):
        """
        Store the component configuration; connections are made in setup().

        :param config: WMAgent configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set up db connections (couch fwjr database and WMArchiver service)
        before polling starts.

        :param parameters: thread setup parameters (unused here)
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")
        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)

    def algorithm(self, parameters):
        """
        Fetch 'ready' fwjr docs from couch, convert them to archiver documents,
        upload them to WMArchiver and flag them 'uploaded' on full success.

        :param parameters: polling cycle parameters (unused here)
        """
        try:
            logging.info("Getting not archived data info from FWRJ db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

            # TODO need to send bulk update update bulk archive
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either all the inserts succeed or none do.
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            # Broad catch on purpose: a failed cycle must not kill the worker thread.
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            # Fix: pass the traceback as a lazy logging argument instead of
            # eager %-interpolation, matching the logging idiom used elsewhere.
            logging.error("Trace back: \n%s", traceback.format_exc())
class ArchiveDataPoller(BaseWorkerThread):
    """
    Poll the local couch fwjr database and upload not-yet-archived framework
    job reports (FWJRs) to WMArchive.
    """

    def __init__(self, config):
        """
        Store the component configuration; connections are made in setup().

        :param config: WMAgent configuration object
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set up db connections (couch fwjr database and WMArchiver service)
        before polling starts.

        :param parameters: thread setup parameters (unused here)
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")
        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)

    def algorithm(self, parameters):
        """
        Fetch 'ready' fwjr docs from couch, convert them to archiver documents,
        upload them to WMArchiver and flag them 'uploaded' on full success.

        :param parameters: polling cycle parameters (unused here)
        """
        try:
            logging.info("Getting not archived data info from FWRJ db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

            # TODO need to send bulk update update bulk archive
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either all the inserts succeed or none do.
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            # Broad catch on purpose: a failed cycle must not kill the worker thread.
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            # Fix: pass the traceback as a lazy logging argument instead of
            # eager %-interpolation, matching the logging idiom used elsewhere.
            logging.error("Trace back: \n%s", traceback.format_exc())
class FWJRDBAPITest(unittest.TestCase):
    """Unit tests for the FWJRDBAPI couch wrapper."""

    def setUp(self):
        """
        _setUp_

        Create a fresh couch database with the FWJRDump couchapp installed.
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules = self.schema, useDefault = False)
        dbName = 'fwjrdump_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI= FWJRDBAPI(fwjrDBURL)
        # disable stale view reads so the test sees its own writes immediately
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """Round-trip one fwjr document between 'ready' and 'uploaded' states."""
        jobID = 1
        retryCount = 0
        fwjrDocument = {"_id": "%s-%s" % (jobID, retryCount),
                        "jobid": jobID,
                        "retrycount": retryCount,
                        "archivestatus": "ready",
                        "fwjr": SAMPLE_FWJR,
                        "type": "fwjr"}
        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'],
                         fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'],
                         fwjrDocument['_id'])
        # flip the doc to 'uploaded' and verify the status view reflects it
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'],
                         fwjrDocument['_id'])