import logging
import traceback

# NOTE: this snippet also relies on WMCore helpers imported elsewhere in the
# component: BaseWorkerThread, FWJRDBAPI, WMArchive, createArchiverDoc,
# grouper and timeFunction.


class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather fwjr data and upload it to the archiver.
    """

    def __init__(self, config):
        """
        Initialize properties specified in the config.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set up db connections (couchdb, wmbs) to prepare to gather information.
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchive(self.config.ArchiveDataReporter.WMArchiveURL)
        self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)

    @timeFunction
    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
        """
        try:
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not-archived documents in the FWJR db to upload to WMArchive.", len(data))

            for slicedData in grouper(data, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                for job in slicedData:
                    doc = createArchiverDoc(job)
                    archiveDocs.append(doc)
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)

                # Partial success is not allowed: either all of the inserts succeed or none do.
                if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                    archiveIDs = response[0]['ids']
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", archiveIDs)
                else:
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    response[0]['status'], response[0]['reason'])
                    logging.debug("Failed JobIDs: %s", jobIDs)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Traceback: \n%s", traceback.format_exc())
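# --- Illustrative sketch (not part of the original source) ---------------------
# ArchiveDataPoller.setup() above reads everything it needs from the component
# configuration. The sketch below only shows which attributes are expected; the
# URLs and values are placeholders, and the Configuration import path and
# component wiring are assumed from WMCore conventions.
from WMCore.Configuration import Configuration  # assumed import path

config = Configuration()

config.section_("JobStateMachine")
config.JobStateMachine.couchurl = "http://localhost:5984"      # placeholder
config.JobStateMachine.couchDBName = "wmagent_jobdump"         # placeholder

config.component_("ArchiveDataReporter")
config.ArchiveDataReporter.WMArchiveURL = "https://wmarchive.example.org"  # placeholder
# Optional tuning knobs; setup() falls back to 1000 and 200 when they are absent.
config.ArchiveDataReporter.numDocsRetrievePerPolling = 1000
config.ArchiveDataReporter.numDocsUploadPerCall = 200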
def setup(self, parameters):
    """
    set db connection(couchdb, wmbs) to prepare to gather information
    """
    baseURL = self.config.JobStateMachine.couchurl
    dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

    self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
    self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)
    self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
    self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)
class FWJRDBAPITest(unittest.TestCase):
    """
    """

    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'fwjrdump_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        jobID = 1
        retryCount = 0
        fwjrDocument = {"_id": "%s-%s" % (jobID, retryCount),
                        "jobid": jobID,
                        "retrycount": retryCount,
                        "archivestatus": "ready",
                        "fwjr": SAMPLE_FWJR,
                        "type": "fwjr"}

        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'], fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'], fwjrDocument['_id'])
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'], fwjrDocument['_id'])
class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather fwjr data and upload it to the archiver.
    """

    def __init__(self, config):
        """
        Initialize properties specified in the config.
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        Set up db connections (couchdb, wmbs) to prepare to gather information.
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)

    def algorithm(self, parameters):
        """
        Get information from wmbs, workqueue and local couch.
        """
        try:
            logging.info("Getting not-archived data info from FWJR db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

            # TODO: need to send bulk update / bulk archive
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either all of the inserts succeed or none do.
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Traceback: \n%s", traceback.format_exc())
class FWJRDBAPITest(unittest.TestCase):
    """
    """

    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp("FWJRAPITest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = "fwjrdump_t"
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        jobID = 1
        retryCount = 0
        fwjrDocument = {"_id": "%s-%s" % (jobID, retryCount),
                        "jobid": jobID,
                        "retrycount": retryCount,
                        "archivestatus": "ready",
                        "fwjr": SAMPLE_FWJR,
                        "jobtype": "Merge",
                        "type": "fwjr"}
        workflow = SAMPLE_FWJR["task"].split("/")[1]

        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")["rows"][0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), False)
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")["rows"][0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), True)
        self.assertEqual(self.fwjrAPI.getFWJRWithSkippedFiles()["rows"][0]["value"]["skippedFiles"], 2)
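# --- Hypothetical fixture sketch (not the real SAMPLE_FWJR) --------------------
# The test above relies on a SAMPLE_FWJR fixture defined elsewhere in the test
# suite. Based only on what the assertions touch, a stand-in needs a "task" path
# whose second segment is the workflow name, plus whatever step/file payload the
# FWJRDump views use to report two skipped files (that structure is not guessed
# here).
SAMPLE_FWJR = {
    "task": "/ExampleWorkflow/ExampleTask",  # second path segment == workflow name
    # ... remaining framework job report content omitted ...
}

workflow = SAMPLE_FWJR["task"].split("/")[1]  # -> "ExampleWorkflow"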
import logging

# NOTE: this snippet also relies on CouchServer, FWJRDBAPI and
# splitCouchServiceURL, imported elsewhere in WMCore.


class LocalCouchDBData(object):

    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        Gets the job status information by workflow.

        Example view output:
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}

        which is converted to:
        {"request_name1": {"queued_first": {"siteA": 100, "siteB": 100},
                           "running": {"siteA": 100},
                           "success": {"siteB": 100}}}

        If the summary level is "task", it is converted to:
        {"request_name1": {"tasks": {
            "task_name1": {"queued_first": {"siteA": 100, "siteB": 100}},
            "task_name2": {"running": {"siteA": 100}, "success": {"siteB": 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # size of data should be relatively small (~1M) to keep in memory.
        # If not, find a way to stream it.
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite", options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                # data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):
        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        Gets the output dataset summary by workflow.

        Example view output:
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {"size": 20286644784714, "events": 38938099, "count": 6319,
                       "dataset": "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {"size": 20286644784714, "events": 38938099, "count": 6319,
                       "dataset": "/test/output_dataset2"}}
        ]}

        which is converted to:
        {"request_name1": [{"size": 20286644784714, "events": 38938099, "count": 6319,
                            "dataset": "/test/output_dataset1"},
                           {"size": 20286644784714, "events": 38938099, "count": 6319,
                            "dataset": "/test/output_dataset2"}],
         "request_name2": ...}
        """
        results = self.fwjrAPI.outputByWorkflowName()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        Gets the skipped-file summary.

        The view returns data in the following format:
        {u'rows': [{u'value': {u'skippedFile': 5},
                    u'key': ["sryu_StepChain_MC_reqmgr2_170609_180852_5295",
                             "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput",
                             "T1_US_FNAL_Disk"]}]}

        which is converted to:
        {'sryu_StepChain_MC_reqmgr2_170609_180852_5295':
            {'tasks': {'/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput':
                           {'T1_US_FNAL_Disk': {'skippedFile': 5}}},
             'skipped': True}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True
        return data
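# --- Self-contained illustration (invented sample rows) ------------------------
# This shows the row reshaping performed by getJobSummaryByWorkflowAndSite() for
# the non-"task" summary level, without any CouchDB access. The rows mimic the
# shape returned by the jobStatusByWorkflowAndSite view; the values are made up.
rows = [
    {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
    {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
    {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 50},
]

data = {}
for x in rows:
    data.setdefault(x["key"][0], {})
    data[x["key"][0]].setdefault(x["key"][2], {})
    data[x["key"][0]][x["key"][2]][x["key"][3]] = x["value"]

print(data)
# {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
#                    'running': {'siteA': 50}}}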