Example #1
# Imports reconstructed so the snippet is self-contained; module paths follow
# WMCore conventions and may differ by version.
import logging
import traceback

from Utils.IteratorTools import grouper
from Utils.Timers import timeFunction
from WMCore.Services.FWJRDB.FWJRDBAPI import FWJRDBAPI
from WMCore.Services.WMArchive.DataMap import createArchiverDoc
from WMCore.Services.WMArchive.WMArchive import WMArchive
from WMCore.WorkerThreads.BaseWorkerThread import BaseWorkerThread


class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather FWJR data and upload it to the archiver.
    """

    def __init__(self, config):
        """
        initialize properties specified in config
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        set up DB connections (CouchDB, WMBS) to prepare to gather information
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchive(self.config.ArchiveDataReporter.WMArchiveURL)
        self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
        self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)

    @timeFunction
    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready', limit=self.numDocsRetrievePerPolling)['rows']
            logging.info("Found %i not archived documents from FWRJ db to upload to WMArchive.", len(data))

            for slicedData in grouper(data, self.numDocsUploadPerCall):
                jobIDs = []
                archiveDocs = []
                for job in slicedData:
                    doc = createArchiverDoc(job)
                    archiveDocs.append(doc)
                    jobIDs.append(job["id"])

                response = self.wmarchiver.archiveData(archiveDocs)

                # Partial success is not allowed: either all inserts succeed or none do
                if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                    archiveIDs = response[0]['ids']
                    for docID in jobIDs:
                        self.fwjrAPI.updateArchiveUploadedStatus(docID)
                    logging.info("...successfully uploaded %d docs", len(jobIDs))
                    logging.debug("JobIDs uploaded: %s", jobIDs)
                    logging.debug("Archived IDs returned: %s", archiveIDs)
                else:
                    logging.warning("Upload failed and it will be retried in the next cycle: %s: %s.",
                                    response[0]['status'], response[0]['reason'])
                    logging.debug("failed JobIds %s", jobIDs)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s", traceback.format_exc())
Example #2
def __init__(self, couchURL, statSummaryDB, summaryLevel):
    # set the connection for local couchDB call
    self.couchURL = couchURL
    self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
    self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
    fwjrDBname = "%s/fwjrs" % self.dbName
    self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
    self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
    self.summaryLevel = summaryLevel
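
A hedged illustration of the URL handling above: splitCouchServiceURL is
assumed to split a CouchDB service URL into its base URL and database name,
inferred from how the two return values are used in __init__.

couchURL = "http://localhost:5984/wmagent_summary"  # hypothetical URL
couchURLBase, dbName = splitCouchServiceURL(couchURL)
# couchURLBase == "http://localhost:5984", dbName == "wmagent_summary"
# jobs then live in "wmagent_summary/jobs" and FWJRs in "wmagent_summary/fwjrs"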
Example #3
def setup(self, parameters):
    """
    set up DB connections (CouchDB, WMBS) to prepare to gather information
    """
    baseURL = self.config.JobStateMachine.couchurl
    dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

    self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
    self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)
    self.numDocsRetrievePerPolling = getattr(self.config.ArchiveDataReporter, "numDocsRetrievePerPolling", 1000)
    self.numDocsUploadPerCall = getattr(self.config.ArchiveDataReporter, "numDocsUploadPerCall", 200)
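
For illustration: with a hypothetical couchDBName of "wmagent_jobdump", the
FWJR database name above resolves as

dbname = "%s/fwjrs" % "wmagent_jobdump"  # -> "wmagent_jobdump/fwjrs"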
Example #4
    def setup(self, parameters):
        """
        set up DB connections (CouchDB, WMBS) to prepare to gather information
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine,
                                      "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(
            self.config.ArchiveDataReporter.WMArchiverURL)
Example #5
class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather fwjr data and update to archiver\
    """
    def __init__(self, config):
        """
        initialize properties specified in config
        """
        BaseWorkerThread.__init__(self)
        self.config = config

    def setup(self, parameters):
        """
        set up DB connections (CouchDB, WMBS) to prepare to gather information
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine,
                                      "couchDBName")

        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(
            self.config.ArchiveDataReporter.WMArchiverURL)

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            logging.info("Getting not archived data info from FWRJ db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']

            # TODO: need to send a bulk update and bulk archive
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(
                    job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])

            response = self.wmarchiver.archiveData(archiverDocs)

            # Partial success is not allowed: either all inserts succeed or none do.
            if response[0]['status'] == "ok" and len(
                    response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s" % traceback.format_exc())
Example #6
class FWJRDBAPITest(unittest.TestCase):
    """
    Unit tests for FWJRDBAPI.
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'fwjrdump_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):

        jobID = 1
        retryCount = 0
        fwjrDocument = {
            "_id": "%s-%s" % (jobID, retryCount),
            "jobid": jobID,
            "retrycount": retryCount,
            "archivestatus": "ready",
            "fwjr": SAMPLE_FWJR,
            "type": "fwjr"
        }
        self.assertEqual(
            self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'],
            fwjrDocument['_id'])
        self.assertEqual(
            self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'],
            fwjrDocument['_id'])
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument['_id'])
        self.assertEqual(
            self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'],
            fwjrDocument['_id'])
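
A standard unittest entry point (not in the original excerpt, but conventional)
so the test module can be run directly:

if __name__ == "__main__":
    unittest.main()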
Example #7
class ArchiveDataPoller(BaseWorkerThread):
    """
    Gather FWJR data and upload it to the archiver.
    """

    def __init__(self, config):
        """
        initialize properties specified in config
        """
        BaseWorkerThread.__init__(self)
        self.config = config
                         
    def setup(self, parameters):
        """
        set up DB connections (CouchDB, WMBS) to prepare to gather information
        """
        baseURL = self.config.JobStateMachine.couchurl
        dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")
         
        self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
        self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)
        

    def algorithm(self, parameters):
        """
        get information from wmbs, workqueue and local couch
        """
        try:
            logging.info("Getting not archived data info from FWRJ db...")
            data = self.fwjrAPI.getFWJRByArchiveStatus('ready')['rows']
            
            # TODO: need to send a bulk update and bulk archive
            jobIDs = []
            archiverDocs = []
            for job in data:
                doc = self.wmarchiver.createArchiverDoc(job["id"], job['doc']["fwjr"])
                archiverDocs.append(doc)
                jobIDs.append(job["id"])
                
            response = self.wmarchiver.archiveData(archiverDocs)
            
            # Partial success is not allowed: either all inserts succeed or none do.
            if response[0]['status'] == "ok" and len(response[0]['ids']) == len(jobIDs):
                for docID in jobIDs:
                    self.fwjrAPI.updateArchiveUploadedStatus(docID)
        except Exception as ex:
            logging.error("Error occurred, will retry later:")
            logging.error(str(ex))
            logging.error("Trace back: \n%s" % traceback.format_exc())
Example #8
def setUp(self):
    """
    _setUp_
    """
    self.schema = []
    self.couchApps = ["FWJRDump"]
    self.testInit = TestInitCouchApp('FWJRAPITest')
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=self.schema, useDefault=False)
    dbName = 'fwjrdump_t'
    self.testInit.setupCouch(dbName, *self.couchApps)
    fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
    self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
    self.fwjrAPI.defaultStale = {}
    return
Example #9
def setup(self, parameters):
    """
    set up DB connections (CouchDB, WMBS) to prepare to gather information
    """
    baseURL = self.config.JobStateMachine.couchurl
    dbname = "%s/fwjrs" % getattr(self.config.JobStateMachine, "couchDBName")

    self.fwjrAPI = FWJRDBAPI(baseURL, dbname)
    self.wmarchiver = WMArchiver(self.config.ArchiveDataReporter.WMArchiverURL)
Example #10
class FWJRDBAPITest(unittest.TestCase):
    """
    Unit tests for FWJRDBAPI.
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'fwjrdump_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        
        jobID = 1
        retryCount = 0
        fwjrDocument = {"_id": "%s-%s" % (jobID, retryCount),
                        "jobid": jobID,
                        "retrycount": retryCount,
                        "archivestatus": "ready",
                        "fwjr": SAMPLE_FWJR,
                        "type": "fwjr"}
        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'], fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'], fwjrDocument['_id'])      
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument['_id'])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'], fwjrDocument['_id'])
Example #11
class FWJRDBAPITest(unittest.TestCase):
    """
    Unit tests for FWJRDBAPI.
    """

    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp("FWJRAPITest")
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = "fwjrdump_t"
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):

        jobID = 1
        retryCount = 0
        fwjrDocument = {
            "_id": "%s-%s" % (jobID, retryCount),
            "jobid": jobID,
            "retrycount": retryCount,
            "archivestatus": "ready",
            "fwjr": SAMPLE_FWJR,
            "jobtype": "Merge",
            "type": "fwjr",
        }
        workflow = SAMPLE_FWJR["task"].split("/")[1]

        self.assertEqual(self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("ready")["rows"][0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), False)
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.getFWJRByArchiveStatus("uploaded")["rows"][0]["id"], fwjrDocument["_id"])
        self.assertEqual(self.fwjrAPI.isAllFWJRArchived(workflow), True)
        self.assertEqual(self.fwjrAPI.getFWJRWithSkippedFiles()["rows"][0]["value"]["skippedFiles"], 2)
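
A small illustration of the workflow extraction above, using a task path
borrowed from a docstring elsewhere in this listing: task paths start with
"/", so split("/")[0] is the empty string and index 1 is the workflow name.

task = "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput"
workflow = task.split("/")[1]  # "sryu_StepChain_MC_reqmgr2_170609_180852_5295"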
Example #12
def setUp(self):
    """
    _setUp_
    """
    self.schema = []
    self.couchApps = ["FWJRDump"]
    self.testInit = TestInitCouchApp("FWJRAPITest")
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=self.schema, useDefault=False)
    dbName = "fwjrdump_t"
    self.testInit.setupCouch(dbName, *self.couchApps)
    fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
    self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
    self.fwjrAPI.defaultStale = {}
    return
Example #13
class LocalCouchDBData(object):
    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[

            {"key":['request_name1", 'task_name1', "queued_first", "siteA"],"value":100},
            {"key":['request_name1", 'task_name1', "queued_first", "siteB"],"value":100},
            {"key":['request_name1", 'task_name2', "running", "siteA"],"value":100},
            {"key":['request_name1", 'task_name2', "success", "siteB"],"value":100}\
         ]}
         and convert to
         {'request_name1': {'queue_first': { 'siteA': 100}}
          'request_name1': {'queue_first': { 'siteB': 100}}
         }
         if taskflag is set,
         convert to
         {'request_name1': {'tasks': {'task_name1 : {'queue_first': { 'siteA': 100}}}}
          'request_name1': {'tasks':{'task_name1 : {'queue_first': { 'siteB': 100}}}},
          'request_name1': {'tasks':{'task_name2 : {'running': { 'siteA': 100}}}}
          'request_name1': {'tasks':{'task_name2 : {'success': { 'siteB': 100}}}},
         }
        """
        options = {"group": True, "stale": "ok"}
        # size of the data should be relatively small (~1M) to fit in memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                # data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):

        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key":['request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key":['request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key":['request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key":['request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}},
         ]}
         and convert to
         {'request_name1': {'size_event': [{size: 20286644784714, events: 38938099, count: 6319,
                             dataset: "/test/output_dataset1"},
                             {size: 20286644784714, events: 38938099, count: 6319,
                             dataset: "/test/output_dataset2"}]}

          'request_name2': ...
        """
        results = self.fwjrAPI.outputByWorkflowName()

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        get skipped file summary
        gets the data with following format
        {u'rows': [{u'value': {u'skippedFile': 5}, u'key':
        ["sryu_StepChain_MC_reqmgr2_170609_180852_5295", "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput", "T1_US_FNAL_Disk"]}]}

        and covert to
        {'sryu_TaskChain_Data_wq_testt_160204_061048_5587':
         {'tasks': {'/sryu_TaskChain_Data_wq_testt_160204_061048_5587/RECOCOSD :
                      {'skippedFiles':2}}}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True

        return data
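
A hedged usage sketch for LocalCouchDBData; the URL and database names are
placeholders, and the summary comments restate the conversions documented in
the docstrings above.

local = LocalCouchDBData("http://localhost:5984/wmagent_summary",  # placeholder URL
                         statSummaryDB="stat_summary",             # placeholder db name
                         summaryLevel="task")
jobSummary = local.getJobSummaryByWorkflowAndSite()
# e.g. {'request_name1': {'tasks': {'task_name1': {'queued_first': {'siteA': 100}}}}}
eventSummary = local.getEventSummaryByWorkflow()
skippedSummary = local.getSkippedFilesSummaryByWorkflow()
performance = local.getJobPerformanceByTaskAndSiteFromSummaryDB()
print(local.getHeartbeat())  # CouchDB info(), or {'error_message': ...} on failure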
Example #14
class LocalCouchDBData(object):
    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(
            self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(
            statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[

            {"key":['request_name1", 'task_name1', "queued_first", "siteA"],"value":100},
            {"key":['request_name1", 'task_name1', "queued_first", "siteB"],"value":100},
            {"key":['request_name1", 'task_name2', "running", "siteA"],"value":100},
            {"key":['request_name1", 'task_name2', "success", "siteB"],"value":100}\
         ]}
         and convert to
         {'request_name1': {'queue_first': { 'siteA': 100}}
          'request_name1': {'queue_first': { 'siteB': 100}}
         }
         if taskflag is set,
         convert to
         {'request_name1': {'tasks': {'task_name1 : {'queue_first': { 'siteA': 100}}}}
          'request_name1': {'tasks':{'task_name1 : {'queue_first': { 'siteB': 100}}}},
          'request_name1': {'tasks':{'task_name2 : {'running': { 'siteA': 100}}}}
          'request_name1': {'tasks':{'task_name2 : {'success': { 'siteB': 100}}}},
         }
        """
        options = {"group": True, "stale": "ok"}
        # size of the data should be relatively small (~1M) to fit in memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump",
                                           "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(
                    x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][
                    x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                # data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):

        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key":['request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key":['request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key":['request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key":['request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}},
         ]}
         and convert to
         {'request_name1': {'size_event': [{size: 20286644784714, events: 38938099, count: 6319,
                             dataset: "/test/output_dataset1"},
                             {size: 20286644784714, events: 38938099, count: 6319,
                             dataset: "/test/output_dataset2"}]}

          'request_name2': ...
        """
        results = self.fwjrAPI.outputByWorkflowName()

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        get skipped file summary
        gets the data with following format
        {u'rows': [{u'value': {u'skippedFile': 5}, u'key':
        ["sryu_StepChain_MC_reqmgr2_170609_180852_5295", "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput", "T1_US_FNAL_Disk"]}]}

        and covert to
        {'sryu_TaskChain_Data_wq_testt_160204_061048_5587':
         {'tasks': {'/sryu_TaskChain_Data_wq_testt_160204_061048_5587/RECOCOSD :
                      {'skippedFiles':2}}}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True

        return data