Example #1
    def testTimestampAccounting(self):
        """
        _testTimestampAccounting_

        Check the correct functioning of the timestamp view in the ACDC
        couchapp and the function to remove old filesets.
        """
        self.populateCouchDB()
        svc = CouchService(url=self.testInit.couchUrl,
                           database=self.testInit.couchDbName)

        currentTime = time.time()
        database = CouchServer(self.testInit.couchUrl).connectDatabase(self.testInit.couchDbName)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime})
        self.assertEqual(len(results["rows"]), 4)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 2})
        self.assertEqual(len(results["rows"]), 2)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 3})
        self.assertEqual(len(results["rows"]), 1)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 5})
        self.assertEqual(len(results["rows"]), 0)
        svc.removeOldFilesets(0)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime})
        self.assertEqual(len(results["rows"]), 0)
        return
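The row counts asserted above follow from CouchDB range semantics: with only "endkey" set, the byTimestamp view returns every row whose key (the fileset timestamp) sorts at or below the cutoff. A minimal pure-Python sketch of that cutoff logic, using illustrative timestamps rather than the real populateCouchDB fixtures:

    import time

    # Hypothetical fileset timestamps, for illustration only.
    now = time.time()
    timestamps = [now - 4, now - 3, now - 2, now - 1]

    def rowsByTimestamp(endkey):
        # Mimic the view query: keep keys <= endkey, in ascending key order.
        return sorted(ts for ts in timestamps if ts <= endkey)

    assert len(rowsByTimestamp(now)) == 4
    assert len(rowsByTimestamp(now - 5)) == 0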
Example #2
    def testRemoveByCollectionName(self):
        """
        _testRemoveByCollectionName_

        Check the function to obliterate all the filesets of a collection
        """
        self.populateCouchDB()
        svc = CouchService(url=self.testInit.couchUrl,
                           database=self.testInit.couchDbName)
        database = CouchServer(self.testInit.couchUrl).connectDatabase(
            self.testInit.couchDbName)

        results = database.loadView("ACDC",
                                    "byCollectionName",
                                    keys=["Thunderstruck"])
        self.assertTrue(len(results["rows"]) > 0)
        svc.removeFilesetsByCollectionName("Thunderstruck")
        results = database.loadView("ACDC",
                                    "byCollectionName",
                                    keys=["Thunderstruck"])
        self.assertEqual(len(results["rows"]), 0)
        results = database.loadView("ACDC",
                                    "byCollectionName",
                                    keys=["Struckthunder"])
        self.assertTrue(len(results["rows"]) > 0)
        svc.removeFilesetsByCollectionName("Struckthunder")
        results = database.loadView("ACDC",
                                    "byCollectionName",
                                    keys=["Struckthunder"])
        self.assertEqual(len(results["rows"]), 0)
        return
Example #3
class WMStatsReader(object):

    def __init__(self, couchURL, dbName=None):
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList, format="list", stale="update_after"):
        keys = statusList
        options = {}
        if stale:
            options = {"stale": stale}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, keys)

        if format == "dict":
            workflowDict = {}
            for item in result["rows"]:
                workflowDict[item["id"]] = None
            return workflowDict
        else:
            workflowList = []
            for item in result["rows"]:
                workflowList.append(item["id"])
            return workflowList

    def workflowStatus(self, stale="update_after"):
        """
        _workflowStatus_

        Return a dictionary with all available workflows,
        grouped by status and with the timestamp of the status
        """
        options = {}
        if stale:
            options = {"stale" : stale}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options)

        stateDict = {}
        for item in result['rows']:
            if item["key"] not in stateDict:
                stateDict[item["key"]] = {}
            stateDict[item["key"]][item["id"]] = item["value"]

        return stateDict

    def getDBInstance(self):
        return self.couchDB
    
    def getHeartbeat(self):
        try:
            return self.couchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}
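A short usage sketch (not part of the original module), assuming a reachable CouchDB at http://localhost:5984 and a database named "wmstats"; both values are illustrative:

    # Hypothetical endpoint and database name, for illustration only.
    reader = WMStatsReader("http://localhost:5984", dbName="wmstats")

    # Default "list" format: just the workflow ids with the given statuses.
    running = reader.workflowsByStatus(["running"])

    # "dict" format: the same ids as keys, values left as None placeholders.
    runningDict = reader.workflowsByStatus(["running"], format="dict")

    # All workflows grouped by status, each with its status timestamp.
    byState = reader.workflowStatus()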
Example #4
class WMStatsReader(object):

    def __init__(self, couchURL, dbName=None):
        couchURL = sanitizeURL(couchURL)['url']
        # set the connection for local couchDB call
        if dbName:
            self.couchURL = couchURL
            self.dbName = dbName
        else:
            self.couchURL, self.dbName = splitCouchServiceURL(couchURL)
        self.couchServer = CouchServer(self.couchURL)
        self.couchDB = self.couchServer.connectDatabase(self.dbName, False)

    def workflowsByStatus(self, statusList):
        keys = statusList
        options = {"stale": "update_after"}
        result = self.couchDB.loadView("WMStats", "requestByStatus", options, keys)
        workflowList = []
        for item in result["rows"]:
            workflowList.append(item["id"])
        return workflowList
    
    def replicate(self, target):
        self.couchServer.replicate(self.dbName, target,
                                   continuous=True)
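A hedged sketch of driving this variant, with both URLs as illustrative assumptions; replicate() as used here starts a continuous push of the local database to the target:

    # Hypothetical source and target, for illustration only.
    reader = WMStatsReader("http://localhost:5984", dbName="wmstats")
    reader.replicate("http://remotehost:5984/wmstats")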
Example #5
    def testRemoveByCollectionName(self):
        """
        _testRemoveByCollectionName_

        Check the function to obliterate all the filesets of a collection
        """
        self.populateCouchDB()
        svc = CouchService(url=self.testInit.couchUrl, database=self.testInit.couchDbName)
        database = CouchServer(self.testInit.couchUrl).connectDatabase(self.testInit.couchDbName)

        results = database.loadView("ACDC", "byCollectionName", keys=["Thunderstruck"])
        self.assertTrue(len(results["rows"]) > 0)
        svc.removeFilesetsByCollectionName("Thunderstruck")
        results = database.loadView("ACDC", "byCollectionName", keys=["Thunderstruck"])
        self.assertEqual(len(results["rows"]), 0)
        results = database.loadView("ACDC", "byCollectionName", keys=["Struckthunder"])
        self.assertTrue(len(results["rows"]) > 0)
        svc.removeFilesetsByCollectionName("Struckthunder")
        results = database.loadView("ACDC", "byCollectionName", keys=["Struckthunder"])
        self.assertEqual(len(results["rows"]), 0)
        return
Example #6
    def testTimestampAccounting(self):
        """
        _testTimestampAccounting_

        Check the correct functioning of the timestamp view in the ACDC
        couchapp and the function to remove old filesets.
        """
        self.populateCouchDB()
        svc = CouchService(url=self.testInit.couchUrl, database=self.testInit.couchDbName)

        currentTime = time.time()
        database = CouchServer(self.testInit.couchUrl).connectDatabase(self.testInit.couchDbName)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime})
        self.assertEqual(len(results["rows"]), 4)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 2})
        self.assertEqual(len(results["rows"]), 3)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 3})
        self.assertEqual(len(results["rows"]), 2)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 4})
        self.assertEqual(len(results["rows"]), 1)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime - 5})
        self.assertEqual(len(results["rows"]), 0)
        svc.removeOldFilesets(0)
        results = database.loadView("ACDC", "byTimestamp", {"endkey": currentTime})
        self.assertEqual(len(results["rows"]), 0)
        return
Example #7
class LocalCouchDBData(object):

    def __init__(self, couchURL, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and convert to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        if summaryLevel is "task", convert to
        {'request_name1': {'tasks': {
             'task_name1': {'queued_first': {'siteA': 100, 'siteB': 100}},
             'task_name2': {'running': {'siteA': 100},
                            'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # site of data should be relatively small (~1M) for put in the memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite",
                                        options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                #data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data
    
    def getJobPerformanceByTaskAndSite(self):
        """
        gets the job performance information by task and site

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "siteA"],
             "value": {wrappedTotalJobTime: 1612,
                       cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                              totalJobTime: 421.0489,
                                              totalEventCPU: 4.064402},
                       inputEvents: 0,
                       dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                                     {size: 5093504, events: 10000,
                                      totalLumis: 100}}}},
            {"key": ["request_name1", "task_name2", "siteA"],
             "value": {wrappedTotalJobTime: 1612,
                       cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                              totalJobTime: 421.0489,
                                              totalEventCPU: 4.064402},
                       inputEvents: 0,
                       dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                                     {size: 5093504, events: 10000,
                                      totalLumis: 100}}}}
        ]}
        and convert to
        {'request_name1': {'tasks':
            {'task_name1': {'sites': {'siteA':
                {wrappedTotalJobTime: 1612,
                 cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                        totalJobTime: 421.0489,
                                        totalEventCPU: 4.064402},
                 inputEvents: 0,
                 dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                               {size: 5093504, events: 10000,
                                totalLumis: 100}}}}},
             'task_name2': {'sites': {'siteA':
                {wrappedTotalJobTime: 1612,
                 cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                        totalJobTime: 421.0489,
                                        totalEventCPU: 4.064402},
                 inputEvents: 0,
                 dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                               {size: 5093504, events: 10000,
                                totalLumis: 100}}}}}}}}
        """
        options = {"group": True, "stale": "ok", "reduce":True}
        # site of data should be relatively small (~1M) for put in the memory 
        # If not, find a way to stream
        results = self.fwjrsCouchDB.loadView("FWJRDump", "performanceSummaryByTask",
                                        options)
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {}) 
            data[x['key'][0]]['tasks'][x['key'][1]].setdefault('sites', {})
            data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]] = x['value']
            data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]].setdefault('dataset', {})
            if x['key'][3]:
                data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]]['dataset'][x['key'][3]] = x['value']['datasetStat']
                # duplicate dataset entries can appear under the same site; they
                # carry the same stats, so overwriting is harmless

        return data
    
    
    def getEventSummaryByWorkflow(self):
        """
        gets the output dataset summary (size, events, count) by workflow

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key": ["request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key": ["request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}}
        ]}
        and convert to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"},
                           ...],
         'request_name2': ...}
        """
        options = {"group": True, "stale": "ok", "reduce":True}
        # site of data should be relatively small (~1M) for put in the memory
        # If not, find a way to stream
        results = self.fwjrsCouchDB.loadView("FWJRDump", "outputByWorkflowName",
                                        options)

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data
    
    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}
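The setdefault chains above are easier to follow on concrete data. A self-contained sketch (not part of the original module) that reproduces the task-level reshaping on the docstring's sample rows, with no CouchDB needed:

    rows = [
        {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
        {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
        {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
    ]

    data = {}
    for x in rows:
        request, task, status, site = x["key"]
        data.setdefault(request, {})
        data[request].setdefault("tasks", {})
        data[request]["tasks"].setdefault(task, {})
        data[request]["tasks"][task].setdefault(status, {})
        data[request]["tasks"][task][status][site] = x["value"]

    # data == {"request_name1": {"tasks": {
    #     "task_name1": {"queued_first": {"siteA": 100, "siteB": 100}},
    #     "task_name2": {"running": {"siteA": 100}}}}}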
Example #8
class LocalCouchDBData(object):
    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and convert to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        if summaryLevel is "task", convert to
        {'request_name1': {'tasks': {
             'task_name1': {'queued_first': {'siteA': 100, 'siteB': 100}},
             'task_name2': {'running': {'siteA': 100},
                            'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # the size of the data should be relatively small (~1 MB) to fit in memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                # data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):

        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        gets the output dataset summary (size, events, count) by workflow

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key": ["request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key": ["request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}}
        ]}
        and convert to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"},
                           ...],
         'request_name2': ...}
        """
        results = self.fwjrAPI.outputByWorkflowName()

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        get skipped file summary
        gets the data with following format
        {u'rows': [{u'value': {u'skippedFile': 5}, u'key':
        ["sryu_StepChain_MC_reqmgr2_170609_180852_5295", "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput", "T1_US_FNAL_Disk"]}]}

        and convert to
        {'sryu_TaskChain_Data_wq_testt_160204_061048_5587':
         {'tasks': {'/sryu_TaskChain_Data_wq_testt_160204_061048_5587/RECOCOSD':
                      {'skippedFiles': 2}}}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True

        return data
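The same reshaping pattern on the skipped-files rows, sketched on the docstring's sample row (illustrative values only):

    rows = [{"value": {"skippedFile": 5},
             "key": ["sryu_StepChain_MC_reqmgr2_170609_180852_5295",
                     "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput",
                     "T1_US_FNAL_Disk"]}]

    data = {}
    for x in rows:
        request, task, site = x["key"]
        data.setdefault(request, {})
        data[request].setdefault("tasks", {})
        data[request]["tasks"].setdefault(task, {})
        data[request]["tasks"][task][site] = x["value"]
        data[request]["skipped"] = True

    # data == {"sryu_StepChain_MC_reqmgr2_170609_180852_5295": {
    #     "tasks": {"/sryu_.../GENSIM/GENSIMMergeRAWSIMoutput": {
    #         "T1_US_FNAL_Disk": {"skippedFile": 5}}},
    #     "skipped": True}}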
Example #9
class LocalCouchDBData(object):
    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(
            self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(
            statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and convert to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        if summaryLevel is "task", convert to
        {'request_name1': {'tasks': {
             'task_name1': {'queued_first': {'siteA': 100, 'siteB': 100}},
             'task_name2': {'running': {'siteA': 100},
                            'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # the size of the data should be relatively small (~1 MB) to fit in memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump",
                                           "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(
                    x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][
                    x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                # data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):

        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        gets the output dataset summary (size, events, count) by workflow

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key": ["request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key": ["request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}}
        ]}
        and convert to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"},
                           ...],
         'request_name2': ...}
        """
        results = self.fwjrAPI.outputByWorkflowName()

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        get skipped file summary
        gets the data with following format
        {u'rows': [{u'value': {u'skippedFile': 5}, u'key':
        ["sryu_StepChain_MC_reqmgr2_170609_180852_5295", "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput", "T1_US_FNAL_Disk"]}]}

        and convert to
        {'sryu_TaskChain_Data_wq_testt_160204_061048_5587':
         {'tasks': {'/sryu_TaskChain_Data_wq_testt_160204_061048_5587/RECOCOSD':
                      {'skippedFiles': 2}}}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True

        return data
Example #10
class LocalCouchDBData(object):
    def __init__(self, couchURL, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(
            self.dbName + "/jobs", False)
        self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(
            self.dbName + "/fwjrs", False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and convert to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        if summaryLevel is "task", convert to
        {'request_name1': {'tasks': {
             'task_name1': {'queued_first': {'siteA': 100, 'siteB': 100}},
             'task_name2': {'running': {'siteA': 100},
                            'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # the size of the data should be relatively small (~1 MB) to fit in memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump",
                                           "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(
                    x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][
                    x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                #data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSite(self):
        """
        gets the job performance information by task and site

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "siteA"],
             "value": {wrappedTotalJobTime: 1612,
                       cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                              totalJobTime: 421.0489,
                                              totalEventCPU: 4.064402},
                       inputEvents: 0,
                       dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                                     {size: 5093504, events: 10000,
                                      totalLumis: 100}}}},
            {"key": ["request_name1", "task_name2", "siteA"],
             "value": {wrappedTotalJobTime: 1612,
                       cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                              totalJobTime: 421.0489,
                                              totalEventCPU: 4.064402},
                       inputEvents: 0,
                       dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                                     {size: 5093504, events: 10000,
                                      totalLumis: 100}}}}
        ]}
        and convert to
        {'request_name1': {'tasks':
            {'task_name1': {'sites': {'siteA':
                {wrappedTotalJobTime: 1612,
                 cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                        totalJobTime: 421.0489,
                                        totalEventCPU: 4.064402},
                 inputEvents: 0,
                 dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                               {size: 5093504, events: 10000,
                                totalLumis: 100}}}}},
             'task_name2': {'sites': {'siteA':
                {wrappedTotalJobTime: 1612,
                 cmsRunCPUPerformance: {totalJobCPU: 20.132924000000003,
                                        totalJobTime: 421.0489,
                                        totalEventCPU: 4.064402},
                 inputEvents: 0,
                 dataset: {/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN:
                               {size: 5093504, events: 10000,
                                totalLumis: 100}}}}}}}}
        """
        options = {"group": True, "stale": "ok", "reduce": True}
        # the size of the data should be relatively small (~1 MB) to fit in memory
        # If not, find a way to stream
        results = self.fwjrsCouchDB.loadView("FWJRDump",
                                             "performanceSummaryByTask",
                                             options)
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]].setdefault('sites', {})
            data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]] = x['value']
            data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]].setdefault('dataset', {})
            if x['key'][3]:
                data[x['key'][0]]['tasks'][x['key'][1]]['sites'][x['key'][2]]['dataset'][x['key'][3]] = x['value']['datasetStat']
                # duplicate dataset entries can appear under the same site; they
                # carry the same stats, so overwriting is harmless

        return data

    def getEventSummaryByWorkflow(self):
        """
        gets the output dataset summary (size, events, count) by workflow

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key": ["request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key": ["request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}}
        ]}
        and convert to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"},
                           ...],
         'request_name2': ...}
        """
        options = {"group": True, "stale": "ok", "reduce": True}
        # the size of the data should be relatively small (~1 MB) to fit in memory
        # If not, find a way to stream
        results = self.fwjrsCouchDB.loadView("FWJRDump",
                                             "outputByWorkflowName", options)

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}
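For the performance view the code reads four key elements (request, task, site, dataset). A self-contained sketch of that reshaping under the same assumption, on an illustrative row (not real FWJR data):

    rows = [
        {"key": ["request_name1", "task_name1", "siteA",
                 "/TestLHE/TEST_Subscriptions_WMA-Test-v1/GEN"],
         "value": {"wrappedTotalJobTime": 1612,
                   "inputEvents": 0,
                   "datasetStat": {"size": 5093504, "events": 10000,
                                   "totalLumis": 100}}},
    ]

    data = {}
    for x in rows:
        request, task, site, dataset = x["key"]
        data.setdefault(request, {})
        data[request].setdefault("tasks", {})
        data[request]["tasks"].setdefault(task, {})
        data[request]["tasks"][task].setdefault("sites", {})
        data[request]["tasks"][task]["sites"][site] = x["value"]
        data[request]["tasks"][task]["sites"][site].setdefault("dataset", {})
        if dataset:
            data[request]["tasks"][task]["sites"][site]["dataset"][dataset] = x["value"]["datasetStat"]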
Example #11
class LocalCouchDBData(object):

    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set the connection for local couchDB call
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        self.fwjrsCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/fwjrs", False)
        # TODO: remove the hard-coded name (wma_summarydb)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        gets the job status information by workflow

        example
        {"rows":[
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        and convert to
        {'request_name1': {'queued_first': {'siteA': 100, 'siteB': 100},
                           'running': {'siteA': 100},
                           'success': {'siteB': 100}}}
        if summaryLevel is "task", convert to
        {'request_name1': {'tasks': {
             'task_name1': {'queued_first': {'siteA': 100, 'siteB': 100}},
             'task_name2': {'running': {'siteA': 100},
                            'success': {'siteB': 100}}}}}
        """
        options = {"group": True, "stale": "ok"}
        # the size of the data should be relatively small (~1 MB) to fit in memory
        # If not, find a way to stream
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                #data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data
        
    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):
        
        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data
    
    def getEventSummaryByWorkflow(self):
        """
        gets the output dataset summary (size, events, count) by workflow

        example
        {"rows":[
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset2"}},
            {"key": ["request_name1", "/test/output_dataset3"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset3"}},
            {"key": ["request_name1", "/test/output_dataset4"],
             "value": {size: 20286644784714, events: 38938099, count: 6319,
                       dataset: "/test/output_dataset4"}}
        ]}
        and convert to
        {'request_name1': [{size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset1"},
                           {size: 20286644784714, events: 38938099, count: 6319,
                            dataset: "/test/output_dataset2"},
                           ...],
         'request_name2': ...}
        """
        options = {"group": True, "stale": "ok", "reduce":True}
        # site of data should be relatively small (~1M) for put in the memory
        # If not, find a way to stream
        results = self.fwjrsCouchDB.loadView("FWJRDump", "outputByWorkflowName",
                                        options)

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data
    
    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}