Example 1
# Imports assumed for this snippet (module paths follow the WMCore repo layout;
# SAMPLE_FWJR is a sample framework job report dict defined in the test module):
import unittest

from WMQuality.TestInitCouchApp import TestInitCouchApp
from WMCore.Services.FWJRDB.FWJRDBAPI import FWJRDBAPI


class FWJRDBAPITest(unittest.TestCase):
    """
    Unit tests for the FWJRDBAPI CouchDB wrapper.
    """
    def setUp(self):
        """
        _setUp_
        """
        self.schema = []
        self.couchApps = ["FWJRDump"]
        self.testInit = TestInitCouchApp('FWJRAPITest')
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=self.schema, useDefault=False)
        dbName = 'fwjrdump_t'
        self.testInit.setupCouch(dbName, *self.couchApps)
        fwjrDBURL = "%s/%s" % (self.testInit.couchUrl, dbName)
        self.fwjrAPI = FWJRDBAPI(fwjrDBURL)
        self.fwjrAPI.defaultStale = {}
        return

    def tearDown(self):
        """
        _tearDown_

        Drop all the WMBS tables.
        """
        self.testInit.tearDownCouch()

    def testFWJRDBAPI(self):
        """
        Walk a single FWJR document through the archive life cycle:
        commit it, query by "ready" status, mark it uploaded, then
        query by "uploaded" status.
        """
        jobID = 1
        retryCount = 0
        fwjrDocument = {
            "_id": "%s-%s" % (jobID, retryCount),
            "jobid": jobID,
            "retrycount": retryCount,
            "archivestatus": "ready",
            "fwjr": SAMPLE_FWJR,
            "jobtype": "Merge",
            "type": "fwjr"
        }
        workflow = SAMPLE_FWJR['task'].split('/')[1]

        self.assertEqual(
            self.fwjrAPI.couchDB.commitOne(fwjrDocument)[0]['id'],
            fwjrDocument['_id'])
        self.assertEqual(
            self.fwjrAPI.getFWJRByArchiveStatus("ready")['rows'][0]['id'],
            fwjrDocument['_id'])
        self.assertFalse(self.fwjrAPI.isAllFWJRArchived(workflow))
        self.fwjrAPI.updateArchiveUploadedStatus(fwjrDocument['_id'])
        self.assertEqual(
            self.fwjrAPI.getFWJRByArchiveStatus("uploaded")['rows'][0]['id'],
            fwjrDocument['_id'])
        self.assertTrue(self.fwjrAPI.isAllFWJRArchived(workflow))
        self.assertEqual(
            self.fwjrAPI.getFWJRWithSkippedFiles()['rows'][0]['value']
            ['skippedFiles'], 2)
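
A minimal usage sketch (not from the source) of the same archive life cycle exercised by the test above, outside unittest. It assumes a reachable CouchDB instance with the FWJRDump couchapp installed; the URL is a placeholder:

from WMCore.Services.FWJRDB.FWJRDBAPI import FWJRDBAPI

fwjrAPI = FWJRDBAPI("http://localhost:5984/fwjrdump_t")  # placeholder URL

# FWJR documents with archivestatus "ready" are waiting to be archived.
for row in fwjrAPI.getFWJRByArchiveStatus("ready")["rows"]:
    # ... upload the report to long-term storage (elided) ...
    # then flip its archivestatus from "ready" to "uploaded":
    fwjrAPI.updateArchiveUploadedStatus(row["id"])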
Example 2
# Imports assumed for this snippet (the CouchServer and FWJRDBAPI paths follow
# the WMCore repo layout; splitCouchServiceURL is a WMCore helper, imported
# elsewhere in the original module, that splits a couch service URL into its
# server URL and database name):
import logging

from WMCore.Database.CMSCouch import CouchServer
from WMCore.Services.FWJRDB.FWJRDBAPI import FWJRDBAPI


class LocalCouchDBData(object):
    def __init__(self, couchURL, statSummaryDB, summaryLevel):
        # set up the connections for local CouchDB calls
        self.couchURL = couchURL
        self.couchURLBase, self.dbName = splitCouchServiceURL(couchURL)
        self.jobCouchDB = CouchServer(self.couchURLBase).connectDatabase(self.dbName + "/jobs", False)
        fwjrDBname = "%s/fwjrs" % self.dbName
        self.fwjrAPI = FWJRDBAPI(self.couchURLBase, fwjrDBname)
        self.summaryStatsDB = CouchServer(self.couchURLBase).connectDatabase(statSummaryDB, False)
        self.summaryLevel = summaryLevel

    def getJobSummaryByWorkflowAndSite(self):
        """
        Gets the job status counts by workflow, status and site (and, when
        summaryLevel is "task", by task as well).

        The view returns rows such as
        {"rows": [
            {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
            {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
            {"key": ["request_name1", "task_name2", "success", "siteB"], "value": 100}
        ]}
        which is converted to
        {"request_name1": {"queued_first": {"siteA": 100, "siteB": 100},
                           "running": {"siteA": 100},
                           "success": {"siteB": 100}}}
        or, when summaryLevel is "task", to
        {"request_name1": {"tasks": {
            "task_name1": {"queued_first": {"siteA": 100, "siteB": 100}},
            "task_name2": {"running": {"siteA": 100},
                           "success": {"siteB": 100}}}}}
        (a standalone sketch of this reshaping follows the class).
        """
        options = {"group": True, "stale": "ok"}
        # The data should be relatively small (~1 MB), so it can be held in
        # memory; if it grows beyond that, find a way to stream it.
        results = self.jobCouchDB.loadView("JobDump", "jobStatusByWorkflowAndSite",
                                           options)

        # reformat the doc to upload to reqmon db
        data = {}
        if self.summaryLevel == "task":
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault('tasks', {})
                data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
                data[x['key'][0]]['tasks'][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]][x['key'][3]] = x['value']
        else:
            for x in results.get('rows', []):
                data.setdefault(x['key'][0], {})
                data[x['key'][0]].setdefault(x['key'][2], {})
                # data[x['key'][0]][x['key'][1]].setdefault(x['key'][2], {})
                data[x['key'][0]][x['key'][2]][x['key'][3]] = x['value']
        logging.info("Found %i requests", len(data))
        return data

    def getJobPerformanceByTaskAndSiteFromSummaryDB(self):
        """
        Gets the per-task performance summaries from the summary stats
        database, keyed by document id.
        """
        options = {"include_docs": True}
        results = self.summaryStatsDB.allDocs(options)
        data = {}
        for row in results['rows']:
            if not row['id'].startswith("_"):
                data[row['id']] = {}
                data[row['id']]['tasks'] = row['doc']['tasks']
        return data

    def getEventSummaryByWorkflow(self):
        """
        Gets the output dataset summary (size, events, file count) by workflow.

        The view returns rows such as
        {"rows": [
            {"key": ["request_name1", "/test/output_dataset1"],
             "value": {"size": 20286644784714, "events": 38938099, "count": 6319,
                       "dataset": "/test/output_dataset1"}},
            {"key": ["request_name1", "/test/output_dataset2"],
             "value": {"size": 20286644784714, "events": 38938099, "count": 6319,
                       "dataset": "/test/output_dataset2"}}
        ]}
        which is converted to a list of values per workflow:
        {"request_name1": [{"size": 20286644784714, "events": 38938099, "count": 6319,
                            "dataset": "/test/output_dataset1"},
                           {"size": 20286644784714, "events": 38938099, "count": 6319,
                            "dataset": "/test/output_dataset2"}],
         "request_name2": ...}
        """
        results = self.fwjrAPI.outputByWorkflowName()

        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], [])
            data[x['key'][0]].append(x['value'])
        logging.info("Found %i requests", len(data))
        return data

    def getHeartbeat(self):
        try:
            return self.jobCouchDB.info()
        except Exception as ex:
            return {'error_message': str(ex)}

    def getSkippedFilesSummaryByWorkflow(self):
        """
        Gets the skipped file summary by workflow.

        The view returns rows such as
        {"rows": [{"value": {"skippedFiles": 5},
                   "key": ["sryu_StepChain_MC_reqmgr2_170609_180852_5295",
                           "/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput",
                           "T1_US_FNAL_Disk"]}]}
        which is converted to
        {"sryu_StepChain_MC_reqmgr2_170609_180852_5295":
         {"tasks": {"/sryu_StepChain_MC_reqmgr2_170609_180852_5295/GENSIM/GENSIMMergeRAWSIMoutput":
                    {"T1_US_FNAL_Disk": {"skippedFiles": 5}}},
          "skipped": True}}
        """
        results = self.fwjrAPI.getFWJRWithSkippedFiles()
        # reformat the doc to upload to reqmon db
        data = {}
        for x in results.get('rows', []):
            data.setdefault(x['key'][0], {})
            data[x['key'][0]].setdefault('tasks', {})
            data[x['key'][0]]['tasks'].setdefault(x['key'][1], {})
            data[x['key'][0]]['tasks'][x['key'][1]][x['key'][2]] = x['value']
            data[x['key'][0]]['skipped'] = True

        return data
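
The reshaping in getJobSummaryByWorkflowAndSite above is just a chain of setdefault calls that builds a nested dict from the flat view rows. A self-contained sketch (not from the source) of the task-level branch, runnable without CouchDB:

rows = [
    {"key": ["request_name1", "task_name1", "queued_first", "siteA"], "value": 100},
    {"key": ["request_name1", "task_name1", "queued_first", "siteB"], "value": 100},
    {"key": ["request_name1", "task_name2", "running", "siteA"], "value": 100},
]

data = {}
for x in rows:
    request, task, status, site = x["key"]
    # setdefault returns the existing (or freshly created) nested dict,
    # so the chain drills down one level per key component.
    node = data.setdefault(request, {}).setdefault("tasks", {}) \
               .setdefault(task, {}).setdefault(status, {})
    node[site] = x["value"]

assert data == {"request_name1": {"tasks": {
    "task_name1": {"queued_first": {"siteA": 100, "siteB": 100}},
    "task_name2": {"running": {"siteA": 100}},
}}}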