def main(): """ _main_ """ if 'WMAGENT_CONFIG' not in os.environ: os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' if 'manage' not in os.environ: os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage' connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) # Get all the files available for each subscription print "Getting files available without location..." availFiles = formatter.formatDict(myThread.dbi.processData(getFilesAvailable)) print "Total files available: %s" % len(availFiles) uniqAvailFiles = list(set([x['fileid'] for x in availFiles])) availFiles = [{'fileid': x} for x in uniqAvailFiles] print "Total unique files available: %s" % len(uniqAvailFiles) cernID = formatter.formatDict(myThread.dbi.processData(getCERNLocation))[0] print "CERN location id: %s" % cernID if not cernID: print "You need to add T0_CH_CERN to the resource control db" sys.exit(1) for fid in availFiles: fid.update(cernID) myThread.dbi.processData(updateFileLocation, availFiles) print "Done!" sys.exit(0)
def main(): if 'WMAGENT_CONFIG' not in os.environ: os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' if 'manage' not in os.environ: os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage' timenow = int(time.time()) time6d = timenow - 6 * 24 * 3600 connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) # Get list of workflows and number of jobs executing for more than 6 days binds = [{'timestamp': time6d}] wmbsJobsPerWf = formatter.formatDict(myThread.dbi.processData(getJobsExecuting, binds)) totalJobs = sum([int(item['count']) for item in wmbsJobsPerWf]) print "Found %d workflows with a total of %d jobs" % (len(wmbsJobsPerWf), totalJobs) #pprint(wmbsJobsPerWf) # Retrieve all jobs from condor schedd # it returns an iterator, so let's make it a list such that we can iterate over # it several times... why did I notice it only know?!?! schedd = condor.Schedd() jobs = list(schedd.xquery('true', ['ClusterID', 'ProcId', 'WMAgent_RequestName', 'JobStatus', 'WMAgent_JobID'])) # Retrieve their status from reqmgr2 and # add their wmbsId to the dict for item in wmbsJobsPerWf: item['status'] = getStatus(item['name']) item['condorjobs'] = [] for job in jobs: if job['WMAgent_RequestName'] == item['name']: item['condorjobs'].append(job['WMAgent_JobID']) #pprint(wmbsJobsPerWf) # time to have some ACTION for item in wmbsJobsPerWf: binds = [{'timestamp': time6d, 'wfname': item['name']}] jobIds = formatter.formatDict(myThread.dbi.processData(getWMBSIds, binds)) wmbsIds = [x['id'] for x in jobIds] print "%-100s in %s. Has %d wmbs and %d condor jobs" % (item['name'], item['status'], len(wmbsIds), len(item['condorjobs'])) # continue # Just skip it if there are condor jobs out there if len(item['condorjobs']) > 0 or item['status'] == 'UNKNOWN': continue newstatus = 'jobfailed' if item['status'] in ('acquired', 'running-open', 'running-closed') else 'cleanout' var = raw_input("Marking jobs from %s to %s: (Y/N) " % (item['status'], newstatus)) if var in ['Y', 'y']: print "UPDATED %s" % item['name'] binds = [] for x in jobIds: x['state'] = newstatus binds.append(x) myThread.dbi.processData(updateState, binds) print "Done!" sys.exit(0)
def format(self, assignedResults, unassignedResults): """ _format_ Combine together the total we received from the assigned and unassigned queries into a single datastructure. """ assignedResults = DBFormatter.formatDict(self, assignedResults) unassignedResults = DBFormatter.formatDict(self, unassignedResults) results = {} for result in assignedResults: if result["total"] == None: result["total"] = 0 siteName = result["site_name"] if not siteName in results: results[siteName] = {"cms_name": result["cms_name"], "state": result["state"], "pending_jobs": {}, "total_slots": result["pending_slots"]} countJobs = True if result['job_status']: module = __import__("WMCore.BossAir.Plugins.%s" % result['plugin'], globals(), locals(), [result['plugin']]) plugIn = getattr(module, result['plugin']) status = plugIn.stateMap().get(result['job_status']) if status == 'Running': countJobs = False priority = result["priority"] if priority not in results[siteName]["pending_jobs"]: results[siteName]["pending_jobs"][priority] = 0 if countJobs: results[siteName]["pending_jobs"][priority] += result["total"] results[result["site_name"]]["total_slots"] = result["pending_slots"] # Sum up all the jobs currently unassigned for result in unassignedResults: siteName = result['site_name'] if siteName not in results: results[siteName] = {"total_slots": result["pending_slots"], "pending_jobs": {}, "cms_name": result["cms_name"], "state": result["state"]} priority = result["priority"] or 0 if priority not in results[siteName]['pending_jobs']: results[siteName]['pending_jobs'][priority] = 0 results[siteName]['pending_jobs'][priority] += result['job_count'] return results
def checkJobCountsAgent(requestName): os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) unfinishedTasks = formatter.formatDict(myThread.dbi.processData("""SELECT wmbs_workflow.task, wmbs_job_state.name, COUNT(wmbs_job.id) AS jobcount FROM wmbs_workflow INNER JOIN wmbs_subscription ON wmbs_subscription.workflow = wmbs_workflow.id INNER JOIN wmbs_jobgroup ON wmbs_jobgroup.subscription = wmbs_subscription.id INNER JOIN wmbs_job ON wmbs_job.jobgroup = wmbs_jobgroup.id INNER JOIN wmbs_job_state ON wmbs_job.state = wmbs_job_state.id WHERE wmbs_workflow.name = '%s' AND wmbs_subscription.finished = 0 AND wmbs_job_state.name != 'cleanout' GROUP BY wmbs_workflow.task, wmbs_job_state.name""" % requestName)) result = {} for row in unfinishedTasks: if row['task'] not in result: result[row['task']] = {} result[row['task']][row['name']] = row['jobcount'] for task in result: msg = "Task %s has " % task for state in result[task]: msg += '%d jobs %s ' % (result[task][state], state) print msg if not result: print "Check #1 failed, there are no unfinished tasks in the system apparently." else: return unfinishedSubs = formatter.formatDict(myThread.dbi.processData("""SELECT wmbs_subscription.id, wmbs_workflow.task FROM wmbs_workflow INNER JOIN wmbs_subscription ON wmbs_subscription.workflow = wmbs_workflow.id WHERE wmbs_workflow.name = '%s' AND wmbs_subscription.finished = 0""" % requestName)) totalSubs = formatter.formatDict(myThread.dbi.processData("""SELECT wmbs_subscription.id, wmbs_workflow.task FROM wmbs_workflow INNER JOIN wmbs_subscription ON wmbs_subscription.workflow = wmbs_workflow.id WHERE wmbs_workflow.name = '%s'""" % requestName)) print "There are %d subscriptions for this workflow, %d are incomplete." % (len(totalSubs), len(unfinishedSubs)) if len(unfinishedSubs) != 0: print "It appears no jobs have been created for some unfinished subscriptions, check the health of the JobCreator or contact a developer." print "This workflow has all subscriptions as finished, the TaskArchiver should be eating through it now. This can take time though." return
def format(self, assignedResults, unassignedResults): """ _format_ Combine together the total we received from the assigned and unassigned queries into a single datastructure. """ assignedResults = DBFormatter.formatDict(self, assignedResults) unassignedResults = DBFormatter.formatDict(self, unassignedResults) results = {} for result in assignedResults: if result["total"] == None: result["total"] = 0 if not results.has_key(result["site_name"]): results[result["site_name"]] = {"total_slots": 0, "running_jobs": 0, "cms_name": result["cms_name"]} results[result["site_name"]]["running_jobs"] += result["total"] results[result["site_name"]]["total_slots"] = result["job_slots"] # Bin jobs jobBin = {} for result in unassignedResults: if not results.has_key(result["site_name"]): results[result["site_name"]] = {"total_slots": result["job_slots"], "running_jobs": 0, "cms_name": result["cms_name"]} if not jobBin.has_key(result["job"]): jobBin[result["job"]] = [] jobBin[result["job"]].append(result) for jobID in jobBin.keys(): foundWhitelist = False for site in jobBin[jobID]: if site["valid"] == 1: foundWhitelist = True break if foundWhitelist: for site in copy.copy(jobBin[jobID]): if site["valid"] != 1: jobBin[jobID].remove(site) for jobID in jobBin.keys(): for site in jobBin[jobID]: if site["job"] != None: results[site["site_name"]]["running_jobs"] += 1 return results
def main(): if "WMAGENT_CONFIG" not in os.environ: os.environ["WMAGENT_CONFIG"] = '/data/srv/wmagent/current/config/wmagent/config.py' myThread = threading.currentThread() connectToDB() formatter = DBFormatter(logging, myThread.dbi) limboFiles = formatter.formatDict(myThread.dbi.processData("""SELECT dbsbuffer_workflow.name, dbsbuffer_file.lfn FROM dbsbuffer_file INNER JOIN dbsbuffer_workflow ON dbsbuffer_file.workflow = dbsbuffer_workflow.id LEFT OUTER JOIN dbsbuffer_block ON dbsbuffer_file.block_id = dbsbuffer_block.id WHERE dbsbuffer_file.status = 'READY' AND dbsbuffer_block.id is NULL""")) if not limboFiles: print "There are no bad files to fix" return for entry in limboFiles: data = Database('wmagent_jobdump/fwjrs', 'http://%s:5984' % socket.gethostname()) result = data.loadView('FWJRDump', 'jobsByOutputLFN', {'include_docs' : True}, [[entry['name'], entry['lfn']]])['rows'] if result: result = result[0] fwjr = result['doc']['fwjr'] for step in fwjr['steps']: if step == 'cmsRun1': stepInfo = fwjr['steps'][step] site = stepInfo['site'] break else: print "Could not find location for %s" % entry['lfn'] continue se = myThread.dbi.processData("""SELECT wmbs_location_senames.se_name FROM wmbs_location_senames INNER JOIN wmbs_location ON wmbs_location.id = wmbs_location_senames.location WHERE wmbs_location.site_name = '%s'""" % site) se = formatter.formatDict(se)[0] insertQuery = """INSERT INTO dbsbuffer_location (se_name) SELECT '%s' AS se_name FROM DUAL WHERE NOT EXISTS (SELECT se_name FROM dbsbuffer_location WHERE se_name = '%s')""" % (se['se_name'], se['se_name']) myThread.dbi.processData(insertQuery) updateQuery = """INSERT INTO dbsbuffer_file_location (filename, location) SELECT df.id, dl.id FROM dbsbuffer_file df, dbsbuffer_location dl WHERE df.lfn = '%s' AND dl.se_name = '%s'""" % (entry['lfn'], se['se_name']) myThread.dbi.processData(updateQuery) updateQuery = """UPDATE dbsbuffer_file SET status = 'NOTUPLOADED' WHERE lfn = '%s'""" % entry['lfn'] myThread.dbi.processData(updateQuery)
def formatDict(self, results): """ _formatDict_ Cast the file column to an integer as the DBFormatter's formatDict() method turns everything into strings. Also, fixup the results of the Oracle query by renaming 'fileid' to file. """ formattedResults = DBFormatter.formatDict(self, results) for formattedResult in formattedResults: if "file" in formattedResult.keys(): formattedResult["file"] = int(formattedResult["file"]) else: formattedResult["file"] = int(formattedResult["fileid"]) #Now the tricky part tempResults = {} for formattedResult in formattedResults: fileID = formattedResult['file'] if fileID not in tempResults.keys(): tempResults[fileID] = [] if "pnn" in formattedResult.keys(): if not formattedResult['pnn'] in tempResults[fileID]: tempResults[fileID].append(formattedResult["pnn"]) finalResults = [] for key in tempResults.keys(): tmpDict = {"file": key} if not tempResults[key] == []: tmpDict['locations'] = tempResults[key] finalResults.append(tmpDict) return finalResults
def formatDict(self, result): """ _formatDict_ Format the query results into something that resembles the XML format PhEDEx expects for injection: {"location1": {"dataset1": {"block1": {"is-open": "n", "files": []}}}} """ dictResult = DBFormatter.formatDict(self, result) formattedResult = {} for row in dictResult: if row["location"] not in formattedResult.keys(): formattedResult[row["location"]] = {} locationDict = formattedResult[row["location"]] if row["dataset"] not in locationDict.keys(): locationDict[row["dataset"]] = {} datasetDict = locationDict[row["dataset"]] if row["blockname"] not in datasetDict.keys(): datasetDict[row["blockname"]] = {"is-open": "n", "files": []} return formattedResult
def main(): if 'WMAGENT_CONFIG' not in os.environ: os.environ[ 'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' if 'manage' not in os.environ: os.environ[ 'manage'] = '/data/srv/wmagent/current/config/wmagent/manage' # first, break free from old condor jobs condorCleanup() connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) time5d = int(time.time()) - 5 * 24 * 3600 binds = [{'timestamp': time5d}] activeRunJobs = formatter.formatDict( myThread.dbi.processData(getRunJobsActive, binds)) print "Found %d active jobs in BossAir older than 5 days" % len( activeRunJobs) # now mark these jobs as complete and in Timeout status binds = activeRunJobs[:10000] myThread.dbi.processData(updateState, binds) print "Done!" sys.exit(0)
def formatResult(self, result): """ I need the result in a reasonable list. This will return None if there is no cksum """ formattedResult = {} dictVersion = DBFormatter.formatDict(self, result) if type(dictVersion) == type([]): if len(dictVersion) == 0: #Then it's empty return None else: #Otherwise there are several, and we have to record each one #I don't know how to do this yet. tmpDict = {} for entry in dictVersion: tmpDict.update({ entry.get('cktype', 'Default'): entry.get('cksum', None) }) formattedResult['checksums'] = tmpDict else: formattedResult['checksums'] = { 'Default': dictVersion.get('cksum', None) } if formattedResult == {'Default': None}: #Then the thing was empty anyway return None return formattedResult
def formatDict(self, result): """ _formatDict_ Cast the integer attributes of the file object to integers as formatDict() will turn everything into a string. """ formattedResult = DBFormatter.formatDict(self, result)[0] formattedResult["id"] = int(formattedResult["id"]) formattedResult["merged"] = bool(int(formattedResult["merged"])) if formattedResult["events"] != None: formattedResult["events"] = int(formattedResult["events"]) if formattedResult["first_event"] != None: formattedResult["first_event"] = int(formattedResult["first_event"]) if "size" in formattedResult.keys(): formattedResult["size"] = formattedResult["size"] else: # The size column is named "filesize" in Oracle as size is # as reserved word. We'll handle this here to make things # easier in the Oracle version of this object. formattedResult["size"] = formattedResult["filesize"] del formattedResult["filesize"] if formattedResult["size"] != None: formattedResult["size"] = int(formattedResult["size"]) return formattedResult
def formatDict(self, results): """ _formatDict_ Cast the file column to an integer as the DBFormatter's formatDict() method turns everything into strings. Also, fixup the results of the Oracle query by renaming 'fileid' to file. """ formattedResults = DBFormatter.formatDict(self, results) for formattedResult in formattedResults: if "file" in formattedResult.keys(): formattedResult["file"] = int(formattedResult["file"]) else: formattedResult["file"] = int(formattedResult["fileid"]) # Now the tricky part tempResults = {} for formattedResult in formattedResults: if formattedResult["file"] not in tempResults.keys(): tempResults[formattedResult["file"]] = [] if "se_name" in formattedResult.keys(): tempResults[formattedResult["file"]].append(formattedResult["se_name"]) finalResults = [] for key in tempResults.keys(): tmpDict = {"file": key} if not tempResults[key] == []: tmpDict["locations"] = tempResults[key] finalResults.append(tmpDict) return finalResults
def check_sites(oldsites, badsites, conn): """ Make sure that all sites from the old schema, that aren't in bad sites are in the new schema. """ logger = logging.getLogger('SiteDB Schema Upgrade: check_sites') sql = "select * from siteinfo where cms_name = :cms_name" formatter = DBFormatter(logger, conn) print "checking %s sites" % len(oldsites) for site in oldsites: test = None if not site in badsites: try: data = conn.processData(sql, {'cms_name': site['cms_name']}) test = formatter.formatDict(data) if len(test) > 0: testsite = test[0] if testsite == site: logger.debug("%s migrated correctly!" % site['cms_name']) else: logger.warning("Problem with %s" % site['cms_name']) logger.warning("%s != %s" % (testsite, site)) else: logger.warning("Problem with %s" % site['cms_name']) logger.warning("%s != %s" % (test, site)) except Exception, e: logger.warning("Problem with %s" % site['cms_name']) logger.warning("%s != %s" % (test, site)) logger.warning(e)
def testBFormatting(self): """ Test various formats """ myThread = threading.currentThread() dbformatter = DBFormatter(myThread.logger, myThread.dbi) myThread.transaction.begin() result = myThread.transaction.processData(myThread.select) output = dbformatter.format(result) self.assertEqual(output , [['value1a', 'value2a'], \ ['value1b', 'value2b'], ['value1c', 'value2d']]) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatOne(result) print('test1 '+str(output)) self.assertEqual( output , ['value1a', 'value2a'] ) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatDict(result) self.assertEqual( output , [{'bind2': 'value2a', 'bind1': 'value1a'}, \ {'bind2': 'value2b', 'bind1': 'value1b'},\ {'bind2': 'value2d', 'bind1': 'value1c'}] ) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatOneDict(result) self.assertEqual( output, {'bind2': 'value2a', 'bind1': 'value1a'} )
def fixDBSmissingFileAssoc(): os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) problemFilesSql = """ select dbsbuffer_file.id as fileid, dbsbuffer_location.id as seid from wmbs_file_location fl inner join wmbs_file_details fd on fd.id = fl.fileid inner join wmbs_location_senames wls on wls.location = fl.location inner join wmbs_location wl on wl.id = fl.location inner join dbsbuffer_location on dbsbuffer_location.se_name = wls.se_name inner join dbsbuffer_file on dbsbuffer_file.lfn = fd.lfn where fd.lfn in (select df.lfn from dbsbuffer_file df left outer join dbsbuffer_file_location dfl on df.id = dfl.filename where dfl.location is null) """ unfinishedTasks = formatter.formatDict(formatter.dbi.processData(problemFilesSql)) print("%s lenth" % len(unfinishedTasks)) result = {} for row in unfinishedTasks: result.setdefault(row["fileid"], row) print(row) print("trimed %s lenth" % len(result)) insertSQL = """INSERT INTO dbsbuffer_file_location (filename, location) VALUES (:fileid, :seid)""" done = formatter.dbi.processData(insertSQL, result.values()) print("inserted %s" % done)
def fixDBSmissingFileAssoc(): os.environ[ 'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) problemFilesSql = """ select dbsbuffer_file.id as fileid, dbsbuffer_location.id as seid from wmbs_file_location fl inner join wmbs_file_details fd on fd.id = fl.fileid inner join wmbs_location_pnns wls on wls.location = fl.location inner join wmbs_location wl on wl.id = fl.location inner join dbsbuffer_location on dbsbuffer_location.pnn = wls.pnn inner join dbsbuffer_file on dbsbuffer_file.lfn = fd.lfn where fd.lfn in (select df.lfn from dbsbuffer_file df left outer join dbsbuffer_file_location dfl on df.id = dfl.filename where dfl.location is null) """ unfinishedTasks = formatter.formatDict( formatter.dbi.processData(problemFilesSql)) print("%s lenth" % len(unfinishedTasks)) result = {} for row in unfinishedTasks: result.setdefault(row["fileid"], row) print(row) print("trimed %s lenth" % len(result)) insertSQL = """INSERT INTO dbsbuffer_file_location (filename, location) VALUES (:fileid, :seid)""" done = formatter.dbi.processData(insertSQL, result.values()) print("inserted %s" % done)
def formatBulkDict(self, result): """ _formatBulkDict_ Formats a whole list of dictionaries """ formattedResult = {} listOfDicts = DBFormatter.formatDict(self, result) for entry in listOfDicts: tmpDict = {} tmpDict["id"] = int(entry["id"]) tmpDict["lfn"] = entry["lfn"] tmpDict["events"] = int(entry["events"]) tmpDict["first_event"] = int(entry["first_event"]) tmpDict["last_event"] = int(entry["last_event"]) tmpDict['minrun'] = entry.get('minrun', None) if "size" in entry.keys(): tmpDict["size"] = int(entry["size"]) else: tmpDict["size"] = int(entry["filesize"]) del entry["filesize"] formattedResult[tmpDict['id']] = tmpDict return formattedResult
def formatBulkDict(self, result): """ _formatBulkDict_ Formats a whole list of dictionaries """ formattedResult = {} listOfDicts = DBFormatter.formatDict(self, result) for entry in listOfDicts: tmpDict = {} tmpDict["id"] = int(entry["id"]) tmpDict["lfn"] = entry["lfn"] tmpDict["events"] = int(entry["events"]) tmpDict["first_event"] = int(entry["first_event"]) tmpDict['minrun'] = entry.get('minrun', None) if "size" in entry.keys(): tmpDict["size"] = int(entry["size"]) else: tmpDict["size"] = int(entry["filesize"]) del entry["filesize"] formattedResult[tmpDict['id']] = tmpDict return formattedResult
def formatResult(self, result): """ I need the result in a reasonable list. This will return None if there is no cksum """ formattedResult = {} dictVersion = DBFormatter.formatDict(self, result) if type(dictVersion) == type([]): if len(dictVersion) == 0: #Then it's empty return None else: #Otherwise there are several, and we have to record each one #I don't know how to do this yet. tmpDict = {} for entry in dictVersion: tmpDict.update({entry.get('cktype', 'Default'): entry.get('cksum', None)}) formattedResult['checksums'] = tmpDict else: formattedResult['checksums'] = {'Default': dictVersion.get('cksum', None)} if formattedResult == {'Default': None}: #Then the thing was empty anyway return None return formattedResult
def formatDict(self, result): """ _formatDict_ Cast the integer attributes of the file object to integers as formatDict() will turn everything into a string. """ formattedResult = DBFormatter.formatDict(self, result)[0] formattedResult["id"] = int(formattedResult["id"]) formattedResult["merged"] = bool(int(formattedResult["merged"])) if formattedResult["events"] != None: formattedResult["events"] = int(formattedResult["events"]) if formattedResult["first_event"] != None: formattedResult["first_event"] = int( formattedResult["first_event"]) if "size" in formattedResult.keys(): formattedResult["size"] = formattedResult["size"] else: # The size column is named "filesize" in Oracle as size is # as reserved word. We'll handle this here to make things # easier in the Oracle version of this object. formattedResult["size"] = formattedResult["filesize"] del formattedResult["filesize"] if formattedResult["size"] != None: formattedResult["size"] = int(formattedResult["size"]) return formattedResult
def testBFormatting(self): """ Test various formats """ myThread = threading.currentThread() dbformatter = DBFormatter(myThread.logger, myThread.dbi) myThread.transaction.begin() result = myThread.transaction.processData(myThread.select) output = dbformatter.format(result) self.assertEqual(output , [['value1a', 'value2a'], \ ['value1b', 'value2b'], ['value1c', 'value2d']]) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatOne(result) print('test1 ' + str(output)) self.assertEqual(output, ['value1a', 'value2a']) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatDict(result) self.assertEqual( output , [{'bind2': 'value2a', 'bind1': 'value1a'}, \ {'bind2': 'value2b', 'bind1': 'value1b'},\ {'bind2': 'value2d', 'bind1': 'value1c'}] ) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatOneDict(result) self.assertEqual(output, {'bind2': 'value2a', 'bind1': 'value1a'})
def testBFormatting(self): """ Test various formats """ myThread = threading.currentThread() dbformatter = DBFormatter(myThread.logger, myThread.dbi) myThread.transaction.begin() result = myThread.transaction.processData(myThread.select) output = dbformatter.format(result) self.assertEqual(output, [["value1a", "value2a"], ["value1b", "value2b"], ["value1c", "value2d"]]) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatOne(result) print("test1 " + str(output)) self.assertEqual(output, ["value1a", "value2a"]) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatDict(result) self.assertEqual( output, [ {"bind2": "value2a", "bind1": "value1a"}, {"bind2": "value2b", "bind1": "value1b"}, {"bind2": "value2d", "bind1": "value1c"}, ], ) result = myThread.transaction.processData(myThread.select) output = dbformatter.formatOneDict(result) self.assertEqual(output, {"bind2": "value2a", "bind1": "value1a"})
def formatOneDict(self, result): """ _formatOneDict_ Return the row as a dict """ formattedResult = DBFormatter.formatDict(self, result)[0] return workUnitFormatter(formattedResult)
def format(self, result): list = DBFormatter.formatDict(self, result) # [{'count': '2', 'name': 'none'}, {'count': '4', 'name': 'new'}] def map_function(item): return {item['name'] : item['count']} def reduce_function(x, y): x.update(y) return x return reduce(reduce_function, list(map(map_function, list)))
def format(self, result): """ _format_ Format the results of the query into something reasonable. The query will return a row for each subscription/job type combination so we need to condense that down and return a single dictionary with information on job status for each subscription. """ results = DBFormatter.formatDict(self, result) workflows = {} for result in results: if not workflows.has_key(result["workflow_name"]): workflows[result["workflow_name"]] = {} workflowDict = workflows[result["workflow_name"]] if not workflowDict.has_key(result["fileset_name"]): workflowDict[result["fileset_name"]] = {"success": 0, "running": 0, "failure": 0, "subId": result["subscription_id"]} filesetDict = workflowDict[result["fileset_name"]] if result["job_state"] in ("exhausted", "cleanout", "success", "jobfailed"): if result["success_count"] != None: filesetDict["success"] += result["success_count"] filesetDict["failure"] += result["job_count"] - result["success_count"] else: if result["success_count"] != None: filesetDict["running"] += result["job_count"] results = [] for workflowName in workflows.keys(): for filesetName in workflows[workflowName].keys(): success = workflows[workflowName][filesetName]["success"] failure = workflows[workflowName][filesetName]["failure"] running = workflows[workflowName][filesetName]["running"] if success + failure + running == 0: percentComplete = 0 percentSuccess = 0 elif success + failure == 0: percentComplete = int((success + failure) / (success + failure + running) * 100) percentSuccess = 0 else: percentComplete = int((success + failure) / (success + failure + running) * 100) percentSuccess = int(success / (success + failure + running) * 100) subId = workflows[workflowName][filesetName]["subId"] results.append({"subscription_id": subId, "workflow_name": workflowName, "fileset_name": filesetName, "percent_complete": percentComplete, "percent_success": percentSuccess}) return results
def format(self, assignedResults, unassignedResults): """ _format_ Combine together the total we received from the assigned and unassigned queries into a single datastructure. """ assignedResults = DBFormatter.formatDict(self, assignedResults) unassignedResults = DBFormatter.formatDict(self, unassignedResults) results = {} for result in assignedResults: if result["total"] == None: result["total"] = 0 if not results.has_key(result["site_name"]): if result['drain'] == 'T': drainValue = True else: drainValue = False results[result["site_name"]] = {"total_slots": 0, "running_jobs": 0, "cms_name": result["cms_name"], "drain" : drainValue} results[result["site_name"]]["running_jobs"] += result["total"] results[result["site_name"]]["total_slots"] = result["job_slots"] # Sum up all the jobs currently unassigned for result in unassignedResults: siteName = result['site_name'] if not results.has_key(siteName): if result['drain'] == 'T': drainValue = True else: drainValue = False results[siteName] = {"total_slots": result["job_slots"], "running_jobs": 0, "cms_name": result["cms_name"], "drain" : drainValue} results[siteName]['running_jobs'] += result['job_count'] return results
def format(self, results): """ _format_ Add up totals from various rows and combine that together into a single data structure. """ results = DBFormatter.formatDict(self, results) formattedResults = {} totalRunning = {} skipThreshold = False for result in results: siteName = result['site_name'] taskType = result['task_type'] if not formattedResults.has_key(siteName): formattedResults[siteName] = [] totalRunning[siteName] = 0 for thresh in formattedResults[siteName]: if thresh['task_type'] == taskType: # Then we have a problem logging.error("Skipping duplicate threshold type %s for site %s" % (taskType, siteName)) logging.debug("Current site info: %s" % formattedResults[siteName]) logging.debug("Current processing result: %s" % result) skipThreshold = True if skipThreshold: skipThreshold = False continue if result["task_running_jobs"] == None: result["task_running_jobs"] = 0 threshold = {} threshold['task_type'] = taskType threshold["total_slots"] = result["job_slots"] threshold["task_running_jobs"] = result["task_running_jobs"] threshold["max_slots"] = result["max_slots"] threshold["se_name"] = result["se_name"] threshold["priority"] = result["priority"] threshold['cms_name'] = result["cms_name"] if result['drain'] == 'T': threshold['drain'] = True else: threshold['drain'] = False totalRunning[siteName] += result["task_running_jobs"] formattedResults[siteName].append(threshold) for siteName in totalRunning.keys(): for threshold in formattedResults[siteName]: threshold["total_running_jobs"] = totalRunning[siteName] return formattedResults
def formatList(self, results): """ _formatList_ """ formattedResults = DBFormatter.formatDict(self, results) tmpList = [] for entry in formattedResults: tmpList.append(entry['site_name']) return tmpList
def formatDict(self, results): """ _formatDict_ Format the results in a dict keyed by the state name """ formattedResults = DBFormatter.formatDict(self, results) dictResult = {} for formattedResult in formattedResults: dictResult[formattedResult['sched_status']] = int(formattedResult['count']) return dictResult
def formatDict(self, results): """ _formatDict_ Format the results in a dict keyed by the state name """ formattedResults = DBFormatter.formatDict(self, results) dictResult = {} for formattedResult in formattedResults: dictResult[formattedResult["job_state"]] = int(formattedResult["job_count"]) return dictResult
def formatDict(self, results): """ _formatDict_ Cast the id column to an integer since formatDict() turns everything into strings. """ formattedResults = DBFormatter.formatDict(self, results) for formattedResult in formattedResults: formattedResult["id"] = int(formattedResult["id"]) return formattedResults
def formatDict(self, results): """ _formatDict_ Cast the id to be an integer as formatDict() turns all results into strings. """ formattedResults = DBFormatter.formatDict(self, results) for formattedResult in formattedResults: formattedResult["id"] = int(formattedResult["id"]) return formattedResults
def formatDict(self, results): """ _formatDict_ Format the results in a dict keyed by the state name """ formattedResults = DBFormatter.formatDict(self, results) dictResult = {} for formattedResult in formattedResults: dictResult[formattedResult['job_state']] = int( formattedResult['job_count']) return dictResult
def formatDict(self, results): """ _formatDict_ Format the results in a dict keyed by the cms_name """ formattedResults = DBFormatter.formatDict(self, results) dictResult = {} for item in formattedResults: site = item.pop('cms_name') dictResult[site] = item return dictResult
def formatDict(self, result): """ _formatDict_ Cast the id attribute to an int because the DBFormatter turns everything into strings. """ tempResults = DBFormatter.formatDict(self, result) formattedResults = [] for tempResult in tempResults: tempResult["id"] = int(tempResult["id"]) formattedResults.append(tempResult) return formattedResults
def formatDict(self, result): """ _formatDict_ Cast the id, fileset, workflow and last_update columns to integers since formatDict() turns everything into strings. """ formattedResult = DBFormatter.formatDict(self, result)[0] formattedResult["id"] = int(formattedResult["id"]) formattedResult["fileset"] = int(formattedResult["fileset"]) formattedResult["workflow"] = int(formattedResult["workflow"]) formattedResult["last_update"] = int(formattedResult["last_update"]) formattedResult["type"] = formattedResult["name"] del formattedResult["name"] return formattedResult
def get_siteinfo(conn): formatter = DBFormatter(logging.getLogger('SiteDB Schema Upgrade'), conn) sql = """select site.id, cms_name.name cms_name, site.name site_name, site.country, site.usage, site.url, site.logourl from site join SITE_CMS_NAME_MAP on SITE_CMS_NAME_MAP.SITE_ID=site.id join CMS_NAME on CMS_NAME.ID = SITE_CMS_NAME_MAP.CMS_NAME_ID """ data = conn.processData(sql) return formatter.formatDict(data)
def main(): """ _main_ """ if 'WMAGENT_CONFIG' not in os.environ: os.environ[ 'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' if 'manage' not in os.environ: os.environ[ 'manage'] = '/data/srv/wmagent/current/config/wmagent/manage' connectToDB() myThread = threading.currentThread() formatter = DBFormatter(logging, myThread.dbi) # Get all the files available for each subscription print "Getting files available without location..." availFiles = formatter.formatDict( myThread.dbi.processData(getFilesAvailable)) print "Total files available: %s" % len(availFiles) uniqAvailFiles = list(set([x['fileid'] for x in availFiles])) availFiles = [{'fileid': x} for x in uniqAvailFiles] print "Total unique files available: %s" % len(uniqAvailFiles) cernID = formatter.formatDict(myThread.dbi.processData(getCERNLocation))[0] print "CERN location id: %s" % cernID if not cernID: print "You need to add T0_CH_CERN to the resource control db" sys.exit(1) for fid in availFiles: fid.update(cernID) myThread.dbi.processData(updateFileLocation, availFiles) print "Done!" sys.exit(0)
def formatDict(self, result): """ _formatDict_ Cast the id, subscription, output and last_update attributes to integers because formatDict() turns everything into strings. """ formattedResult = DBFormatter.formatDict(self, result)[0] formattedResult["id"] = int(formattedResult["id"]) formattedResult["uid"] = formattedResult["guid"] formattedResult["subscription"] = int(formattedResult["subscription"]) formattedResult["output"] = int(formattedResult["output"]) formattedResult["last_update"] = int(formattedResult["last_update"]) del formattedResult['guid'] return formattedResult
def formatDict(self, result): """ _formatDict_ Cast the id, subscription, output and last_update attributes to integers because formatDict() turns everything into strings. """ formattedResult = DBFormatter.formatDict(self, result)[0] formattedResult["id"] = int(formattedResult["id"]) formattedResult["subscription"] = int(formattedResult["subscription"]) formattedResult["output"] = int(formattedResult["output"]) formattedResult["last_update"] = int(formattedResult["last_update"]) formattedResult["uid"] = formattedResult["guid"] del formattedResult['guid'] return formattedResult
def formatResult(self, result): """ _formatResult_ Turn the query results into a dictionary that is keyed by checksum type. """ formattedResult = {} dictVersion = DBFormatter.formatDict(self, result) for resultRow in dictVersion: formattedResult[resultRow["cktype"]] = resultRow["cksum"] return formattedResult
def formatDict(self, result): """ _formatDict_ Cast the id, jobgroup and last_update columns to integers because formatDict() turns everything into strings. """ formattedResult = DBFormatter.formatDict(self, result)[0] if formattedResult["bool_outcome"] == 0: formattedResult["outcome"] = "failure" else: formattedResult["outcome"] = "success" del formattedResult["bool_outcome"] return formattedResult
def formatDict(self, results): """ _formatDict_ Cast the file attribute to an integer,. """ formattedResults = DBFormatter.formatDict(self, results) dictResults = [] for formattedResult in formattedResults: dictResult = {} if "fileid" in formattedResult.keys(): dictResult["id"] = int(formattedResult["fileid"]) dictResults.append(dictResult) return dictResults
def format(self, results): dictList = DBFormatter.formatDict(self, results) out = [] for entry in dictList: tmpDict = {} tmpDict['FirstEvent'] = entry['firstevent'] tmpDict['LastEvent'] = entry['lastevent'] tmpDict['FirstLumi'] = entry['firstlumi'] tmpDict['LastLumi'] = entry['lastlumi'] tmpDict['FirstRun'] = entry['firstrun'] tmpDict['LastRun'] = entry['lastrun'] out.append(tmpDict) return out
def formatDict(self, results): """ _formatDict_ Cast the file column to an integer as the DBFormatter's formatDict() method turns everything into strings. Also, fixup the results of the Oracle query by renaming "fileid" to file. """ formattedResults = DBFormatter.formatDict(self, results) for formattedResult in formattedResults: if "file" in formattedResult.keys(): formattedResult["file"] = int(formattedResult["file"]) else: formattedResult["file"] = int(formattedResult["fileid"]) return formattedResults
def format(self, result): """ _format_ Format the query results into something that resembles the XML format PhEDEx expects for injection: {"location1": {"dataset1": {"block1": {"is-open": "y", "files": [{"lfn": "lfn1", "size": 10, "checksum": {"cksum": 4321}}, {"lfn": "lfn2", "size": 20, "checksum": {"cksum": 4321}]}}}} In order to do this, we have to graph the checksum. """ dictResult = DBFormatter.formatDict(self, result) self.specCache = {} formattedResult = {} for row in dictResult: location = row['location'] if location not in formattedResult.keys(): formattedResult[location] = {} locationDict = formattedResult[location] if row["dataset"] not in locationDict.keys(): locationDict[row["dataset"]] = {} datasetDict = locationDict[row["dataset"]] if row["blockname"] not in datasetDict.keys(): datasetDict[row["blockname"]] = {"is-open": "y", "files": []} blockDict = datasetDict[row["blockname"]] for file in blockDict["files"]: if file["lfn"] == row["lfn"]: file["checksum"][row["cktype"]] = row["cksum"] break else: cksumDict = {row["cktype"]: row["cksum"]} blockDict["files"].append({ "lfn": row["lfn"], "size": row["filesize"], "checksum": cksumDict }) return formattedResult
def formatDict(self, results): """ _formatDict_ Cast the file attribute to an integer, and also handle changing the column name in Oracle from FILEID to FILE. """ formattedResults = DBFormatter.formatDict(self, results) dictResults = [] for formattedResult in formattedResults: dictResult = {} if "file" in formattedResult: dictResult["id"] = int(formattedResult["file"]) dictResults.append(dictResult) return dictResults