def updateScanStatus(dumper, recoid, datasetid):
    global dbcursor

    print 'Updating scan status.'

    # collect the analyzed lumisections reported by the dumper, keyed by run
    lumis = {}
    for run in dumper.getAnalyzedRuns():
        if run not in lumis:
            lumis[run] = []
        for lumi in dumper.getAnalyzedLumis(run):
            lumis[run].append(lumi)

    # flag every analyzed (run, lumi) of this reconstruction / dataset as done
    query = 'UPDATE `scanstatus` SET `status` = \'done\' WHERE `recoid` = %d AND `datasetid` = %d AND (' % (recoid, datasetid)

    runblocks = []
    for run, ls in lumis.items():
        block = '(`run` = %d AND `lumi` IN (%s))' % (run, ','.join(map(str, ls)))
        runblocks.append(block)

    query += ' OR '.join(runblocks)
    query += ')'

    dbcursor.execute(query)

    dumper.resetRuns()
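# For illustration (all values hypothetical): with lumis = {273158: [1, 2], 273302: [10]},
# recoid = 2 and datasetid = 5, the function above issues a query of the form
#
#   UPDATE `scanstatus` SET `status` = 'done' WHERE `recoid` = 2 AND `datasetid` = 5
#   AND ((`run` = 273158 AND `lumi` IN (1,2)) OR (`run` = 273302 AND `lumi` IN (10)))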
import json

def cleanup(timestamp, jobdir):
    taskdir = config.installdir + '/jobs/' + timestamp + '/' + jobdir

    # prefer the CRAB report of lumis that were left unprocessed; fall back to
    # the lumi mask the task was submitted with
    if os.path.exists(taskdir + '/result/missingLumiSummary.json'):
        with open(taskdir + '/result/missingLumiSummary.json') as source:
            lumiLists = json.load(source)
    else:
        jsonName = jobdir.replace('crab_', 'lumiMask_') + '.json'
        with open(config.installdir + '/jobs/' + timestamp + '/' + jsonName) as source:
            lumiLists = json.load(source)

    # expand {run: [[first, last], ...]} into individual '(run, lumi)' tuples
    allLumis = []
    for srun, lumiRanges in lumiLists.items():
        for start, end in lumiRanges:
            allLumis += ['(%s, %d)' % (srun, l) for l in range(start, end + 1)]

    # lumis this task was scanning but never finished are marked failed
    query = 'UPDATE `scanstatus` SET `status` = \'failed\' WHERE `status` LIKE \'scanning\' AND (`run`, `lumi`) IN (%s)' % (', '.join(allLumis))
    dbcursor.execute(query)

    shutil.rmtree(config.installdir + '/jobs/' + timestamp + '/' + jobdir)
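# For reference, both missingLumiSummary.json and the lumiMask file follow the
# standard CMS lumi-list layout: run numbers as string keys mapping to lists of
# inclusive lumisection ranges, e.g. (values hypothetical)
#
#   {"273158": [[1, 25], [50, 52]], "273302": [[1, 9]]}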
import math
import subprocess
import time

import config
from localdb import dbcursor

#htmlDirs = ['/var/www/html', '/afs/cern.ch/user/y/yiiyama/www/metscan']
htmlDirs = ['/var/www/html']

messages = ' <p><span style="color:red;">The system is currently re-scanning the entire dataset.</span></p>\n'
messages += ' <p><a href="nov18/index.html">Status as of November 18</a></p>\n'
messages += ' <p>Golden JSON used is: ' + config.goldenJson + '</p>\n'
messages += ' <p>Silver JSON used is: ' + config.silverJson + '</p>\n'
messages += ' <p>Page last updated: ' + time.asctime() + '</p>'

# `status`+0 casts the ENUM column to its numeric index; this fetches the
# numeric code corresponding to 'done'
dbcursor.execute('SELECT `status`+0 FROM `scanstatus` WHERE `status` LIKE \'done\'')
DONE = dbcursor.fetchall()[0][0]

dbcursor.execute('SELECT `recoid`, `name` FROM `reconstructions` ORDER BY `recoid`')
recos = [(row[0], row[1]) for row in dbcursor]

dbcursor.execute('SELECT `datasetid`, `name` FROM `primarydatasets` ORDER BY `name`')
pds = [(pdid, pdname) for pdid, pdname in dbcursor]

# nested map: recoid -> datasetid -> per-lumi status
status = dict([(reco[0], dict([(pdid, {}) for pdid, name in pds])) for reco in recos])

dbcursor.execute('SELECT `recoid`, `datasetid`, `run`, `lumi`, `status`+0 FROM `scanstatus`')
for recoid, pdid, run, lumi, st in dbcursor:
    if recoid not in status:
        status[recoid] = {}
    if pdid not in status[recoid]:
        status[recoid][pdid] = {}
import sys
import os
import re
import subprocess

import config
from das import dasQuery, datasetList
from localdb import dbcursor

### STEP 1 ###################################################
### Find lumisections to be processed from DAS             ###
##############################################################

recoids = {}
for reco in config.reconstructions:
    dbcursor.execute('SELECT `recoid` FROM `reconstructions` WHERE `name` LIKE %s', (reco,))

    if dbcursor.rowcount <= 0:
        # insert new reconstruction version
        dbcursor.execute('INSERT INTO `reconstructions` (name) VALUES (%s)', (reco,))
        recoids[reco] = dbcursor.lastrowid
    else:
        recoids[reco] = dbcursor.fetchall()[0][0]

dbcursor.execute('SELECT `datasetid`, `name` FROM `primarydatasets`')
knownPDs = dict([(name, datasetid) for datasetid, name in dbcursor])

# list of dataset full names (PD + reconstruction version)
# There isn't really a need to query DAS every time; providing a hard-coded dataset list is another option.
datasets = datasetList()

# DCS-only JSON mask
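# A minimal sketch of the hard-coded alternative mentioned above: bypass the
# datasetList() DAS query by listing the dataset full names directly. The
# names here are illustrative placeholders only.
#datasets = [
#    '/MET/Run2015D-PromptReco-v4/RECO',
#    '/JetHT/Run2015D-PromptReco-v4/RECO'
#]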
import os
import time

import CRABClient.UserUtilities
from httplib import HTTPException

import config
from das import dasQuery, datasetList
from localdb import dbcursor

### STEP 2 ###################################################
### Submit ntuplizer jobs over all new lumisections        ###
##############################################################

# list of dataset full names (PD + reconstruction version)
# There isn't really a need to query DAS every time; providing a hard-coded dataset list is another option.
datasets = datasetList()

dbcursor.execute('SELECT `datasetid`, `name` FROM `primarydatasets`')
knownPDs = dict([(name, datasetid) for datasetid, name in dbcursor])

timestamp = time.strftime('%y%m%d%H%M%S')

crabConfig = CRABClient.UserUtilities.config()

crabConfig.General.workArea = config.installdir + '/jobs/' + timestamp

crabConfig.JobType.pluginName = 'Analysis'
#crabConfig.JobType.outputFiles = ['tags.txt', 'eventdata.txt', 'lumis.txt']

crabConfig.Data.splitting = 'LumiBased'
#crabConfig.Data.totalUnits = 1 # TESTING
crabConfig.Data.outLFNDirBase = config.eosdir.replace('/eos/cms', '') + '/' + timestamp

crabConfig.Site.storageSite = 'T2_CH_CERN'

try:
    os.makedirs(crabConfig.General.workArea)
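# A minimal sketch (not part of the original file) of how a task configured as
# above could be submitted programmatically. crabCommand is the standard CRAB3
# API entry point; HTTPException is imported above, presumably for handling
# exactly this kind of failure.
#from CRABAPI.RawCommand import crabCommand
#try:
#    crabCommand('submit', config = crabConfig)
#except HTTPException as e:
#    print 'Submission failed:', str(e)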
import config
from localdb import dbcursor

query = 'UPDATE `scanstatus` SET `status` = \'new\' WHERE `recoid` = 1 AND `datasetid` = %s AND `run` = %s AND `lumi` = %s'

# one whitespace-separated (datasetid, run, lumi) triplet per line
replist = open('/data/scratch/reprocess.txt')
for line in replist:
    datasetid, run, lumi = map(int, line.split())
    dbcursor.execute(query, (datasetid, run, lumi))

# commit through the underlying connection; cursor objects have no commit()
dbcursor.connection.commit()

replist.close()
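# Example reprocess.txt content (values hypothetical): each line re-queues one
# lumisection of the given dataset for scanning.
#
#   5 273158 42
#   5 273158 43
#   7 273302 10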
    # (tail of the preceding function body; its header is not in this excerpt)
    loadFromFile('datasetrel.txt', 'datasetrel')

    dumper.resetNData()

def loadFromFile(fileName, tableName):
    global config

    # bulk-load a comma-separated dump produced by the dumper into the given
    # table, going through the mysql command-line client
    query = 'LOAD DATA LOCAL INFILE \'' + config.scratchdir + '/' + fileName + '\' INTO TABLE `' + tableName + '` FIELDS TERMINATED BY \',\' LINES TERMINATED BY \'\\n\''
    proc = subprocess.Popen(['mysql', '-u', config.dbuser, '-p' + config.dbpass, '-D', config.dbname, '-e', query], stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    out, err = proc.communicate()

ROOT.gROOT.LoadMacro(config.installdir + '/scripts/dumpASCII.cc+')
dumper = ROOT.ASCIIDumper(config.scratchdir)

dbcursor.execute('SELECT `filterid`, `name` FROM `filters`')
for filterid, name in dbcursor:
    dumper.addFilter(filterid, name)

sourcePaths = {}
nFiles = 0

class MaxFiles(Exception):
    pass

try:
    for reco in os.listdir('/'.join((config.scratchdir, 'merged'))):
        sourcePaths[reco] = {}
        for pd in os.listdir('/'.join((config.scratchdir, 'merged', reco))):
            sourcePaths[reco][pd] = []
    # (tail of the merge routine invoked as mergeAndMove() below; truncated here)
    proc = subprocess.Popen(['hadd', outFile + '.tmp'] + ['root://eoscms.cern.ch/' + f for f in filesToMerge])
    proc.wait()

    if proc.returncode == 0:
        # merge succeeded: promote the tmp file and remove the inputs from EOS
        os.rename(outFile + '.tmp', outFile)
        for path in filesToMerge:
            xrd.rm(path)
    else:
        os.remove(outFile + '.tmp')

if __name__ == '__main__':
    dbcursor.execute('SELECT `name` FROM `primarydatasets`')
    # drain the cursor once up front; iterating over it inside the reco loop
    # would exhaust it after the first reconstruction
    pdNames = [row[0] for row in dbcursor]

    for reco in config.reconstructions:
        for name in pdNames:
            if not os.path.isdir('/'.join((config.scratchdir, 'merged', reco, name))):
                # makedirs also creates the intermediate merged/<reco> level
                os.makedirs('/'.join((config.scratchdir, 'merged', reco, name)))

    for tsdir in xrd.ls(sourcedir):
        #temporary
        if int(os.path.basename(tsdir)) < 151108000000:
            continue

        for pddir in xrd.ls(tsdir):
            for recovdir in xrd.ls(pddir):
                mergeAndMove(recovdir)

    xrd.cleanup(sourcedir)
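# For reference, the merge above is equivalent to running ROOT's hadd utility
# by hand (paths hypothetical):
#
#   hadd out.root.tmp root://eoscms.cern.ch//store/.../1.root root://eoscms.cern.ch//store/.../2.root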