def testBasic(self):
    """
    _testBasic_

    Test the basic functions of the DBSBufferDataset: create, load, exists,
    and the ability to add subscriptions.

    NOTE(review): relies on an initialized database schema and on a
    myThread.dbi handle set up by the test harness.
    """
    originalDataset = DBSBufferDataset(path = '/bogus/bogus/go')
    originalDataset.create()
    myThread = threading.currentThread()
    # The id stored in dbsbuffer_dataset must match what exists() reports.
    result = myThread.dbi.processData("SELECT id FROM dbsbuffer_dataset")[0].fetchall()
    self.assertEqual(originalDataset.exists(), result[0][0])
    # Creating the same dataset path again must not insert a duplicate row.
    duplicateDataset = DBSBufferDataset(path = '/bogus/bogus/go')
    duplicateDataset.create()
    self.assertEqual(originalDataset.exists(), duplicateDataset.exists())
    result = myThread.dbi.processData("SELECT COUNT(id) FROM dbsbuffer_dataset")[0].fetchall()
    self.assertEqual(result[0][0], 1)
    # load() must resolve to the same row the original create() produced.
    loadedDataset = DBSBufferDataset(path = '/bogus/bogus/go')
    loadedDataset.load()
    self.assertEqual(loadedDataset.exists(), originalDataset.exists())
    secondDataset = DBSBufferDataset(path = '/BogusPrimary/Run2012Z-PromptReco-v1/RECO')
    secondDataset.create()
    workload = WMWorkloadHelper()
    workload.load(os.path.join(getTestBase(), 'WMComponent_t/PhEDExInjector_t/specs/TestWorkload.pkl'))
    # The same subscription information is added twice; presumably duplicate
    # additions are ignored and the test spec defines 3 subscriptions for
    # this dataset -- TODO confirm against TestWorkload.pkl.
    secondDataset.addSubscription(workload.getSubscriptionInformation()['/BogusPrimary/Run2012Z-PromptReco-v1/RECO'])
    secondDataset.addSubscription(workload.getSubscriptionInformation()['/BogusPrimary/Run2012Z-PromptReco-v1/RECO'])
    self.assertEqual(len(secondDataset['subscriptions']), 3)
    result = myThread.dbi.processData("SELECT COUNT(id) FROM dbsbuffer_dataset_subscription")[0].fetchall()
    self.assertEqual(result[0][0], 3)
    return
def retrieveSchema(workflowName):
    """
    Load the workload spec of *workflowName* from couch and return a schema
    dict suitable for cloning the request.

    Empty values and blacklisted parameters are dropped; Requestor and Group
    are overridden for the clone.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    # Parameters that must not be carried over into the cloned request.
    paramBlacklist = ['AcquisitionEra', 'BlockCloseMaxEvents', 'BlockCloseMaxFiles',
                      'BlockCloseMaxSize', 'BlockCloseMaxWaitTime', 'CouchURL',
                      'CouchWorkloadDBName', 'CustodialGroup', 'CustodialSubType',
                      'Dashboard', 'GracePeriod', 'Group', 'HardTimeout',
                      'InitialPriority', 'inputMode', 'MaxMergeEvents', 'MaxMergeSize',
                      'MaxRSS', 'MaxVSize', 'MergedLFNBase', 'MinMergeSize',
                      'NonCustodialGroup', 'NonCustodialSubType', 'ProcessingString',
                      'OutputDatasets', 'ReqMgr2Only', 'Requestor',
                      # BUG FIX: a missing comma used to fuse 'RequestDate' and
                      # 'RequestorDN' into the single string
                      # 'RequestDateRequestorDN', so neither was blacklisted.
                      'RequestDate', 'RequestorDN',
                      'RequestName', 'RequestStatus', 'RequestTransition',
                      'RequestWorkflow', 'SiteWhitelist', 'SoftTimeout',
                      'SoftwareVersions', 'SubscriptionPriority', 'Team', 'timeStamp']
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if not value or key in paramBlacklist:
            continue
        schema[key] = value
    schema['Requestor'] = 'amaltaro'
    schema['Group'] = 'DATAOPS'
    return schema
def getWMWorkload(self):
    """
    _getWMWorkload_

    Return the WMWorkloadHelper for this sandbox, loading and caching it on
    first use from the WMWorkload.pkl that sits next to the WMSandbox package.

    :returns: WMWorkloadHelper instance (cached on self.workload).
    :raises RuntimeError: if the WMSandbox module cannot be imported.
    """
    if self.workload != None:
        return self.workload
    try:
        import WMSandbox
    except ImportError as ex:
        msg = "Error importing WMSandbox module"
        msg += str(ex)
        raise RuntimeError(msg)
    # The pickled workload lives alongside the WMSandbox package __init__.
    wmsandboxLoc = inspect.getsourcefile(WMSandbox)
    workloadPcl = wmsandboxLoc.replace("__init__.py", "WMWorkload.pkl")
    with open(workloadPcl, 'rb') as handle:
        wmWorkload = pickle.load(handle)
    self.workload = WMWorkloadHelper(wmWorkload)
    # BUG FIX: previously returned None here while the cache-hit path above
    # returned the workload; both paths now return the helper.
    return self.workload
def retrieveSchema(workflowName):
    """
    Fetch the spec of *workflowName* from couch and build a request schema,
    renaming a few legacy parameters, then stamp Requestor and Group for the
    clone.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    # Legacy parameter names and their modern equivalents.
    renames = {'ProdConfigCacheID': 'ConfigCacheID',
               'ProcConfigCacheID': 'ConfigCacheID',
               'RequestSizeEvents': 'RequestNumEvents'}
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if key in renames:
            schema[renames[key]] = value
        elif value is not None:
            schema[key] = value
    schema['Requestor'] = 'anlevin'
    schema['Group'] = 'DATAOPS'
    return schema
def retrieveSchema(workflowName, user, group):
    """
    Build a clone-ready schema from the couch spec of *workflowName*.

    Requestor/Group are replaced by *user*/*group*, SizePerEvent is pinned to
    1, empty run/block lists become [] and all other empty values are
    dropped.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    listParams = ("RunWhitelist", "RunBlacklist", "BlockWhitelist", "BlockBlacklist")
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key == 'ProcConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestSizeEvents'] = value
        elif key == 'Requestor':
            schema['Requestor'] = user
        elif key == 'Group':
            schema['Group'] = group
        elif key == 'SizePerEvent':
            schema['SizePerEvent'] = 1
        elif key in listParams and not value:
            # Empty run/block lists are normalized to a real empty list.
            schema[key] = []
        elif value:
            # Any other empty value (None, '', {}, 0) is dropped entirely.
            schema[key] = value
    return schema
def get(self, name):
    """
    Return the job splitting parameters for every task of workload *name*.

    :arg str name: the workload (request) name.
    :returns: list of dicts with splitAlgo/splitParams/taskType/taskName.
    :raises cherrypy.HTTPError: 404 when the workload cannot be loaded.
    """
    helper = WMWorkloadHelper()
    try:
        helper.loadSpecFromCouch(self.reqdb_url, name)
    except Exception:
        # BUG FIX: the message used '"...: % " + name' instead of a '%s'
        # placeholder, producing "Cannot find workload: % <name>".
        raise cherrypy.HTTPError(404, "Cannot find workload: %s" % name)
    splittingDict = helper.listJobSplittingParametersByTask(performance = False)
    taskNames = sorted(splittingDict.keys())
    splitInfo = []
    for taskName in taskNames:
        splitInfo.append({"splitAlgo": splittingDict[taskName]["algorithm"],
                          "splitParams": splittingDict[taskName],
                          "taskType": splittingDict[taskName]["type"],
                          "taskName": taskName})
    return splitInfo
def getMasterName(startDir, wmWorkload = None, workflow = None):
    """
    Build a universal name for the jobGroup directory.

    :returns: tuple (workload directory, task directory) under *startDir*.
    :raises CreateWorkAreaException: when no workload object is given and the
        workflow spec file does not exist on disk.
    """
    if wmWorkload is not None:
        workloadName = wmWorkload.name()
    else:
        # No in-memory workload: it must be loadable from the spec file.
        if not os.path.exists(workflow.spec):
            msg = "Could not find Workflow spec %s: " % (workflow.spec)
            msg += "Cannot create work area without spec!"
            logging.error(msg)
            raise CreateWorkAreaException(msg)
        loadedWorkload = WMWorkloadHelper(WMWorkload("workload"))
        loadedWorkload.load(workflow.spec)
        workloadName = loadedWorkload.name()

    # Strip the leading "/<workload>/" prefix from the task path, if present.
    taskName = workflow.task
    prefix = "/" + workloadName + "/"
    if taskName.startswith(prefix):
        taskName = taskName[len(prefix):]

    return (os.path.join(startDir, workloadName),
            os.path.join(startDir, workloadName, taskName))
def retrieveSchema(url, workflowName, user, group):
    """
    Build an ACDC-style clone schema from the couch spec of *workflowName*.

    FirstLumi/FirstEvent are doubled past the original request, the number of
    events still needed is recomputed, and the request string gains an
    'ACDC_' prefix.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    # Per-key overrides: each handler maps the original value onto the
    # (key, value) pair stored in the schema. Lambdas keep the remote
    # lookups lazy -- they only run when the key is actually present.
    overrides = {
        'ProcConfigCacheID': lambda v: ('ConfigCacheID', v),
        'RequestSizeEvents': lambda v: ('RequestSizeEvents', v),
        'Requestor': lambda v: ('Requestor', user),
        'Group': lambda v: ('Group', group),
        'RequestNumEvents': lambda v: ('RequestNumEvents',
                                       getFinalRequestedNumEvents(url, workflowName)),
        'FirstLumi': lambda v: ('FirstLumi', getMaxLumi(url, workflowName) * 2),
        'FirstEvent': lambda v: ('FirstEvent',
                                 deprecated.dbsTest.getInputEvents(url, workflowName) * 2),
        'RequestString': lambda v: ('RequestString', 'ACDC_' + v),
    }
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key in overrides:
            newKey, newValue = overrides[key](value)
            schema[newKey] = newValue
        elif value is not None:
            schema[key] = value
    return schema
def retrieveSchema(workflowName, user, group):
    """
    Build a clone schema from the couch spec of *workflowName*, renaming the
    legacy config-cache parameter and stamping the new requestor and group.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    # Values that are simply replaced for the clone.
    replacements = {'Requestor': user, 'Group': group}
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key == 'ProcConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestSizeEvents'] = value
        elif key in replacements:
            schema[key] = replacements[key]
        elif value is not None:
            schema[key] = value
    return schema
def getMasterName(self):
    """
    Build a universal name for the jobGroup directory.

    :returns: tuple (workload dir, task dir) under self.startDir; falls back
        to the jobGroup uid for both when the workflow spec cannot be found.
    """
    if self.wmWorkload is not None:
        workloadName = self.wmWorkload.name()
    elif not os.path.exists(self.workflow.spec):
        # No workload and no spec on disk: label jobs by job ID only.
        logging.error("Could not find Workflow spec %s; labeling jobs by job ID only!" % (self.workflow.spec))
        uidDir = os.path.join(self.startDir, self.jobGroup.uid)
        return uidDir, uidDir
    else:
        loadedWorkload = WMWorkloadHelper(WMWorkload("workload"))
        loadedWorkload.load(self.workflow.spec)
        workloadName = loadedWorkload.name()

    # Strip the leading "/<workload>/" prefix from the task path, if present.
    taskName = self.workflow.task
    prefix = "/" + workloadName + "/"
    if taskName.startswith(prefix):
        taskName = taskName[len(prefix):]
    return os.path.join(self.startDir, workloadName), os.path.join(self.startDir, workloadName, taskName)
def get(self, name, web_form=False):
    """
    Return the job splitting parameters for each task of workload *name*.

    :arg str name: the workload (request) name.
    :arg bool web_form: when True, reformat the result for the web UI.
    :returns: list of per-task splitting dicts.
    :raises cherrypy.HTTPError: 404 when the workload cannot be loaded.
    """
    helper = WMWorkloadHelper()
    try:
        helper.loadSpecFromCouch(self.reqdb_url, name)
    except Exception:
        raise cherrypy.HTTPError(404, "Cannot find workload: %s" % name)
    splittingDict = helper.listJobSplittingParametersByTask(performance=False)
    # One entry per task, in sorted task-name order.
    splitInfo = [{"splitAlgo": splittingDict[task]["algorithm"],
                  "splitParams": splittingDict[task],
                  "taskType": splittingDict[task]["type"],
                  "taskName": task}
                 for task in sorted(splittingDict.keys())]
    if web_form:
        splitInfo = create_web_splitting_format(splitInfo)
    return splitInfo
def loadWorkload(self, inputWorkload):
    """
    Set self.workload from *inputWorkload*, which may be None, a WMWorkload,
    a WMWorkloadHelper, or a path to a spec file on disk.

    :raises Exception: when a path is given but does not exist.
    """
    if inputWorkload == None:
        self.workload = None
        # BUG FIX: without this return, a None input fell through to
        # os.path.exists(None) below and raised a TypeError.
        return
    if isinstance(inputWorkload, WMWorkload):
        self.workload = WMWorkloadHelper(inputWorkload)
        return
    if isinstance(inputWorkload, WMWorkloadHelper):
        self.workload = inputWorkload
        return
    # Anything else is treated as a path to a spec file.
    if not os.path.exists(inputWorkload):
        raise Exception('Could not find %s in local file system' % (str(inputWorkload)))
    testWorkload = WMWorkloadHelper(WMWorkload("workload"))
    testWorkload.load(inputWorkload)
    self.workload = testWorkload
    return
def retrieveSchema(workflowName):
    """
    Build a clone schema from the couch spec of *workflowName*, renaming the
    legacy config-cache and event-count parameters, then stamp the requestor
    and group for the clone.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if key in ('ProdConfigCacheID', 'ProcConfigCacheID'):
            # Both legacy names map onto the modern ConfigCacheID.
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestNumEvents'] = value
        elif value is not None:
            schema[key] = value
    schema['Requestor'] = 'anlevin'
    schema['Group'] = 'DATAOPS'
    return schema
def retrieveSchema(url, workflowName, user, group ):
    """
    Build an ACDC-style clone schema from the couch spec of *workflowName*:
    first lumi and first event are doubled past the original request, the
    remaining number of events is recomputed and the request string gains an
    'ACDC_' prefix.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key == 'ProcConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key == 'Requestor':
            schema['Requestor'] = user
        elif key == 'Group':
            schema['Group'] = group
        elif key == 'RequestSizeEvents':
            schema['RequestSizeEvents'] = value
        elif key == 'RequestNumEvents':
            # Only the events not yet produced should be requested again.
            schema['RequestNumEvents'] = getFinalRequestedNumEvents(url, workflowName)
        elif key == 'FirstLumi':
            # Start well past the lumi range of the original request.
            schema['FirstLumi'] = getMaxLumi(url, workflowName) * 2
        elif key == 'FirstEvent':
            schema['FirstEvent'] = deprecated.dbsTest.getInputEvents(url, workflowName) * 2
        elif key == 'RequestString':
            schema['RequestString'] = 'ACDC_' + value
        elif value is not None:
            schema[key] = value
    return schema
def getDataFromSpecFile(specFile):
    """
    Load a workload spec from *specFile* and return a dict containing the
    campaign plus the PrepID of every task, keyed by task path name.
    """
    workload = WMWorkloadHelper()
    workload.load(specFile)
    summary = {"Campaign": workload.getCampaign()}
    for task in workload.taskIterator():
        summary[task.getPathName()] = task.getPrepID()
    return summary
def retrieveSchema(workflowName):
    """
    Load and return the WMWorkloadHelper of *workflowName* from the request
    manager couch, for use when cloning the original request.
    """
    helper = WMWorkloadHelper()
    helper.load(os.path.join(reqmgrCouchURL, workflowName, "spec"))
    return helper
def getSplitting(requestName):
    """
    Return the 'events_per_job' splitting parameter of the top level task of
    *requestName*, read from the workload spec stored in couch.
    """
    reqmgrUrl = 'https://cmsweb.cern.ch/reqmgr/reqMgr/'
    reqmgr = RequestManager(dict={'endpoint': reqmgrUrl})
    result = reqmgr.getRequest(requestName)
    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    # The workload spec is stored as a pickled attachment on the request doc.
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)
    # Hoisted the repeated getTopLevelTask()[0] lookup and dropped the
    # unused jobSplittingAlgorithm() call.
    topTask = workload.getTopLevelTask()[0]
    params = topTask.jobSplittingParameters()
    return params['events_per_job']
def retrieveSchema(workflowName, reqmgrCouchURL = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"):
    """
    Load and return the WMWorkloadHelper of *workflowName*, used as the base
    for cloning the original request.

    :arg str reqmgrCouchURL: base URL of the reqmgr workload cache couch DB.
    """
    # Import kept local, matching the original structure of this helper.
    from WMCore.WMSpec.WMWorkload import WMWorkloadHelper
    helper = WMWorkloadHelper()
    helper.load(os.path.join(reqmgrCouchURL, workflowName, "spec"))
    return helper
def loadWorkload(self, requestName):
    """
    _loadWorkload_

    Fetch the workload of *requestName* back out of couch after it has been
    saved there.
    """
    specUrl = "%s/%s/%s/spec" % (os.environ["COUCHURL"], self.couchDBName, requestName)
    workload = WMWorkloadHelper()
    workload.load(specUrl)
    return workload
def main():
    """
    Ad-hoc driver: load a hard-coded request's workload spec from couch and
    run the Block start policy over its top level task, printing the result.

    NOTE(review): Python 2 only (print statements).
    """
    demPolicy = Block()
    reqmgr = RequestManager(dict = {'endpoint' : 'https://cmsweb.cern.ch/reqmgr/reqMgr'})
    result = reqmgr.getRequest('pdmvserv_HIG-Summer12DR53X-01392_T1_ES_PIC_MSS_1_v0__130724_063344_7207')
    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    # The workload spec is stored as a pickled attachment on the request doc.
    workloadPickle = workloadDB.getAttachment('pdmvserv_HIG-Summer12DR53X-01392_T1_ES_PIC_MSS_1_v0__130724_063344_7207', 'spec')
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)
    x,y = demPolicy(wmspec = workload, task = workload.getTopLevelTask()[0])
    print x
    print y
def getSplitting(requestName):
    """
    Return the 'events_per_job' splitting parameter of the top level task of
    *requestName*, read from the workload spec stored in couch.
    """
    reqmgrUrl = 'https://cmsweb.cern.ch/reqmgr/reqMgr/'
    reqmgr = RequestManager(dict = {'endpoint' : reqmgrUrl})
    result = reqmgr.getRequest(requestName)
    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    # The workload spec is stored as a pickled attachment on the request doc.
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)
    # Dropped the unused jobSplittingAlgorithm() call and hoisted the
    # repeated getTopLevelTask()[0] lookup.
    topTask = workload.getTopLevelTask()[0]
    return topTask.jobSplittingParameters()['events_per_job']
def loadWorkload(self, requestName):
    """
    _loadWorkload_

    Load the workload of *requestName* from couch after it has been saved
    there.
    """
    workload = WMWorkloadHelper()
    workload.load('%s/%s/%s/spec' % (os.environ['COUCHURL'],
                                     self.couchDBName,
                                     requestName))
    return workload
def getElementsForSplitting(self):
    """Return the inbox elements in 'Negotiating' state, each with its loaded
    workload spec attached under the 'WMSpec' key."""
    elements = self.getInboxElements(status = 'Negotiating')
    # Several elements may share a request; load each spec only once.
    specCache = {}
    for element in elements:
        request = element['RequestName']
        if request not in specCache:
            helper = WMWorkloadHelper()
            helper.load(self.parentCouchUrlWithAuth + "/%s/spec" % request)
            specCache[request] = helper
        element['WMSpec'] = specCache[request]
    del specCache
    return elements
def getElementsForSplitting(self):
    """Return the inbox elements in Negotiating state that still need to be
    split, attaching the loaded workload spec to each under 'WMSpec'."""
    elements = self.getInboxElements(status="Negotiating")
    loadedSpecs = {}  # cache: several elements may share one request/spec
    for element in elements:
        reqName = element["RequestName"]
        if reqName not in loadedSpecs:
            spec = WMWorkloadHelper()
            spec.load(self.parentCouchUrlWithAuth + "/%s/spec" % reqName)
            loadedSpecs[reqName] = spec
        element["WMSpec"] = loadedSpecs[reqName]
    del loadedSpecs
    return elements
def getElementsForSplitting(self):
    """Return the Negotiating inbox elements that need to be split; each
    element carries its workload spec under the 'WMSpec' key."""
    elements = self.getInboxElements(status='Negotiating')
    # Cache loaded specs: multiple elements may belong to the same request.
    cache = {}
    for ele in elements:
        name = ele['RequestName']
        if name not in cache:
            loaded = WMWorkloadHelper()
            loaded.load(self.parentCouchUrlWithAuth + "/%s/spec" % name)
            cache[name] = loaded
        ele['WMSpec'] = cache[name]
    del cache
    return elements
def loadOverride(self, specPath):
    """
    _loadOverride_

    Load the spec at *specPath*, store its PhEDEx injection override site in
    self.specCache and return it. Loading failures propagate to the caller.

    NOTE(review): the cache is populated but never consulted here --
    presumably the caller checks self.specCache first; confirm.
    """
    helper = WMWorkloadHelper()
    helper.load(specPath)
    site = helper.getPhEDExInjectionOverride()
    self.specCache[specPath] = site
    return site
def retrieveSchema( workflowName, reqmgrCouchURL="https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"):
    """
    Fetch the WMWorkloadHelper of *workflowName* from the reqmgr workload
    cache, to serve as the basis of a cloned request.

    :arg str reqmgrCouchURL: base URL of the reqmgr workload cache couch DB.
    """
    # Import kept local, matching the original structure of this helper.
    from WMCore.WMSpec.WMWorkload import WMWorkloadHelper
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    loader = WMWorkloadHelper()
    loader.load(specURL)
    return loader
def retrieveSchema(workflowName):
    """
    Build a TaskChain clone request from the couch spec of *workflowName*.

    Legacy parameter names are renamed (Prod/ProcConfigCacheID ->
    ConfigCacheID, RequestSizeEvents -> RequestNumEvents), empty placeholder
    values are dropped, and each TaskN dict is migrated to the HG1309
    splitting parameter names (SplittingAlgo, LumisPerJob, EventsPerJob).
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value ) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if key == 'ProdConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key == 'ProcConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestNumEvents'] = value
        # Empty-dict/empty placeholders for these keys are dropped entirely.
        elif key == 'ProcessingString' and value == {}:
            continue
        elif key == 'AcquisitionEra' and value == {}:
            continue
        elif key == 'SkimConfigs' and not value:
            continue
        elif value != None:
            schema[key] = value
    # Work on a deep copy so the per-task renames below do not disturb the
    # schema dicts that are still being iterated over.
    request = deepcopy(schema)
    request['Requestor'] = 'anlevin'
    request['Group'] = 'DATAOPS'
    ### Now changing the parameters according to HG1309
    # NOTE(review): assumes schema['TaskChain'] is the number of tasks and
    # that Task1..TaskN entries all exist -- confirm against the spec format.
    x = 1
    while x <= schema['TaskChain']:
        task = 'Task' + str(x)
        for (key, value) in schema[task].iteritems():
            if key == "SplittingAlgorithm":
                request[task]['SplittingAlgo'] = value
                del request[task]['SplittingAlgorithm']
            elif key == "SplittingArguments":
                # Flatten the old arguments dict into per-parameter keys.
                for (k2, v2) in schema[task][key].iteritems():
                    if k2 == "lumis_per_job":
                        request[task]["LumisPerJob"] = v2
                    elif k2 == "events_per_job":
                        request[task]["EventsPerJob"] = v2
                del request[task]['SplittingArguments']
        x += 1
    return request
def main():
    """
    Debug driver: fetch one ACDC block's files, report which of them contain
    a given set of lumis, then exit.

    NOTE(review): the sys.exit(1) call below makes the WMBSHelper section
    unreachable; it is kept for manual experimentation only.
    """
    start = time.time()
    # blockName = match['Inputs'].keys()[0]
    blockName = "/acdc/vlimant_ACDC0_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_190218_145226_481/:pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222:SUS-RunIIFall18wmLHEGS-00025_0/0/31055"
    # acdcInfo = match['ACDC']
    acdcInfo = {"database": "acdcserver",
                "fileset": "/pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222/SUS-RunIIFall18wmLHEGS-00025_0",
                "collection": "pdmvserv_task_SUS-RunIIFall18wmLHEGS-00025__v1_T_181211_005112_2222",
                "server": "https://cmsweb.cern.ch/couchdb"}
    acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
    # The ACDC block name encodes the chunk offset and number of files.
    splitedBlockName = ACDCBlock.splitBlockName(blockName)
    print("Splitted block name: %s" % splitedBlockName)
    fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                   acdcInfo['fileset'],
                                   splitedBlockName['Offset'],
                                   splitedBlockName['NumOfFiles'])
    print("Retrieved %d unique files from the ACDCServer" % len(fileLists))
    block = {}
    block["Files"] = fileLists
    # Report every file whose run masks overlap the lumis of interest.
    wantedLumis = set([252052, 240646])
    for f in fileLists:
        for run in f['runs']:
            maskDict = run.json()
            lumisSet = set(maskDict['Lumis'].keys())
            if wantedLumis.intersection(lumisSet):
                print("File: %s with events: %s, contains these lumis: %s" % (f['lfn'], f['events'], wantedLumis.intersection(lumisSet)))
    # with open("chunkfiles.json", 'w') as fo:
    #     json.dump(block, fo)
    end = time.time()
    print("Spent %s secs running so far" % (end - start))
    sys.exit(1)

    ### Now doing the WMBSHelper stuff (unreachable -- see NOTE above)
    reqUrl = "https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache"
    requestName = "vlimant_ACDC0_task_HIG-RunIIFall17wmLHEGS-01122__v1_T_180808_130708_5376"
    wmspec = WMWorkloadHelper()
    wmspec.loadSpecFromCouch(reqUrl, requestName)
    taskName = "HIG-RunIIFall17DRPremix-00788_0"
    mask = None
    cacheDir = "/data/srv/wmagent/v1.1.14.patch6/install/wmagent/WorkQueueManager/cache"
    # wmbsHelper = WMBSHelper(wmspec, match['TaskName'], blockName, mask, self.params['CacheDir'])
    wmbsHelper = WMBSHelper(wmspec, taskName, blockName, mask, cacheDir)
    sub, numFilesAdded = wmbsHelper.createSubscriptionAndAddFiles(block=block)
def buildWorkload(self, originalRequestURL):
    """
    _buildWorkload_

    Build a resubmission workload by loading the original request's workload
    and truncating it at the configured initial task path.
    """
    # TODO remove the dependency on reqmgr1
    if originalRequestURL is None:
        # reqmgr1 path: load via the request database (imports are kept
        # local to avoid a hard dependency).
        from WMCore.HTTPFrontEnd.RequestManager.ReqMgrWebTools import loadWorkload
        from WMCore.RequestManager.RequestDB.Interface.Request.GetRequest import getRequestByName
        originalRequest = getRequestByName(self.originalRequestName)
        helper = loadWorkload(originalRequest)
    else:
        # reqmgr2 path: the spec comes straight from couch.
        helper = WMWorkloadHelper()
        helper.loadSpecFromCouch(originalRequestURL, self.originalRequestName)
    helper.truncate(self.workloadName, self.initialTaskPath,
                    self.acdcServer, self.acdcDatabase,
                    self.collectionName)
    helper.ignoreOutputModules(self.ignoredOutputModules)
    return helper
def loadWorkload(request):
    """
    Return a WMWorkloadHelper for the workload referenced by
    request['RequestWorkflow'].

    :raises cherrypy.HTTPError: 400 for an invalid couch URL, 404 when the
        workload cannot be loaded.
    """
    url = request['RequestWorkflow']
    try:
        WMCore.Lexicon.couchurl(url)
    except Exception:
        raise cherrypy.HTTPError(400, "Invalid workload "+urllib.quote(url))
    # The helper is constructed once the URL is validated (previously it was
    # created twice; the first instance was dead code).
    helper = WMWorkloadHelper()
    try:
        helper.load(url)
    except Exception:
        raise cherrypy.HTTPError(404, "Cannot find workload "+removePasswordFromUrl(url))
    return helper
def retrieveSchema(workflowName,newBlockBlacklist):
    """
    Build a clone schema from the couch spec of *workflowName*, replacing its
    block blacklist with *newBlockBlacklist*.

    NOTE(review): BlockBlacklist is stored as the literal string
    "[<newBlockBlacklist>]", matching the original behaviour -- confirm the
    consumer expects a string rather than a list.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if key == 'ProdConfigCacheID':
            schema['ProdConfigCacheID'] = value
        elif value is not None:
            schema[key] = value
    schema["BlockBlacklist"] = "[" + newBlockBlacklist + "]"
    return schema
def retrieveSchema(workflowName):
    """
    Build a clone schema from the couch spec of *workflowName*, mapping the
    legacy config-cache and event-count parameter names onto their modern
    equivalents and dropping None values.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    # Legacy parameter names and their modern equivalents.
    renames = {'ProdConfigCacheID': 'ConfigCacheID',
               'ProcConfigCacheID': 'ConfigCacheID',
               'RequestSizeEvents': 'RequestNumEvents'}
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if key in renames:
            schema[renames[key]] = value
        elif value is not None:
            schema[key] = value
    return schema
def retrieveSchema(workflowName, newBlockBlacklist):
    """
    Clone the request schema of *workflowName*, overriding its BlockBlacklist
    with *newBlockBlacklist*.
    """
    helper = WMWorkloadHelper()
    helper.load(os.path.join(reqmgrCouchURL, workflowName, "spec"))
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        # ProdConfigCacheID is always kept (even when None); everything else
        # is kept only when it has a non-None value.
        if key == 'ProdConfigCacheID' or value is not None:
            schema[key] = value
    # NOTE(review): stored as the literal string "[...]" -- confirm the
    # consumer expects a string rather than a list.
    schema["BlockBlacklist"] = "[" + newBlockBlacklist + "]"
    return schema
def retrieveSchema(workflowName):
    """
    Build a TaskChain clone request from the couch spec of *workflowName*.

    Legacy parameter names are renamed (Prod/ProcConfigCacheID ->
    ConfigCacheID, RequestSizeEvents -> RequestNumEvents), empty placeholder
    values are dropped, and each TaskN dict is migrated to the HG1309
    splitting parameter names (SplittingAlgo, LumisPerJob, EventsPerJob).
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if key == 'ProdConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key=='ProcConfigCacheID':
            schema['ConfigCacheID'] = value
        elif key=='RequestSizeEvents':
            schema['RequestNumEvents'] = value
        # Empty-dict/empty placeholders for these keys are dropped entirely.
        elif key=='ProcessingString' and value == {}:
            continue
        elif key=='AcquisitionEra' and value == {}:
            continue
        elif key=='SkimConfigs' and not value:
            continue
        elif value != None:
            schema[key] = value
    # Work on a deep copy so the per-task renames below do not disturb the
    # schema dicts that are still being iterated over.
    request = deepcopy(schema)
    request['Requestor'] = 'anlevin'
    request['Group'] = 'DATAOPS'
    ### Now changing the parameters according to HG1309
    # NOTE(review): assumes schema['TaskChain'] is the number of tasks and
    # that Task1..TaskN entries all exist -- confirm against the spec format.
    x = 1
    while x <= schema['TaskChain']:
        task = 'Task'+str(x)
        for (key, value) in schema[task].iteritems():
            if key == "SplittingAlgorithm":
                request[task]['SplittingAlgo'] = value
                del request[task]['SplittingAlgorithm']
            elif key == "SplittingArguments":
                # Flatten the old arguments dict into per-parameter keys.
                for (k2, v2) in schema[task][key].iteritems():
                    if k2 == "lumis_per_job":
                        request[task]["LumisPerJob"] = v2
                    elif k2 == "events_per_job":
                        request[task]["EventsPerJob"] = v2
                del request[task]['SplittingArguments']
        x += 1
    return request
def getWMWorkload(self):
    """
    _getWMWorkload_

    Return the WMWorkloadHelper for this sandbox, loading and caching it on
    first use from the WMWorkload.pkl that sits next to the WMSandbox
    package.

    :raises RuntimeError: if the WMSandbox module cannot be imported.
    """
    if self.workload != None:
        return self.workload
    try:
        import WMSandbox
    except ImportError as ex:
        msg = "Error importing WMSandbox module"
        msg += str(ex)
        # BUG FIX: 'raise RuntimeError, msg' is Python 2 only syntax; the
        # call form works on both Python 2 and 3.
        raise RuntimeError(msg)
    wmsandboxLoc = inspect.getsourcefile(WMSandbox)
    workloadPcl = wmsandboxLoc.replace("__init__.py", "WMWorkload.pkl")
    # BUG FIX: pickles must be read in binary mode ('rb', not 'r'); the
    # 'with' block also guarantees the handle is closed on error.
    with open(workloadPcl, 'rb') as handle:
        wmWorkload = pickle.load(handle)
    self.workload = WMWorkloadHelper(wmWorkload)
    # BUG FIX: return the freshly cached workload instead of None, matching
    # the cache-hit path above.
    return self.workload
def get(self, name):
    """
    Workload config API call: return the configuration of workload *name*
    rendered as a string.

    :arg str name: the workload (request) name.
    :raises cherrypy.HTTPError: 404 when the workload cannot be loaded.
    """
    helper = WMWorkloadHelper()
    try:
        helper.loadSpecFromCouch(self.reqdb_url, name)
    except Exception:
        raise cherrypy.HTTPError(404, "Cannot find workload: %s" % name)
    return str(helper.data)
def get(self, name):
    """
    Workload config API call: return the configuration of workload *name*
    rendered as a string.

    :arg str name: the workload (request) name.
    :raises cherrypy.HTTPError: 404 when the workload cannot be loaded.
    """
    helper = WMWorkloadHelper()
    try:
        helper.loadSpecFromCouch(self.reqdb_url, name)
    except Exception:
        # BUG FIX: the message used '"...: % " + name' instead of a '%s'
        # placeholder, yielding "Cannot find workload: % <name>".
        raise cherrypy.HTTPError(404, "Cannot find workload: %s" % name)
    return str(helper.data)
def retrieveSchema(workflowName):
    """
    Build a clone schema from the couch spec of *workflowName*, renaming the
    legacy config-cache and event-count parameters and dropping None values.
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    helper.load(specURL)
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_whole_tree_().iteritems():
        if key in ('ProdConfigCacheID', 'ProcConfigCacheID'):
            # Both legacy names map onto the modern ConfigCacheID.
            schema['ConfigCacheID'] = value
        elif key == 'RequestSizeEvents':
            schema['RequestNumEvents'] = value
        elif value is not None:
            schema[key] = value
    return schema
def retrieveWMSpec(workflow=None, wmWorkloadURL=None):
    """
    _retrieveWMSpec_

    Load the WMSpec for *workflow* (or directly from *wmWorkloadURL*).

    :returns: a WMWorkloadHelper, or None when no spec file can be found.
    """
    specPath = wmWorkloadURL
    if not specPath and workflow:
        specPath = workflow.spec
    if not specPath or not os.path.isfile(specPath):
        logging.error("WMWorkloadURL %s is empty", specPath)
        return None
    helper = WMWorkloadHelper(WMWorkload("workload"))
    helper.load(specPath)
    return helper
def retrieveWMSpec(workflow = None, wmWorkloadURL = None):
    """
    _retrieveWMSpec_

    Load the WMSpec associated with *workflow* (or directly from
    *wmWorkloadURL*).

    :returns: a WMWorkloadHelper, or None when no spec file can be found.
    """
    if not wmWorkloadURL and workflow:
        wmWorkloadURL = workflow.spec
    if not wmWorkloadURL or not os.path.isfile(wmWorkloadURL):
        # Lazy %-args instead of eager string formatting -- consistent with
        # the sibling retrieveWMSpec implementation in this file.
        logging.error("WMWorkloadURL %s is empty", wmWorkloadURL)
        return None
    wmWorkload = WMWorkloadHelper(WMWorkload("workload"))
    wmWorkload.load(wmWorkloadURL)
    return wmWorkload
def buildWorkload(self, originalRequestURL):
    """
    _buildWorkload_

    Build a resubmission workload: load the original request's workload and
    truncate it at the configured initial task path.
    """
    # TODO remove the dependency on reqmgr1
    if originalRequestURL == None:
        # reqmgr1 path -- imports are kept local to avoid a hard dependency.
        from WMCore.HTTPFrontEnd.RequestManager.ReqMgrWebTools import loadWorkload
        from WMCore.RequestManager.RequestDB.Interface.Request.GetRequest import getRequestByName
        helper = loadWorkload(getRequestByName(self.originalRequestName))
    else:
        # reqmgr2 path -- the spec comes straight from couch.
        helper = WMWorkloadHelper()
        helper.loadSpecFromCouch(originalRequestURL, self.originalRequestName)
    helper.truncate(self.workloadName,
                    self.initialTaskPath,
                    self.acdcServer,
                    self.acdcDatabase,
                    self.collectionName)
    helper.ignoreOutputModules(self.ignoredOutputModules)
    return helper
def loadTasks(self, workflowName, spec):
    """
    _loadTasks_

    Load the task path names of *workflowName* from its *spec* file and
    store them in self.taskCache; a cached workflow is a no-op.

    :raises Tier0PluginError: when the spec file cannot be read.
    """
    if workflowName in self.taskCache:
        return
    try:
        workloadHelper = WMWorkloadHelper()
        workloadHelper.load(spec)
        tasks = workloadHelper.listAllTaskPathNames()
        self.taskCache[workflowName] = tasks
    # BUG FIX: 'except IOError, ex' is Python 2 only syntax; 'as' works on
    # Python 2.6+ and Python 3, matching the rest of the codebase.
    except IOError as ex:
        msg = "Failed to load spec file %s\n" % spec
        msg += "Original IOError: %s" % str(ex)
        raise Tier0PluginError(msg)
def queueWork(self, wmspecUrl, request = None, team = None):
    """
    Take and queue work from a WMSpec.

    If request name is provided but doesn't match WMSpec name
    an error is raised.

    If team is provided work will only be available to queue's
    belonging to that team.

    Duplicate specs will be ignored.

    NOTE(review): Python 2 only ('except Exception, ex' syntax); returns 1
    when the spec is a duplicate, otherwise falls through (returning None
    here) -- confirm callers distinguish these.
    """
    self.logger.info('queueWork() begin queueing "%s"' % wmspecUrl)
    wmspec = WMWorkloadHelper()
    wmspec.load(wmspecUrl)
    # check we haven't already got this work
    # (an element with this spec's name already in the inbox means the spec
    # was queued before; CouchNotFoundError means it is new)
    try:
        self.backend.getInboxElements(elementIDs = [wmspec.name()])
    except CouchNotFoundError:
        pass
    else:
        self.logger.warning('queueWork(): Ignoring duplicate spec "%s"' % wmspec.name())
        return 1
    if request:
        # Validate the request name and require it to match the spec name.
        try:
            Lexicon.requestName(request)
        except Exception, ex:
            # can throw many errors e.g. AttributeError, AssertionError etc.
            error = WorkQueueWMSpecError(wmspec, "Request name validation error: %s" % str(ex))
            raise error
        if request != wmspec.name():
            raise WorkQueueWMSpecError(wmspec, 'Request & workflow name mismatch %s vs %s' % (request, wmspec.name()))
def retrieveSchema(workflowName):
    """
    Clone helper: load the couch spec of *workflowName* and return the pair
    (schema, assign) -- the request schema minus identifying fields, and the
    assignment parameters (LFN bases, merge limits, site whitelist).

    NOTE(review): Python 2 only (print statements with trailing comma).
    """
    specURL = os.path.join(reqmgrCouchURL, workflowName, "spec")
    helper = WMWorkloadHelper()
    print " retrieving original workflow...",
    helper.load(specURL)
    print "done."
    schema = {}
    for (key, value) in helper.data.request.schema.dictionary_().iteritems():
        if value != None:
            schema[key] = value
    schema["Requestor"] = "linacre"
    schema["Group"] = "DATAOPS"
    # The identity of the original request must not leak into the clone.
    del schema["RequestName"]
    del schema["CouchDBName"]
    del schema["CouchURL"]
    assign = {}
    assign["unmergedLFNBase"] = helper.data.properties.unmergedLFNBase
    assign["mergedLFNBase"] = helper.data.properties.mergedLFNBase
    assign["processingVersion"] = helper.data.properties.processingVersion
    assign["dashboardActivity"] = helper.data.properties.dashboardActivity
    assign["acquisitionEra"] = helper.data.properties.acquisitionEra
    topLevelTask = helper.getTopLevelTask()[0]
    assign["SiteWhitelist"] = topLevelTask.siteWhitelist()
    # Use the first non-DQM merge task as the source of the merge limits.
    mergeTask = None
    for mergeTask in topLevelTask.childTaskIterator():
        if mergeTask.taskType() == "Merge":
            if mergeTask.getPathName().find("DQM") == -1:
                break
    # NOTE(review): if no non-DQM merge task exists, mergeTask is the last
    # child iterated (or None when there are no children) and the lookups
    # below would fail -- confirm every workflow has a merge task.
    assign["MinMergeSize"] = mergeTask.jobSplittingParameters()["min_merge_size"]
    assign["MaxMergeSize"] = mergeTask.jobSplittingParameters()["max_merge_size"]
    assign["MaxMergeEvents"] = mergeTask.jobSplittingParameters().get("max_merge_events", 50000)
    return (schema, assign)
def queueWork(self, wmspecUrl, request=None, team=None):
    """
    Load the WMSpec at *wmspecUrl* and queue its work.

    If *request* is given it must be a valid request name and must match
    the workload name, otherwise a WorkQueueWMSpecError is raised.  Work
    queued with a *team* is only available to queues of that team.
    Duplicate specs will be ignored.
    """
    self.logger.info('queueWork() begin queueing "%s"' % wmspecUrl)
    workload = WMWorkloadHelper()
    workload.load(wmspecUrl)

    if request:
        # Reject syntactically invalid request names up front.
        try:
            Lexicon.requestName(request)
        except Exception as validationError:
            # Lexicon raises a variety of errors (AttributeError,
            # AssertionError, ...); wrap them all the same way.
            raise WorkQueueWMSpecError(
                workload,
                "Request name validation error: %s" % str(validationError))
        if request != workload.name():
            raise WorkQueueWMSpecError(
                workload,
                'Request & workflow name mismatch %s vs %s' % (request, workload.name()))
def updatePriority(self, wf, priority):
    """
    Set *priority* on workflow *wf*.

    This bumps every element of the workflow found by the
    elementsDetailByWorkflowAndStatus view and, when the spec document
    exists in couch, rewrites the stored spec with the new priority.
    """
    # Collect the ids of the workflow's elements and update them in bulk.
    view = self.db.loadView('WorkQueue', 'elementsDetailByWorkflowAndStatus',
                            {'startkey': [wf], 'endkey': [wf, {}],
                             'reduce': False})
    elementIds = [row['id'] for row in view.get('rows', [])]
    if elementIds:
        self.updateElements(*elementIds, Priority=priority)

    # Update the spec document too, if it exists.
    if self.db.documentExists(wf):
        helper = WMWorkloadHelper()
        specUrl = self.db['host'] + "/%s/%s/spec" % (self.db.name, wf)
        helper.load(specUrl)
        helper.setPriority(priority)
        helper.saveCouch(self.hostWithAuth, self.db.name,
                         {'name': helper.name()})
    return
def main(): toCheckList = '/home/dballest/Dev-Workspace/dev-scripts/data/upgrade-vocms85-613.txt' handle = open(toCheckList, 'r') timePerJobFile = open('/home/dballest/Dev-Workspace/dev-scripts/data/upgrade-vocms85-tpj.data', 'w') eventsPerLumiFile = open('/home/dballest/Dev-Workspace/dev-scripts/data/upgrade-vocms85-epl.data', 'w') lumisPerMergeFile = open('/home/dballest/Dev-Workspace/dev-scripts/data/upgrade-vocms85-lpm.data', 'w') count = 0 for request in handle: z = WMWorkloadHelper() z.load('https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache/%s/spec' % request.strip()) schema = z.data.request.schema requesType = schema.RequestType if requesType != 'MonteCarlo': continue timePerEvent = schema.TimePerEvent sizePerEvent = schema.SizePerEvent events = schema.RequestNumEvents eff = getattr(schema, "FilterEfficiency", 1.0) topTask = z.getTopLevelTask()[0] eventsPerJob = topTask.data.input.splitting.events_per_job if eff < 1.0: count += 1 for childTask in topTask.childTaskIterator(): if childTask.data.taskType == 'Merge': mergeSizeLimit = childTask.data.input.splitting.max_merge_size mergeEventLimit = childTask.data.input.splitting.max_merge_events break sizeOfZeroEvent = 131091.0 sizePerLumi = eventsPerJob*sizePerEvent*eff lumisPerMergedBySize = mergeSizeLimit/(sizePerLumi + sizeOfZeroEvent) lumisPerMergedByEvent = mergeEventLimit/(eventsPerJob*eff) timePerJobFile.write("%f\n" % (timePerEvent * eventsPerJob)) eventsPerLumiFile.write("%f\n" % (eventsPerJob*eff)) lumisPerMergeFile.write("%f\n" % min(lumisPerMergedBySize, lumisPerMergedByEvent)) print count handle.close() timePerJobFile.close() eventsPerLumiFile.close() lumisPerMergeFile.close()
def loadWorkload():
    """
    _loadWorkload_

    Load the pickled Workload from the WMSandbox area and wrap it in a
    WMWorkloadHelper.

    :returns: WMWorkloadHelper around the unpickled workload.
    """
    sandboxLoc = locateWMSandbox()
    workloadPcl = "%s/WMWorkload.pkl" % sandboxLoc
    # Pickles are binary data: open in 'rb' so the load works regardless
    # of platform/Python version (text mode can corrupt the stream).
    with open(workloadPcl, 'rb') as handle:
        wmWorkload = pickle.load(handle)

    return WMWorkloadHelper(wmWorkload)
def validate_request_update_args(request_args, config, reqmgr_db_service, param):
    """
    Validate a request-update (POST) call and convert its body to arguments.

    param and safe structure is RESTArgs structure: named tuple
    RESTArgs(args=[], kwargs={})

    Steps performed here:
    1. read data from body
    2. validate the permission (authentication) by status and request type
    3. validate the state transition (against the previous state from couchdb)
    4. validate the remaining arguments using the workload validation
    5. convert data from body to arguments (spec instance, argument with
       default setting)

    :returns: (workload, request_args) on success.

    TODO: raise the right kind of error with a clear message
    """
    request_name = request_args["RequestName"]
    # RequestName is not a spec argument; it needs to be deleted for validation
    del request_args["RequestName"]
    couchurl = '%s/%s' % (config.couch_host, config.couch_reqmgr_db)
    workload = WMWorkloadHelper()
    # param structure is RESTArgs structure.
    workload.loadSpecFromCouch(couchurl, request_name)

    # first validate the permission by status and request type.
    # if the status is not set only ReqMgr Admin can change the values
    # TODO for each step (assigned, approved, announce) find out what other
    # values can be set
    request_args["RequestType"] = workload.requestType()
    permission = getWritePermission(request_args)
    authz_match(permission['role'], permission['group'])
    del request_args["RequestType"]

    # validate the status transition when a status change is requested
    if "RequestStatus" in request_args:
        validate_state_transition(reqmgr_db_service, request_name, request_args["RequestStatus"])
        # copy request_args without RequestStatus, since the status is not
        # part of the spec arguments and their validation
        args_without_status = {}
        args_without_status.update(request_args)
        del args_without_status["RequestStatus"]
    else:
        args_without_status = request_args
    if len(args_without_status) > 0 and not workqueue_stat_validation(args_without_status):
        # validate the arguments against the spec argumentSpecdefinition
        # TODO: currently only assigned status allows any update other than
        # a Status update
        workload.validateArgumentForAssignment(args_without_status)
        # update request_args with the type-converted values
        request_args.update(args_without_status)
    return workload, request_args
def getRequestInformationAndWorkload(requestName, reqmgrUrl):
    """
    _getRequestInformationAndWorkload_

    Fetch the ReqMgr record for *requestName* together with the full
    pickled workload attached to it.

    :returns: (WMWorkloadHelper, request-info dict) tuple.
    """
    requestInfo = RequestManager(dict={'endpoint': reqmgrUrl}).getRequest(requestName)
    couchDb = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    pickledSpec = couchDb.getAttachment(requestName, 'spec')
    # NOTE(review): unpickling data fetched from couch -- fine for this
    # trusted internal service, unsafe for external input.
    return WMWorkloadHelper(pickle.loads(pickledSpec)), requestInfo
def getMasterName(self):
    """
    Gets a universal name for the jobGroup directory

    Derives the workload name from the attached helper or, failing that,
    from the workflow spec on disk, then builds the (workload dir,
    workload/task dir) pair under self.startDir.

    Return the uid as the name if none available
    (THIS SHOULD NEVER HAPPEN)
    """
    if self.wmWorkload is not None:
        # A workload helper is already attached; use its name directly.
        workload = self.wmWorkload.name()
    elif not os.path.exists(self.workflow.spec):
        logging.error("Could not find Workflow spec %s; labeling jobs by job ID only!" % (self.workflow.spec))
        return os.path.join(self.startDir, self.jobGroup.uid), os.path.join(self.startDir, self.jobGroup.uid)
    else:
        # Load the spec from disk to recover the workload name.
        wmWorkload = WMWorkloadHelper(WMWorkload("workload"))
        wmWorkload.load(self.workflow.spec)
        workload = wmWorkload.name()

    task = self.workflow.task
    # Task paths look like /<workload>/<task...>; strip the workload prefix.
    if task.startswith("/" + workload + "/"):
        task = task[len(workload) + 2:]

    return os.path.join(self.startDir, workload), os.path.join(self.startDir, workload, task)
def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL):
    """
    _getRequestInformationAndWorkload_

    Look up *requestName* in the central request database and return its
    info record together with the full pickled workload.

    :returns: (WMWorkloadHelper, request-info dict) tuple.
    """
    dbReader = RequestDBReader(centralRequestDBURL, couchapp="ReqMgr")
    requestInfo = dbReader.getRequestByNames(requestName, True)[requestName]
    specDb = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    pickledSpec = specDb.getAttachment(requestName, 'spec')
    return WMWorkloadHelper(pickle.loads(pickledSpec)), requestInfo
def createResubmitSpec(self, serverUrl, couchDB):
    """
    _createResubmitSpec_

    Create a bogus resubmit workload: build a minimal Processing
    workload with one reco task, register two failed files with the
    DataCollectionService, then truncate the workload into a
    Resubmit_TestWorkload rooted at the top level task.
    """
    self.site = "cmssrm.fnal.gov"
    workload = WMWorkloadHelper(WMWorkload("TestWorkload"))
    reco = workload.newTask("reco")
    workload.setOwnerDetails(name = "evansde77", group = "DMWM")

    # first task uses the input dataset
    reco.addInputDataset(primary = "PRIMARY", processed = "processed-v1", tier = "TIER1")
    reco.data.input.splitting.algorithm = "File"
    reco.setTaskType("Processing")
    cmsRunReco = reco.makeStep("cmsRun1")
    cmsRunReco.setStepType("CMSSW")
    reco.applyTemplates()
    # attach one output module to the CMSSW step
    cmsRunRecoHelper = cmsRunReco.getTypeHelper()
    cmsRunRecoHelper.addOutputModule("outputRECO",
                                     primaryDataset = "PRIMARY",
                                     processedDataset = "processed-v2",
                                     dataTier = "TIER2",
                                     lfnBase = "/store/dunkindonuts",
                                     mergedLFNBase = "/store/kfc")
    dcs = DataCollectionService(url = serverUrl, database = couchDB)

    def getJob(workload):
        # helper: build a job bound to the reco task of the workload
        job = Job()
        job["task"] = workload.getTask("reco").getPathName()
        job["workflow"] = workload.name()
        job["location"] = self.site
        job["owner"] = "evansde77"
        job["group"] = "DMWM"
        return job

    # two files covering run 1, lumis 1-2 and 3-4 respectively
    testFileA = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
    testFileA.setLocation([self.site])
    testFileA.addRun(Run(1, 1, 2))
    testFileB = WMFile(lfn = makeUUID(), size = 1024, events = 1024)
    testFileB.setLocation([self.site])
    testFileB.addRun(Run(1, 3, 4))
    testJobA = getJob(workload)
    testJobA.addFile(testFileA)
    testJobA.addFile(testFileB)

    # record the job as failed so its files become resubmit input
    dcs.failedJobs([testJobA])
    topLevelTask = workload.getTopLevelTask()[0]
    workload.truncate("Resubmit_TestWorkload", topLevelTask.getPathName(),
                      serverUrl, couchDB)

    return workload
def retrieveSchema(workflowName): specURL = os.path.join(reqmgrCouchURL, workflowName, "spec") helper = WMWorkloadHelper() print " retrieving original workflow...", helper.load(specURL) print "done." schema = {} for (key, value) in helper.data.request.schema.dictionary_().iteritems(): if value != None: schema[key] = value schema["Requestor"] = "linacre" schema["Group"] = "DATAOPS" del schema["RequestName"] del schema["CouchDBName"] del schema["CouchURL"] assign = {} assign["unmergedLFNBase"] = helper.data.properties.unmergedLFNBase assign["mergedLFNBase"] = helper.data.properties.mergedLFNBase assign["processingVersion"] = helper.data.properties.processingVersion assign["dashboardActivity"] = helper.data.properties.dashboardActivity assign["acquisitionEra"] = helper.data.properties.acquisitionEra topLevelTask = helper.getTopLevelTask()[0] assign["SiteWhitelist"] = topLevelTask.siteWhitelist() mergeTask = None for mergeTask in topLevelTask.childTaskIterator(): if mergeTask.taskType() == "Merge": if mergeTask.getPathName().find("DQM") == -1: break assign["MinMergeSize"] = mergeTask.jobSplittingParameters()["min_merge_size"] assign["MaxMergeSize"] = mergeTask.jobSplittingParameters()["max_merge_size"] assign["MaxMergeEvents"] = mergeTask.jobSplittingParameters().get("max_merge_events", 50000) return (schema, assign)