def BuildJobList(self, type=0):
    """
    Work out which jobs have to be submitted and store them in self.nj_list.

    type == 1: take the explicitly chosen jobs (if any) without screening.
    Otherwise every candidate job is screened: it is selected when its
    destinations survive the SE black/white lists (or no dataset is used)
    and its running state is 'Created'.  When self.nsjobs is positive the
    scan stops as soon as that many jobs have been selected.

    Raises CrabException if self.limitJobs is set and more than 500 jobs
    end up selected.
    """
    self.complete_List = common._db.nJobs('list')

    if type == 1:
        self.nj_list = []
        if self.chosenJobsList:
            self.nj_list = self.chosenJobsList
        return

    # site filter used to screen the destinations of each job
    from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
    self.blackWhiteListParser = SEBlackWhiteListParser(
        self.seWhiteList, self.seBlackList, common.logger())
    common.logger.debug('nsjobs ' + str(self.nsjobs))
    common.logger.debug('Total jobs ' + str(len(self.complete_List)))

    # an explicit user selection overrides the complete job list
    candidates = self.complete_List
    if self.chosenJobsList is not None:
        candidates = self.chosenJobsList

    selected = []
    skipped = []
    for job in common._db.getTask(candidates).jobs:
        surviving = self.blackWhiteListParser.cleanForBlackWhiteList(
            job['dlsDestination'])
        if surviving == '' and self.datasetPath is not None:
            # data are needed but no allowed site hosts them
            skipped.append(job['id'])
        elif job.runningJob['state'] == 'Created':
            selected.append(job['id'])
        else:
            # already handled job: it does not count toward the request
            continue
        if self.nsjobs > 0 and self.nsjobs == len(selected):
            break

    if self.nsjobs > len(selected):
        common.logger.info('asking to submit ' + str(self.nsjobs) + ' jobs, but only ' +
                           str(len(selected)) + ' left: submitting those')

    if len(skipped) > 0:
        skipped_ids = "".join([str(job_id) + "," for job_id in skipped])
        common.logger.info(
            "Jobs: " + skipped_ids + "\n\tskipped because no sites are hosting this data\n")
        self.submissionError()

    common.logger.debug('nj_list ' + str(selected))
    self.nj_list = selected

    # client-side cap on the task size (bug 85243)
    if self.limitJobs and len(self.nj_list) > 500:
        text = "The CRAB client will not submit task with more than 500 jobs.\n"
        text += " Use the server mode or submit your jobs in smaller groups"
        raise CrabException(text)
    return
def __init__(self, cfg_params, nj_list):
    """
    Keep the configuration and the job list, and prepare the SE filter.

    cfg_params -- configuration mapping; 'CMSSW.datasetpath' is mandatory,
                  'GRID.se_white_list' / 'GRID.se_black_list' are optional
    nj_list    -- ids of the jobs to work on
    """
    self.cfg_params = cfg_params
    self.nj_list = nj_list

    from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser

    allowed_se = cfg_params.get('GRID.se_white_list', [])
    banned_se = cfg_params.get('GRID.se_black_list', [])
    self.blackWhiteListParser = SEBlackWhiteListParser(
        allowed_se, banned_se, common.logger())

    # the literal string 'none' (any case) means "no input dataset"
    self.datasetpath = self.cfg_params['CMSSW.datasetpath']
    if self.datasetpath.lower() == 'none':
        self.datasetpath = None
    return
class Checker(Actor):
    """Actor that asks the scheduler which resources match each job."""

    def __init__(self, cfg_params, nj_list):
        """
        Keep the configuration and the job list, and prepare the SE filter.

        cfg_params -- configuration mapping; 'CMSSW.datasetpath' is
                      mandatory, the SE white/black lists are optional
        nj_list    -- ids of the jobs to check
        """
        self.cfg_params = cfg_params
        self.nj_list = nj_list

        from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser

        allowed_se = cfg_params.get('GRID.se_white_list', [])
        banned_se = cfg_params.get('GRID.se_black_list', [])
        self.blackWhiteListParser = SEBlackWhiteListParser(
            allowed_se, banned_se, common.logger())

        # the literal string 'none' (any case) means "no input dataset"
        self.datasetpath = self.cfg_params['CMSSW.datasetpath']
        if self.datasetpath.lower() == 'none':
            self.datasetpath = None
        return

    def run(self):
        """
        The main method of the class.

        For every job the destinations are filtered through the SE
        black/white lists; the scheduler is then queried once per distinct
        set of surviving destinations and the outcome is logged.
        """
        common.logger.debug("Checker::run() called")

        if len(self.nj_list) == 0:
            common.logger.debug("No jobs to check")
            return

        # destination set (comma joined) -> result of the scheduler query
        already_matched = {}
        for job in common._db.getTask(self.nj_list).jobs:
            job_id = job['jobId']
            raw_dest = job['dlsDestination'] or []
            dest = self.blackWhiteListParser.cleanForBlackWhiteList(
                raw_dest, True)

            # data are required but nothing survived the filtering
            if len(dest) == 0 and self.datasetpath:
                common.logger.info(
                    "No compatible site found, will not submit jobs " +
                    str(job_id))
                continue

            dest_key = ','.join(dest)
            if dest_key in already_matched:
                # this set of destinations has been reported already
                continue

            found = common.scheduler.listMatch(dest, True)
            already_matched[dest_key] = found
            if len(found) > 0:
                common.logger.info("Found " + str(len(found)) +
                                   " compatible CE(s) for job " +
                                   str(job_id) + " : " + str(found))
            else:
                common.logger.info(
                    "No compatible site found, will not submit jobs " +
                    str(job_id))
        return
def __init__( self, cfg_params, args ):
    """
    Initialise the splitter and restrict the block locations to allowed SEs.

    cfg_params -- configuration mapping (SE white/black lists, default
                  blacklist switch, optional 'CMSSW.fileblocks_file')
    args       -- dictionary that must contain 'blockSites', a mapping
                  block -> list of hosting sites; it is filtered in place

    For the REMOTEGLIDEIN scheduler the central black list is downloaded
    and added to the user one unless GRID.remove_default_blacklist is set
    to a non-zero value.
    """
    self.cfg_params = cfg_params
    self.args = args

    self.lumisPerJob = -1
    self.totalNLumis = 0
    self.theNumberOfJobs = 0
    self.limitNJobs = False
    self.limitTotalLumis = False
    self.limitJobLumis = False

    #self.maxEvents
    # init BlackWhiteListParser
    # the lists may arrive as comma separated strings: normalise to lists
    self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
    if isinstance(self.seWhiteList, str):
        self.seWhiteList = self.seWhiteList.split(',')
    seBlackList = cfg_params.get('GRID.se_black_list', [])
    if isinstance(seBlackList, str):
        seBlackList = seBlackList.split(',')

    if common.scheduler.name().upper() == 'REMOTEGLIDEIN':
        # use central black list
        removeBList = cfg_params.get("GRID.remove_default_blacklist", 0)
        if int(removeBList) == 0:
            blacklist = Downloader("http://cmsdoc.cern.ch/cms/LCG/crab/config/")
            result = blacklist.config("site_black_list.conf").strip().split(',')
            # split() yields '' for an empty file or a trailing comma:
            # such entries are not site names and are dropped
            blackAnaOps = [site for site in result if site]
            common.logger.debug("Enforced black list: %s " % blackAnaOps)
            if blackAnaOps:
                # build a new list: the original one may belong to cfg_params
                seBlackList = seBlackList + blackAnaOps
        else:
            common.logger.info("WARNING: Skipping default black list!")

    self.blackWhiteListParser = SEBlackWhiteListParser(
        self.seWhiteList, seBlackList, common.logger())

    if seBlackList != []:
        common.logger.info("SE black list applied to data location: %s" %
                           seBlackList)
    if self.seWhiteList != []:
        common.logger.info("SE white list applied to data location: %s" %
                           self.seWhiteList)

    # apply BW list
    blockSites = args['blockSites']
    common.logger.debug("List of blocks and used locations (SE):")
    # items() iterates over a snapshot in Python 2, so the values can be
    # replaced safely inside the loop
    for block, dlsDest in blockSites.items():
        noBsites = self.blackWhiteListParser.checkBlackList(dlsDest)
        sites = self.blackWhiteListParser.checkWhiteList(noBsites)
        # a block left without allowed sites keeps its original locations
        if sites:
            blockSites[block] = sites
        common.logger.debug("%s : %s" % (block, sites))
    args['blockSites'] = blockSites

    ## check if has been asked for a non default file to store/read analyzed fileBlocks
    defaultName = common.work_space.shareDir() + 'AnalyzedBlocks.txt'
    self.fileBlocks_FileName = os.path.abspath(
        self.cfg_params.get('CMSSW.fileblocks_file', defaultName))
def BuildJobList(self,type=0):
    """
    Fill self.nj_list with the ids of the jobs that will be submitted.

    type == 1 short-circuits the screening and uses the chosen jobs, if
    any.  Otherwise each candidate job is kept when its destinations pass
    the SE black/white lists (or no dataset is used) and its running state
    is 'Created'.  When self.global_data_service is set the possible
    locations of each job are first rewritten by GlobalDataService.

    Raises CrabException if self.limitJobs is set and more than 500 jobs
    are selected.
    """
    self.complete_List = common._db.nJobs('list')

    if type == 1:
        self.nj_list = []
        if self.chosenJobsList:
            self.nj_list = self.chosenJobsList
        return

    # build the site filter
    from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
    self.blackWhiteListParser = SEBlackWhiteListParser(
        self.seWhiteList, self.seBlackList, common.logger())
    common.logger.debug('nsjobs ' + str(self.nsjobs))
    common.logger.debug('Total jobs ' + str(len(self.complete_List)))

    # jobs picked by the user take precedence over the complete list
    if self.chosenJobsList is not None:
        pool = self.chosenJobsList
    else:
        pool = self.complete_List

    to_submit = []
    without_site = []
    for job in common._db.getTask(pool).jobs:
        if self.global_data_service:
            GlobalDataService.modifyPossibleJobLocations(job)
        remaining = self.blackWhiteListParser.cleanForBlackWhiteList(
            job['dlsDestination'])
        if remaining == '' and self.datasetPath is not None:
            # a dataset is required but no allowed site hosts it
            without_site.append(job['id'])
        elif job.runningJob['state'] == 'Created':
            to_submit.append(job['id'])
        else:
            # not in 'Created' state: ignore it and keep scanning
            continue
        if self.nsjobs > 0 and self.nsjobs == len(to_submit):
            break

    if self.nsjobs > len(to_submit):
        common.logger.info('asking to submit ' + str(self.nsjobs) + ' jobs, but only ' +
                           str(len(to_submit)) + ' left: submitting those')

    if len(without_site) > 0:
        id_text = "".join([str(one_id) + "," for one_id in without_site])
        common.logger.info("Jobs: " + id_text + "\n\tskipped because no sites are hosting this data\n")
        self.submissionError()

    common.logger.debug('nj_list ' + str(to_submit))
    self.nj_list = to_submit

    # client-side cap on the task size (bug 85243)
    if self.limitJobs and len(self.nj_list) > 500:
        complaint = "The CRAB client will not submit task with more than 500 jobs.\n"
        complaint += " Use the server mode or submit your jobs in smaller groups"
        raise CrabException(complaint)
    return
def __init__(self, cfg_params, nj_list):
    """
    Record the configuration and job list and build the SE site filter.

    cfg_params -- configuration mapping; must hold 'CMSSW.datasetpath',
                  may hold 'GRID.se_white_list' and 'GRID.se_black_list'
    nj_list    -- ids of the jobs to work on
    """
    self.cfg_params = cfg_params
    self.nj_list = nj_list

    from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser

    white = cfg_params.get('GRID.se_white_list', [])
    black = cfg_params.get('GRID.se_black_list', [])
    self.blackWhiteListParser = SEBlackWhiteListParser(white, black, common.logger())

    # 'none' in any letter case stands for "no input dataset"
    self.datasetpath = self.cfg_params['CMSSW.datasetpath']
    if self.datasetpath.lower() == 'none':
        self.datasetpath = None
    return
class Checker(Actor):
    """Actor that checks which resources are compatible with each job."""

    def __init__(self, cfg_params, nj_list):
        """
        Record the configuration and job list and build the SE site filter.

        cfg_params -- configuration mapping; must hold 'CMSSW.datasetpath',
                      may hold the SE white/black lists and
                      'CMSSW.global_data_service' (default 0)
        nj_list    -- ids of the jobs to check
        """
        self.cfg_params = cfg_params
        self.nj_list = nj_list

        from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser

        white = cfg_params.get('GRID.se_white_list', [])
        black = cfg_params.get('GRID.se_black_list', [])
        self.blackWhiteListParser = SEBlackWhiteListParser(white, black, common.logger())

        # 'none' in any letter case stands for "no input dataset"
        self.datasetpath = self.cfg_params['CMSSW.datasetpath']
        if self.datasetpath.lower() == 'none':
            self.datasetpath = None

        self.global_data_service = self.cfg_params.get('CMSSW.global_data_service', 0)
        return

    def run(self):
        """
        The main method of the class.

        Filters the destinations of every job through the SE black/white
        lists, queries the scheduler once for each distinct set of
        surviving destinations and logs what was found.
        """
        common.logger.debug("Checker::run() called")

        if len(self.nj_list) == 0:
            common.logger.debug("No jobs to check")
            return

        task = common._db.getTask(self.nj_list)
        # comma joined destinations -> scheduler answer for that set
        answers = {}

        for job in task.jobs:
            job_id = job['jobId']
            wanted = job['dlsDestination'] or []
            dest = self.blackWhiteListParser.cleanForBlackWhiteList(wanted, True)

            # a dataset is used and no destination survived the filter
            if len(dest) == 0 and self.datasetpath:
                common.logger.info("No compatible site found, will not submit jobs " + str(job_id))
                continue

            joined = ','.join(dest)
            if joined in answers:
                # same destinations as an earlier job: nothing new to report
                continue

            found = common.scheduler.listMatch(dest, True)
            answers[joined] = found
            if len(found) > 0:
                common.logger.info("Found " + str(len(found)) + " compatible CE(s) for job " + str(job_id) + " : " + str(found))
            else:
                common.logger.info("No compatible site found, will not submit jobs " + str(job_id))
        return
def __init__( self, cfg_params, args ):
    """
    Initialise the splitter state from the configuration.

    cfg_params -- configuration mapping (SE lists, optional
                  'CMSSW.fileblocks_file', 'CMSSW.global_data_service'
                  and 'CMSSW.global_data_rewrite')
    args       -- extra arguments, stored untouched in self.args
    """
    self.cfg_params = cfg_params
    self.args = args

    # lumi based splitting: nothing requested until the settings are read
    self.lumisPerJob = -1
    self.totalNLumis = 0
    self.theNumberOfJobs = 0
    self.limitNJobs = False
    self.limitTotalLumis = False
    self.limitJobLumis = False

    #self.maxEvents
    # site filter built from the user white and black lists
    self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
    banned_se = cfg_params.get('GRID.se_black_list', [])
    self.blackWhiteListParser = SEBlackWhiteListParser(
        self.seWhiteList, banned_se, common.logger())

    # file used to store/read the analyzed fileBlocks (user may override it)
    fallback_name = common.work_space.shareDir() + 'AnalyzedBlocks.txt'
    requested_name = self.cfg_params.get('CMSSW.fileblocks_file', fallback_name)
    self.fileBlocks_FileName = os.path.abspath(requested_name)

    service_flag = self.cfg_params.get('CMSSW.global_data_service', 0)
    self.global_data_service = int(service_flag)
    rewrite_flag = self.cfg_params.get('CMSSW.global_data_rewrite', 1)
    self.global_data_rewrite = int(rewrite_flag)
class Submitter(Actor):
    """
    Actor that submits the jobs of the current task from the client.

    run() selects the jobs (BuildJobList), groups them by destination and
    asks the scheduler for matching resources (performMatch), submits the
    matched groups (perfromSubmission) and reports to the dashboard
    (SendMLpre / SendMLpost).
    """

    def __init__(self, cfg_params, parsed_range, val):
        """
        Store the configuration and decode the submission option.

        cfg_params   -- configuration mapping; 'CMSSW.datasetpath' is
                        mandatory
        parsed_range -- job ids already parsed from the user request
        val          -- submission option: 'range', 'all', an expression
                        giving a positive integer (number of jobs), or one
                        giving a tuple / negative integer (explicit jobs,
                        taken from parsed_range)

        Raises CrabException when val is none of the above.
        """
        self.cfg_params = cfg_params
        self.limitJobs = True
        # get user request
        self.nsjobs = -1
        self.chosenJobsList = None
        if val:
            if val == 'range':  # for Resubmitter
                self.chosenJobsList = parsed_range
            elif val == 'all':
                pass
            else:
                # NOTE(review): eval() runs arbitrary code taken from the
                # user option; a stricter parser would be safer here.
                # The option is evaluated once, and anything that cannot be
                # evaluated is reported as a bad option below.
                try:
                    requested = eval(val)
                except Exception:
                    requested = None
                # exact type test on purpose: bool must not count as int
                is_int = type(requested) is int
                if is_int and requested > 0:
                    # positive number
                    self.nsjobs = requested
                elif type(requested) is tuple or (is_int and requested < 0):
                    self.chosenJobsList = parsed_range
                    self.nsjobs = len(self.chosenJobsList)
                else:
                    msg = 'Bad submission option <' + str(val) + '>\n'
                    msg += ' Must be an integer or "all"'
                    msg += ' Generic range is not allowed"'
                    raise CrabException(msg)

        self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
        self.seBlackList = cfg_params.get('GRID.se_black_list', [])
        # the literal string 'none' (any case) means "no input dataset"
        self.datasetPath = self.cfg_params['CMSSW.datasetpath']
        if self.datasetPath.lower() == 'none':
            self.datasetPath = None
        self.scram = Scram.Scram(cfg_params)
        return

    #wmbs
    def BuildJobList(self, type=0):
        """
        Work out which jobs have to be submitted and store them in
        self.nj_list.

        type == 1: take the explicitly chosen jobs (if any) without any
        screening.  Otherwise a job is selected when its destinations
        survive the SE black/white lists (or no dataset is used) and its
        running state is 'Created'; when self.nsjobs is positive the scan
        stops once that many jobs are selected.

        Raises CrabException if self.limitJobs is set and more than 500
        jobs are selected.
        """
        # total jobs
        nj_list = []
        self.complete_List = common._db.nJobs('list')
        if type == 1:
            self.nj_list = []
            if self.chosenJobsList:
                self.nj_list = self.chosenJobsList
            return

        # build job list
        from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
        self.blackWhiteListParser = SEBlackWhiteListParser(
            self.seWhiteList, self.seBlackList, common.logger())
        common.logger.debug('nsjobs ' + str(self.nsjobs))
        # get the first not already submitted
        common.logger.debug('Total jobs ' + str(len(self.complete_List)))

        jobSkippedInSubmission = []
        tmp_jList = self.complete_List
        if self.chosenJobsList is not None:
            tmp_jList = self.chosenJobsList
        for job in common._db.getTask(tmp_jList).jobs:
            cleanedBlackWhiteList = self.blackWhiteListParser.cleanForBlackWhiteList(
                job['dlsDestination'])
            if (cleanedBlackWhiteList != '') or (self.datasetPath is None):
                if job.runningJob['state'] != 'Created':
                    # not a fresh job: it does not count toward the request
                    continue
                nj_list.append(job['id'])
            else:
                jobSkippedInSubmission.append(job['id'])
            if self.nsjobs > 0 and self.nsjobs == len(nj_list):
                break

        if self.nsjobs > len(nj_list):
            common.logger.info('asking to submit ' + str(self.nsjobs) + ' jobs, but only ' +
                               str(len(nj_list)) + ' left: submitting those')
        if len(jobSkippedInSubmission) > 0:
            # every id is followed by a comma, as in the historical message
            mess = "".join([str(jobs) + "," for jobs in jobSkippedInSubmission])
            common.logger.info("Jobs: " + mess + "\n\tskipped because no sites are hosting this data\n")
            self.submissionError()

        # submit N from last submitted job
        common.logger.debug('nj_list ' + str(nj_list))
        self.nj_list = nj_list
        if self.limitJobs and len(self.nj_list) > 500:
            ###### FEDE FOR BUG 85243 ##############
            msg = "The CRAB client will not submit task with more than 500 jobs.\n"
            msg += " Use the server mode or submit your jobs in smaller groups"
            raise CrabException(msg)
        return

    def run(self):
        """
        The main method of the class: submit jobs in range self.nj_list
        """
        common.logger.debug("Submitter::run() called")

        start = time.time()

        self.BuildJobList()

        check = self.checkIfCreate()

        if check == 0:
            self.SendMLpre()

            list_matched, task = self.performMatch()
            njs = self.perfromSubmission(list_matched, task)

            stop = time.time()
            common.logger.debug("Submission Time: " + str(stop - start))

            msg = 'Total of %d jobs submitted' % njs
            if njs != len(self.nj_list):
                msg += ' (from %d requested).' % (len(self.nj_list))
            else:
                msg += '.'
            common.logger.info(msg)

            if (njs < len(self.nj_list) or len(self.nj_list) == 0):
                self.submissionError()

    #wmbs
    def checkIfCreate(self, type=0):
        """
        Return 1 when there is nothing that can be submitted, 0 otherwise.

        With type == 1 and a task without jobs, 1 is returned only if the
        task 'jobType' is 'Submitted'.  In every other case 1 is returned
        when no job of the task is in 'Created' state.
        """
        code = 0
        task = common._db.getTask()
        if type == 1 and len(task.jobs) == 0:
            if task['jobType'] == 'Submitted':
                common.logger.info("No Request to be submitted: first create it.\n")
                code = 1
        else:
            has_created = any(job.runningJob['state'] == 'Created'
                              for job in task.jobs)
            if not has_created:
                common.logger.info("No jobs to be submitted: first create them")
                code = 1
        return code

    def performMatch(self):
        """
        Group the selected jobs by destination and match each group.

        Sets self.sub_jobs to the list of job-id groups (one group per
        distinct destination list that has at least one selected job).

        Returns (matched, task): matched holds the indexes in
        self.sub_jobs of the groups for which the scheduler found
        resources, task is the task object read from the database.
        """
        common.logger.info("Checking available resources...")
        ### define here the list of distinct destinations sites list
        distinct_dests = common._db.queryDistJob_Attr('dlsDestination', 'jobId', self.nj_list)

        ### define here the list of jobs Id for each distinct list of sites
        self.sub_jobs = []   # list of jobs Id list to submit
        jobs_to_match = []   # first job Id of each group
        dests_to_match = []  # destination list of each group
        # The three lists are filled together so that a destination without
        # selected jobs cannot shift the indexes used below.
        for distDest in distinct_dests:
            jobs_at_dest = common._db.queryAttrJob({'dlsDestination': distDest}, 'jobId')
            sub_jobs_temp = [i for i in self.nj_list if i in jobs_at_dest]
            if len(sub_jobs_temp) > 0:
                self.sub_jobs.append(sub_jobs_temp)
                jobs_to_match.append(sub_jobs_temp[0])
                dests_to_match.append(distDest)

        matched = []
        task = common._db.getTask()
        for sel, id_job in enumerate(jobs_to_match):
            match = common.scheduler.listMatch(dests_to_match[sel], False)
            if len(match) > 0:
                common.logger.info("Found compatible site(s) for job " + str(id_job))
                matched.append(sel)
            else:
                common.logger.info("No compatible site found, will not submit jobs " + str(self.sub_jobs[sel]))
                self.submissionError()

        return matched, task

    def perfromSubmission(self, matched, task):
        """
        Submit the groups of self.sub_jobs whose index is in matched.

        matched -- indexes of the groups to submit (from performMatch)
        task    -- task object handed to the scheduler

        Returns the number of jobs that got a scheduler id.
        Raises CrabException("Job not submitted") when the scheduler
        submission raises CrabException.
        """
        njs = 0

        ### Progress Bar indicator, deactivate for debug
        if common.debugLevel == 0:
            term = TerminalController()

        if len(matched) > 0:
            common.logger.info(str(len(matched)) + " blocks of jobs will be submitted")
            common.logger.debug("Delegating proxy ")
            try:
                common.scheduler.delegateProxy()
            except CrabException:
                # best effort: submission is attempted anyway
                common.logger.debug("Proxy delegation failed ")

            for ii in matched:
                group = self.sub_jobs[ii]
                common.logger.debug('Submitting jobs ' + str(group))

                # fix arguments for unique naming of the output
                common._db.updateResubAttribs(group)

                try:
                    common.scheduler.submit(group, task)
                except CrabException:
                    common.logger.debug('common.scheduler.submit exception. Job(s) possibly not submitted')
                    raise CrabException("Job not submitted")

                if common.debugLevel == 0:
                    # the progress bar is cosmetic: go on without it if the
                    # terminal cannot draw one
                    try:
                        pbar = ProgressBar(term, 'Submitting ' + str(len(group)) + ' jobs')
                    except Exception:
                        pbar = None
                    if pbar:
                        pbar.update(float(ii + 1) / float(len(self.sub_jobs)), 'please wait')

                ### check the if the submission succeded Maybe not needed or at least simplified
                sched_Id = common._db.queryRunJob('schedulerId', group)
                listId = []
                run_jobToSave = {'status': 'S'}
                listRunField = []
                for j, job_id in enumerate(group):
                    # a job counts as submitted once it has a scheduler id
                    if str(sched_Id[j]) != '':
                        listId.append(job_id)
                        listRunField.append(run_jobToSave)
                        common.logger.debug("Submitted job # " + str(job_id))
                        njs += 1
                common._db.updateRunJob_(listId, listRunField)
                self.stateChange(listId, "SubSuccess")
                self.SendMLpost(group)
        else:
            common.logger.info("The whole task doesn't found compatible site ")

        return njs

    def submissionError(self):
        """
        Log the requirements used for the submission together with some
        hints, to help the user when the submission is not complete.
        """
        ## add some more verbose message in case submission is not complete
        msg = 'Submission performed using the Requirements: \n'
        ### TODO_ DS--BL: also report job type / code version once available
        site_lists = (('GRID.se_white_list', 'SE White List'),
                      ('GRID.se_black_list', 'SE Black List'),
                      ('GRID.ce_white_list', 'CE White List'),
                      ('GRID.ce_black_list', 'CE Black List'))
        for key, label in site_lists:
            if key in self.cfg_params:
                # str(): the configured value is not guaranteed to be a string
                msg += '\t' + label + ': ' + str(self.cfg_params[key]) + '\n'
        # The option is a string when set by the user and the integer 0 when
        # missing: compare as text so that both forms are recognised.
        removeDefBL = self.cfg_params.get('GRID.remove_default_blacklist', 0)
        if str(removeDefBL) == '0':
            msg += '\tNote: All CMS T1s are BlackListed by default \n'
        msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
        msg += '\tPlease check if:\n'
        msg += '\t\t -- the dataset is available at this site\n'
        msg += '\t\t -- the CMSSW version is available at this site\n'
        msg += '\t\t -- grid submission to CERN & FNAL CAFs is not allowed)\n'
        msg += '\tPlease also look at the Site Status Page for CMS sites,\n'
        msg += '\t to check if the sites hosting your data are ok\n'
        msg += '\t http://dashb-ssb.cern.ch/dashboard/request.py/siteviewhome\n'
        common.logger.info(msg)

        return

    def collect_MLInfo(self):
        """
        Prepare DashBoard information

        Returns a dictionary with the task level parameters; also sets
        self.executable from 'CMSSW.executable' (default 'cmsRun').
        """
        taskId = common._db.queryTask('name')
        gridName = common.scheduler.userName().strip()
        gridScheduler = common.scheduler.name()
        # REMOTEGLIDEIN is reported under the GLIDEIN name
        if gridScheduler.upper() == 'REMOTEGLIDEIN':
            gridScheduler = 'GLIDEIN'
        common.logger.debug("GRIDNAME: %s " % gridName)
        #### FEDE for taskType (savannah 76950)
        taskType = self.cfg_params.get('USER.tasktype', 'analysis')
        self.executable = self.cfg_params.get('CMSSW.executable', 'cmsRun')
        VO = self.cfg_params.get('GRID.virtual_organization', 'cms')

        params = {'tool': common.prog_name,
                  'SubmissionType': 'direct',
                  'JSToolVersion': common.prog_version_str,
                  'tool_ui': os.environ.get('HOSTNAME', ''),
                  'scheduler': gridScheduler,
                  'GridName': gridName,
                  'ApplicationVersion': self.scram.getSWVersion(),
                  'taskType': taskType,
                  'vo': VO,
                  'CMSUser': getUserName(),
                  'user': getUserName(),
                  'taskId': str(taskId),
                  'datasetFull': self.datasetPath,
                  'resubmitter': 'user',
                  'exe': self.executable}

        return params

    def SendMLpre(self):
        """
        Send Pre info to ML
        """
        params = self.collect_MLInfo()

        params['jobId'] = 'TaskMeta'

        common.apmon.sendToML(params)

        common.logger.debug('Submission DashBoard Pre-Submission report: %s' % str(params))

        return

    def SendMLpost(self, allList):
        """
        Send post-submission info to ML

        allList -- ids of the jobs to report; one report is sent per job,
                   with a monitoring job id whose format depends on the
                   scheduler in use.
        """
        task = common._db.getTask(allList)

        # task level parameters, completed below with the per-job ones
        params = dict(self.collect_MLInfo())

        msg = ''
        Sub_Type = 'Direct'
        for job in task.jobs:
            jj = job['jobId']
            jobId = ''
            localId = ''
            jid = str(job.runningJob['schedulerId'])
            schedName = common.scheduler.name()
            schedKind = schedName.upper()
            if schedKind == 'CONDOR_G':
                rb = 'OSG'
                taskHash = sha1(common._db.queryTask('name')).hexdigest()
                jobId = str(jj) + '_https://' + schedName + '/' + taskHash + '/' + str(jj)
                msg += ('JobID for ML monitoring is created for CONDOR_G scheduler: %s \n' % str(jobId))
            elif schedKind == 'GLIDEIN':
                rb = schedName
                jobId = str(jj) + '_https://' + str(jid)
                msg += ('JobID for ML monitoring is created for GLIDEIN scheduler: %s \n' % str(jobId))
            elif schedKind == 'REMOTEGLIDEIN':
                rb = str(task['serverName'])
                jobId = str(jj) + '_https://' + str(jid)
                msg += ('JobID for ML monitoring is created for REMOTEGLIDEIN scheduler: %s\n' % str(jobId))
            elif schedKind in ['LSF', 'CAF', 'PBS', 'SLURM']:
                jobId = str(jj) + "_https://" + schedKind + ":/" + jid + "-" + str(task['name']).replace("_", "-")
                msg += ('JobID for ML monitoring is created for %s scheduler: %s\n' % (schedKind, str(jobId)))
                rb = schedName
                localId = jid
            elif schedKind == 'CONDOR':
                taskHash = sha1(common._db.queryTask('name')).hexdigest()
                jobId = str(jj) + '_https://' + socket.gethostname() + '/' + taskHash + '/' + str(jj)
                rb = schedName
                msg += ('JobID for ML monitoring is created for CONDOR scheduler: %s\n' % str(jobId))
            elif schedKind == 'ARC':
                jobId = str(jj) + '_' + str(jid)
                msg += ('JobID for ML monitoring is created for ARC scheduler: %s\n' % str(jobId))
                rb = 'ARC'
            else:
                jobId = str(jj) + '_' + str(jid)
                msg += ('JobID for ML monitoring is created for gLite scheduler %s\n' % str(jobId))
                rb = str(job.runningJob['service'])

            # up to two destinations are listed, more are only counted
            dlsDest = job['dlsDestination']
            if len(dlsDest) == 1:
                T_SE = str(dlsDest[0])
            elif len(dlsDest) == 2:
                T_SE = str(dlsDest[0]) + ',' + str(dlsDest[1])
            else:
                T_SE = str(len(dlsDest)) + '_Selected_SE'

            infos = {'jobId': jobId,
                     'sid': jid,
                     'broker': rb,
                     'bossId': jj,
                     'SubmissionType': Sub_Type,
                     'TargetSE': T_SE,
                     'localId': localId}
            params.update(infos)

            msg += ('Submission DashBoard report: %s\n' % str(params))
            common.apmon.sendToML(params)
        #common.logger.debug(msg)
        return
class JobSplitter: def __init__( self, cfg_params, args ): self.cfg_params = cfg_params self.args=args self.lumisPerJob = -1 self.totalNLumis = 0 self.theNumberOfJobs = 0 self.limitNJobs = False self.limitTotalLumis = False self.limitJobLumis = False #self.maxEvents # init BlackWhiteListParser self.seWhiteList = cfg_params.get('GRID.se_white_list',[]) seBlackList = cfg_params.get('GRID.se_black_list',[]) self.blackWhiteListParser = SEBlackWhiteListParser(self.seWhiteList, seBlackList, common.logger()) ## check if has been asked for a non default file to store/read analyzed fileBlocks defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt' self.fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName)) self.global_data_service = int( self.cfg_params.get('CMSSW.global_data_service', 0) ) self.global_data_rewrite = int( self.cfg_params.get('CMSSW.global_data_rewrite', 1) ) def checkUserSettings(self): ## Events per job if self.cfg_params.has_key('CMSSW.events_per_job'): self.eventsPerJob =int( self.cfg_params['CMSSW.events_per_job']) self.selectEventsPerJob = 1 else: self.eventsPerJob = -1 self.selectEventsPerJob = 0 ## number of jobs if self.cfg_params.has_key('CMSSW.number_of_jobs'): self.theNumberOfJobs =int( self.cfg_params['CMSSW.number_of_jobs']) self.selectNumberOfJobs = 1 else: self.theNumberOfJobs = 0 self.selectNumberOfJobs = 0 if self.cfg_params.has_key('CMSSW.total_number_of_events'): self.total_number_of_events = int(self.cfg_params['CMSSW.total_number_of_events']) self.selectTotalNumberEvents = 1 if self.selectNumberOfJobs == 1: if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs): msg = 'Must specify at least one event per job. 
total_number_of_events > number_of_jobs ' raise CrabException(msg) else: self.total_number_of_events = 0 self.selectTotalNumberEvents = 0 return def checkLumiSettings(self): """ Check to make sure the user has specified enough information to perform splitting by Lumis to run the job """ settings = 0 if self.cfg_params.has_key('CMSSW.lumis_per_job'): self.lumisPerJob =int( self.cfg_params['CMSSW.lumis_per_job']) self.limitJobLumis = True settings += 1 if self.cfg_params.has_key('CMSSW.number_of_jobs'): self.theNumberOfJobs =int( self.cfg_params['CMSSW.number_of_jobs']) self.limitNJobs = True settings += 1 if self.cfg_params.has_key('CMSSW.total_number_of_lumis'): self.totalNLumis = int(self.cfg_params['CMSSW.total_number_of_lumis']) self.limitTotalLumis = (self.totalNLumis != -1) settings += 1 if settings != 2: msg = 'When splitting by lumi section you must specify two and only two of:\n' msg += ' number_of_jobs, lumis_per_job, total_number_of_lumis' raise CrabException(msg) if self.limitNJobs and self.limitJobLumis: self.limitTotalLumis = True self.totalNLumis = self.lumisPerJob * self.theNumberOfJobs # Has the user specified runselection? if (self.cfg_params.has_key('CMSSW.runselection')): common.logger.info('You have specified runselection and split by lumi.') common.logger.info('Good lumi list will be the intersection of runselection and lumimask or ADS (if any).') return def ComputeSubBlockSites( self, blockSites ): """ """ sub_blockSites = {} for k,v in blockSites.iteritems(): sites=self.blackWhiteListParser.checkWhiteList(v) if sites : sub_blockSites[k]=v if len(sub_blockSites) < 1: msg = 'WARNING: the sites %s is not hosting any part of data.'%self.seWhiteList raise CrabException(msg) return sub_blockSites ######################################################################## def jobSplittingByEvent( self ): """ Perform job splitting. Jobs run over an integer number of files and no more than one block. 
ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs, self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs, self.maxEvents, self.filesbyblock SETS: jobDestination - Site destination(s) for each job (a list of lists) self.total_number_of_jobs - Total # of jobs self.list_of_args - File(s) job will run on (a list of lists) """ jobDestination=[] self.checkUserSettings() if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ): msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.' raise CrabException(msg) blockSites = self.args['blockSites'] pubdata = self.args['pubdata'] filesbyblock=pubdata.getFiles() self.eventsbyblock=pubdata.getEventsPerBlock() self.eventsbyfile=pubdata.getEventsPerFile() self.parentFiles=pubdata.getParent() ## get max number of events self.maxEvents=pubdata.getMaxEvents() self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0)) noBboundary = int(self.cfg_params.get('CMSSW.no_block_boundary',0)) if noBboundary == 1: if self.total_number_of_events== -1: msg = 'You are selecting no_block_boundary=1 which does not allow to set total_number_of_events=-1\n' msg +='\tYou shoud get the number of event from DBS web interface and use it for your configuration.' raise CrabException(msg) if len(self.seWhiteList) == 0 or len(self.seWhiteList.split(',')) != 1: msg = 'You are selecting no_block_boundary=1 which requires to choose one and only one site.\n' msg += "\tPlease set se_white_list with the site's storage element name." 
raise CrabException(msg) blockSites = self.ComputeSubBlockSites(blockSites) # ---- Handle the possible job splitting configurations ---- # if (self.selectTotalNumberEvents): totalEventsRequested = self.total_number_of_events if (self.selectEventsPerJob): eventsPerJobRequested = self.eventsPerJob if (self.selectNumberOfJobs): totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob # If user requested all the events in the dataset if (totalEventsRequested == -1): eventsRemaining=self.maxEvents # If user requested more events than are in the dataset elif (totalEventsRequested > self.maxEvents): eventsRemaining = self.maxEvents common.logger.info("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.") # If user requested less events than are in the dataset else: eventsRemaining = totalEventsRequested # If user requested more events per job than are in the dataset if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents): eventsPerJobRequested = self.maxEvents # For user info at end totalEventCount = 0 if (self.selectTotalNumberEvents and self.selectNumberOfJobs): eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs) if (self.selectNumberOfJobs): common.logger.info("May not create the exact number_of_jobs requested.") if (self.theNumberOfJobs < 0): common.logger.info("ERROR: Negative number_of_jobs requested. Will result in no jobs.") # old... 
to remove Daniele totalNumberOfJobs = 999999999 blocks = blockSites.keys() blockCount = 0 # Backup variable in case self.maxEvents counted events in a non-included block numBlocksInDataset = len(blocks) jobCount = 0 list_of_lists = [] # list tracking which jobs are in which jobs belong to which block jobsOfBlock = {} parString = "" pString = "" filesEventCount = 0 msg='' # ---- Iterate over the blocks in the dataset until ---- # # ---- we've met the requested total # of events ---- # while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)): block = blocks[blockCount] blockCount += 1 if block not in jobsOfBlock.keys() : jobsOfBlock[block] = [] if self.eventsbyblock.has_key(block) : numEventsInBlock = self.eventsbyblock[block] common.logger.debug('Events in Block File '+str(numEventsInBlock)) files = filesbyblock[block] numFilesInBlock = len(files) if (numFilesInBlock <= 0): continue fileCount = 0 if noBboundary == 0: # DD # ---- New block => New job ---- # parString = "" pString="" # counter for number of events in files currently worked on filesEventCount = 0 # flag if next while loop should touch new file newFile = 1 # job event counter jobSkipEventCount = 0 # ---- Iterate over the files in the block until we've met the requested ---- # # ---- total # of events or we've gone over all the files in this block ---- # msg='\n' while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ): file = files[fileCount] if self.useParent==1: parent = self.parentFiles[file] common.logger.log(10-1, "File "+str(file)+" has the following parents: "+str(parent)) if newFile : try: numEventsInFile = self.eventsbyfile[file] common.logger.log(10-1, "File "+str(file)+" has "+str(numEventsInFile)+" events") # increase filesEventCount filesEventCount += numEventsInFile # Add file to current job parString += file + ',' if self.useParent==1: for f in parent : pString += f + ',' newFile = 0 except 
KeyError: common.logger.info("File "+str(file)+" has unknown number of events: skipping") eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining) # if less events in file remain than eventsPerJobRequested if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested): if noBboundary == 1: ## DD newFile = 1 fileCount += 1 else: # if last file in block if ( fileCount == numFilesInBlock-1 ) : # end job using last file, use remaining events in block # close job and touch new file fullString = parString[:-1] if self.useParent==1: fullParentString = pString[:-1] list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount),block]) else: list_of_lists.append([fullString,str(-1),str(jobSkipEventCount),block]) msg += "Job %s can run over %s events (last file in block).\n"%(str(jobCount+1), str(filesEventCount - jobSkipEventCount)) jobDestination.append(blockSites[block]) msg += "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount]))) # fill jobs of block dictionary jobsOfBlock[block].append(jobCount+1) # reset counter jobCount = jobCount + 1 totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount jobSkipEventCount = 0 # reset file pString = "" parString = "" filesEventCount = 0 newFile = 1 fileCount += 1 else : # go to next file newFile = 1 fileCount += 1 # if events in file equal to eventsPerJobRequested elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) : # close job and touch new file fullString = parString[:-1] if self.useParent==1: fullParentString = pString[:-1] list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount),block]) else: list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount),block]) msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested)) jobDestination.append(blockSites[block]) msg+= "Job %s 
Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount]))) jobsOfBlock[block].append(jobCount+1) # reset counter jobCount = jobCount + 1 totalEventCount = totalEventCount + eventsPerJobRequested eventsRemaining = eventsRemaining - eventsPerJobRequested jobSkipEventCount = 0 # reset file pString = "" parString = "" filesEventCount = 0 newFile = 1 fileCount += 1 # if more events in file remain than eventsPerJobRequested else : # close job but don't touch new file fullString = parString[:-1] if self.useParent==1: fullParentString = pString[:-1] list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount),block]) else: list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount),block]) msg += "Job %s can run over %s events.\n"%(str(jobCount+1),str(eventsPerJobRequested)) jobDestination.append(blockSites[block]) msg+= "Job %s Destination: %s\n"%(str(jobCount+1),str(SE2CMS(jobDestination[jobCount]))) jobsOfBlock[block].append(jobCount+1) # increase counter jobCount = jobCount + 1 totalEventCount = totalEventCount + eventsPerJobRequested eventsRemaining = eventsRemaining - eventsPerJobRequested # calculate skip events for last file # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file]) # remove all but the last file filesEventCount = self.eventsbyfile[file] pString_tmp='' if self.useParent==1: for f in parent : pString_tmp += f + ',' pString = pString_tmp parString = file + ',' pass # END if pass # END while (iterate over files in the block) pass # END while (iterate over blocks in the dataset) common.logger.debug(msg) self.ncjobs = self.total_number_of_jobs = jobCount if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ): common.logger.info("Could not run on all requested events because some blocks not hosted at allowed sites.") 
common.logger.info(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n") # skip check on block with no sites DD if noBboundary == 0 : self.checkBlockNoSite(blocks,jobsOfBlock,blockSites) # prepare dict output if self.global_data_service and self.global_data_rewrite: for job in list_of_lists: GlobalDataService.modifyJobFilenames( job ) dictOut = {} dictOut['params']= ['InputFiles','MaxEvents','SkipEvents','InputBlocks'] if self.useParent: dictOut['params']= ['InputFiles','ParentFiles','MaxEvents','SkipEvents','InputBlocks'] dictOut['args'] = list_of_lists dictOut['jobDestination'] = jobDestination dictOut['njobs']=self.total_number_of_jobs return dictOut


# keep trace of block with no sites to print a warning at the end
def checkBlockNoSite(self, blocks, jobsOfBlock, blockSites):
    """Log the job/site table per block and reject a task no site can run.

    Every entry of ``blocks`` that is a key of ``jobsOfBlock`` gets a running
    number; its destinations from ``blockSites`` are passed through the black
    list and then the white list of ``self.blackWhiteListParser``.  Blocks
    that keep at least one site are written, one name per line, to
    ``self.fileBlocks_FileName``; blocks that keep none are reported in a
    warning together with their job ranges.

    Raises:
        CrabException: when the numbered blocks without sites are exactly
            the numbered blocks considered.
    """
    # screen output
    screenOutput = "List of jobs and available destination sites:\n\n"
    noSiteBlock = []    # job ranges of the blocks left without any site
    bloskNoSite = []    # running numbers of those blocks
    allBlock = []       # running numbers of every block that has jobs
    blockCounter = 0
    saveFblocks = ''
    for block in blocks:
        if block in jobsOfBlock:
            blockCounter += 1
            allBlock.append(blockCounter)
            sites = self.blackWhiteListParser.checkWhiteList(
                self.blackWhiteListParser.checkBlackList(blockSites[block], [block]),
                [block])
            screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (
                blockCounter, spanRanges(jobsOfBlock[block]),
                ', '.join(SE2CMS(sites)))
            if len(sites) == 0:
                noSiteBlock.append(spanRanges(jobsOfBlock[block]))
                bloskNoSite.append(blockCounter)
            else:
                saveFblocks += str(block) + '\n'
    # NOTE(review): the collapsed source does not show whether this write sat
    # inside the loop; a single write after the loop is assumed - confirm.
    writeTXTfile(self, self.fileBlocks_FileName, saveFblocks)

    common.logger.info(screenOutput)

    if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
        msg = 'WARNING: No sites are hosting any part of data for block:\n '
        # The separator is appended after every item, the last one included,
        # and only when there is more than one item.
        virgola = ""
        if len(bloskNoSite) > 1:
            virgola = ","
        for block in bloskNoSite:
            msg += ' ' + str(block) + virgola
        msg += '\n\t\tRelated jobs:\n '
        virgola = ""
        if len(noSiteBlock) > 1:
            virgola = ","
        for range_jobs in noSiteBlock:
            msg += str(range_jobs) + virgola
        msg += '\n\t\twill not be submitted and this block of data can not be analyzed!\n'
        if 'GRID.se_white_list' in self.cfg_params:
            msg += '\tWARNING: SE White List: '+self.cfg_params['GRID.se_white_list']+'\n'
            msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += '\tPlease check if the dataset is available at this site!)'
        if 'GRID.ce_white_list' in self.cfg_params:
            msg += '\tWARNING: CE White List: '+self.cfg_params['GRID.ce_white_list']+'\n'
            msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += '\tPlease check if the dataset is available at this site!)\n'
        common.logger.info(msg)

    # NOTE(review): assumed to be at method level (not nested in the warning
    # branch above), so an empty block list is rejected as well - confirm.
    if bloskNoSite == allBlock:
        msg = 'Requested jobs cannot be Created! \n'
        if 'GRID.se_white_list' in self.cfg_params:
            msg += '\tWARNING: SE White List: '+self.cfg_params['GRID.se_white_list']+'\n'
            msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += '\tPlease check if the dataset is available at this site!)'
        if 'GRID.ce_white_list' in self.cfg_params:
            msg += '\tWARNING: CE White List: '+self.cfg_params['GRID.ce_white_list']+'\n'
            msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
            msg += '\tPlease check if the dataset is available at this site!)\n'
        raise CrabException(msg)

    return


########################################################################
def jobSplittingByRun(self):
    """Split the task with the 'RunBased' splitting algorithm.

    Builds a Fileset from ``self.args['pubdata'].getListFiles()``, attaching
    to every file the sites of its block (``self.args['blockSites']``) and
    the first run number listed for it, then creates one job per job group
    produced by the splitter, up to ``self.theNumberOfJobs`` jobs.

    Returns:
        dict with keys 'params', 'args', 'jobDestination' and 'njobs'.
    """
    self.checkUserSettings()
    blockSites = self.args['blockSites']
    pubdata = self.args['pubdata']

    if self.selectNumberOfJobs == 0:
        # no number_of_jobs requested: use a limit that is effectively "all"
        self.theNumberOfJobs = 9999999

    thefiles = Fileset(name='FilesToSplit')
    fileList = pubdata.getListFiles()
    for f in fileList:
        block = f['Block']['Name']
        try:
            f['Block']['StorageElementList'].extend(blockSites[block])
        except Exception:
            # Best effort as before (e.g. block missing from blockSites): the
            # file is skipped.  Narrowed from a bare except so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
        wmbsFile = File(f['LogicalFileName'])
        if not blockSites[block]:
            msg = 'WARNING: No sites are hosting any part of data for block: %s\n' %block
            msg += 'Related jobs will not be submitted and this block of data can not be analyzed'
            common.logger.debug(msg)
        for site in blockSites[block]:
            wmbsFile['locations'].add(site)
        wmbsFile['block'] = block
        runNum = f['RunsList'][0]['RunNumber']
        wmbsFile.addRun(Run(runNumber=runNum))
        thefiles.addFile(wmbsFile)

    work = Workflow()
    subs = Subscription(
        fileset = thefiles,
        workflow = work,
        split_algo = 'RunBased',
        type = "Processing")
    splitter = SplitterFactory()
    jobfactory = splitter(subs)

    # loop over all runs
    list_of_lists = []
    jobDestination = []
    list_of_blocks = []
    count = 0
    for jobGroup in jobfactory():
        if count < self.theNumberOfJobs:
            res = self.getJobInfo(jobGroup)
            fullString = ','.join(res['lfns'])
            list_of_blocks.append(res['block'])
            # NOTE(review): this joins the blocks of ALL jobs created so far,
            # not only the current one; kept as in the original - confirm.
            blockString = ','.join(list_of_blocks)
            list_of_lists.append([fullString, str(-1), str(0), blockString])
            # need to check single file location
            jobDestination.append(res['locations'])
            count += 1

    # prepare dict output
    if self.global_data_service and self.global_data_rewrite:
        for job in list_of_lists:
            GlobalDataService.modifyJobFilenames(job)
    dictOut = {}
    dictOut['params'] = ['InputFiles','MaxEvents','SkipEvents','InputBlocks']
    dictOut['args'] = list_of_lists
    dictOut['jobDestination'] = jobDestination
    dictOut['njobs'] = count
    self.cacheBlocks(list_of_blocks, jobDestination)

    return dictOut


def getJobInfo(self, jobGroup):
    """Collect the file names, locations and block of one job group.

    Returns:
        dict with 'lfns' (the 'lfn' of every file of every job in the
        group) and 'locations' (the locations of the first file only).
        'block' is the block of that first file and is present only when
        the first file has at least one location.
    """
    res = {}
    lfns = []
    locations = []
    filesSeen = 0
    for job in jobGroup.jobs:
        for jobFile in job.getFiles():
            lfns.append(jobFile['lfn'])
            for loc in jobFile['locations']:
                if filesSeen < 1:
                    locations.append(loc)
                    res['block'] = jobFile['block']
            # NOTE(review): counter assumed to advance once per file, so that
            # only the first file contributes locations - confirm.
            filesSeen = filesSeen + 1
    res['lfns'] = lfns
    res['locations'] = locations
    return res


########################################################################
def prepareSplittingNoInput(self):
    """Derive the missing splitting quantity from the two the user gave.

    Sets ``self.total_number_of_jobs`` and, depending on the selection,
    ``self.total_number_of_events`` or ``self.eventsPerJob``.

    Raises:
        CrabException: if ``self.total_number_of_events`` is negative.
    """
    if (self.selectEventsPerJob):
        common.logger.info('Required '+str(self.eventsPerJob)+' events per job ')

    if (self.selectNumberOfJobs):
        common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    if (self.selectTotalNumberEvents):
        common.logger.info('Required '+str(self.total_number_of_events)+' events in total ')

    if (self.total_number_of_events < 0):
        msg='Cannot split jobs per Events with "-1" as total number of events'
        raise CrabException(msg)

    if (self.selectEventsPerJob):
        if (self.selectTotalNumberEvents):
            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
        elif (self.selectNumberOfJobs):
            self.total_number_of_jobs = self.theNumberOfJobs
            self.total_number_of_events = int(self.theNumberOfJobs*self.eventsPerJob)
    elif (self.selectNumberOfJobs):
        self.total_number_of_jobs = self.theNumberOfJobs
        self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)


def jobSplittingNoInput(self):
    """
    Perform job splitting based on number of event per job

    Returns a dict with keys 'params', 'args', 'jobDestination' and 'njobs';
    the per-job argument lists are also stored in ``self.list_of_args``.

    Raises:
        CrabException: unless exactly two of total_number_of_events,
            events_per_job and number_of_jobs were selected.
    """
    common.logger.debug('Splitting per events')
    self.checkUserSettings()
    jobDestination = []
    if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
        msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
        raise CrabException(msg)

    managedGenerators = self.args['managedGenerators']
    generator = self.args['generator']
    firstLumi = int(self.cfg_params.get('CMSSW.first_lumi', 1))

    self.prepareSplittingNoInput()

    common.logger.debug('N jobs '+str(self.total_number_of_jobs))

    # is there any remainder?
    check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)

    common.logger.debug('Check '+str(check))

    common.logger.info(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
    if check > 0:
        common.logger.info('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        ## Since there is no input, any site is good
        jobDestination.append([""])  # must be empty to correctly write the XML
        args = []
        if (firstLumi):  # Pythia first lumi
            args.append(str(int(firstLumi)+i))
        if (generator in managedGenerators):
            args.append(generator)
            if (generator == 'comphep' and i == 0):
                # COMPHEP is brain-dead and wants event #'s like 1,100,200,300
                args.append('1')
            else:
                args.append(str(i*self.eventsPerJob))
        args.append(str(self.eventsPerJob))
        self.list_of_args.append(args)

    # prepare dict output
    dictOut = {}
    dictOut['params'] = ['MaxEvents']
    if (firstLumi):
        dictOut['params'] = ['FirstLumi','MaxEvents']
        if (generator in managedGenerators):
            dictOut['params'] = ['FirstLumi', 'Generator', 'FirstEvent', 'MaxEvents']
    else:
        if (generator in managedGenerators):
            dictOut['params'] = ['Generator', 'FirstEvent', 'MaxEvents']
    dictOut['args'] = self.list_of_args
    dictOut['jobDestination'] = jobDestination
    dictOut['njobs'] = self.total_number_of_jobs

    return dictOut


def jobSplittingForScript(self):
    """
    Perform job splitting based on number of job

    Returns a dict with keys 'params', 'args', 'jobDestination' and 'njobs';
    the per-job argument lists are also stored in ``self.list_of_args``.

    Raises:
        CrabException: if number_of_jobs was not specified.
    """
    self.checkUserSettings()
    if (self.selectNumberOfJobs == 0):
        msg = 'must specify number_of_jobs.'
        # Fixed: the original raised the undefined name "crabexception",
        # which surfaced as a NameError instead of the intended error.
        raise CrabException(msg)
    jobDestination = []
    common.logger.debug('Splitting per job')
    common.logger.info('Required '+str(self.theNumberOfJobs)+' jobs in total ')

    self.prepareSplittingNoInput()

    common.logger.debug('N jobs '+str(self.total_number_of_jobs))

    common.logger.info(str(self.total_number_of_jobs)+' jobs can be created')

    # argument is seed number.$i
    self.list_of_args = []
    for i in range(self.total_number_of_jobs):
        args = []
        jobDestination.append([""])
        if self.eventsPerJob != 0:
            args.append(str(self.eventsPerJob))
        self.list_of_args.append(args)

    # prepare dict output
    dictOut = {}
    dictOut['params'] = ['MaxEvents']
    dictOut['args'] = self.list_of_args
    dictOut['jobDestination'] = jobDestination
    dictOut['njobs'] = self.total_number_of_jobs
    return dictOut


def jobSplittingByLumi(self):
    """
    Split task into jobs by Lumi section paying attention to which
    lumis should be run (according to the analysis dataset).
    This uses WMBS job splitting which does not split files over jobs
    so the job will have AT LEAST as many lumis as requested, perhaps
    more

    Returns a dict with keys 'params', 'args', 'jobDestination' and 'njobs'.
    """
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
    common.logger.debug('Splitting by Lumi')
    self.checkLumiSettings()

    blockSites = self.args['blockSites']
    pubdata = self.args['pubdata']

    lumisPerFile = pubdata.getLumis()
    self.parentFiles = pubdata.getParent()

    # Make the list of WMBS files for job splitter
    fileList = pubdata.getListFiles()
    wmFileList = []
    for jobFile in fileList:
        block = jobFile['Block']['Name']
        try:
            jobFile['Block']['StorageElementList'].extend(blockSites[block])
        except Exception:
            # Best effort as before (e.g. block missing from blockSites): the
            # file is skipped.  Narrowed from a bare except so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
        wmbsFile = File(jobFile['LogicalFileName'])
        if not blockSites[block]:
            msg = 'WARNING: No sites are hosting any part of data for block: %s\n' %block
            msg += 'Related jobs will not be submitted and this block of data can not be analyzed'
            common.logger.debug(msg)
        for site in blockSites[block]:
            wmbsFile['locations'].add(site)
        wmbsFile['block'] = block
        for lumi in lumisPerFile[jobFile['LogicalFileName']]:
            wmbsFile.addRun(Run(lumi[0], lumi[1]))
        wmFileList.append(wmbsFile)

    fileSet = set(wmFileList)
    thefiles = Fileset(name='FilesToSplit', files = fileSet)

    # Create the factory and workflow
    work = Workflow()
    subs = Subscription(fileset = thefiles, workflow = work,
                        split_algo = 'LumiBased', type = "Processing")
    splitter = SplitterFactory()
    jobFactory = splitter(subs)

    list_of_lists = []
    jobDestination = []
    jobCount = 0
    lumisCreated = 0
    list_of_blocks = []
    if not self.limitJobLumis:
        # lumis_per_job was not given: derive it from the other two settings
        if self.totalNLumis > 0:
            self.lumisPerJob = max(self.totalNLumis // self.theNumberOfJobs,1)
        else:
            self.lumisPerJob = pubdata.getMaxLumis() // self.theNumberOfJobs + 1
        common.logger.info('Each job will process about %s lumis.' %
                           self.lumisPerJob)

    for jobGroup in jobFactory(lumis_per_job = self.lumisPerJob):
        for job in jobGroup.jobs:
            if (self.limitNJobs and jobCount >= self.theNumberOfJobs):
                common.logger.info('Requested number of jobs reached.')
                break
            if (self.limitTotalLumis and lumisCreated >= self.totalNLumis):
                common.logger.info('Requested number of lumis reached.')
                break
            lumis = []
            lfns = []
            parents = []    # parent file names, used only when useParent == 1
            locations = []
            blocks = []
            firstFile = True
            # Collect information from all the files
            for jobFile in job.getFiles():
                doFile = False
                if firstFile:  # Get locations from first file in the job
                    for loc in jobFile['locations']:
                        locations.append(loc)
                    blocks.append(jobFile['block'])
                    firstFile = False
                # Accumulate Lumis from all files
                for lumiList in jobFile['runs']:
                    theRun = lumiList.run
                    for theLumi in list(lumiList):
                        if (not self.limitTotalLumis) or \
                           (lumisCreated < self.totalNLumis):
                            doFile = True
                            lumisCreated += 1
                            lumis.append( (theRun, theLumi) )
                if doFile:
                    lfns.append(jobFile['lfn'])
                    if self.useParent==1:
                        for p in self.parentFiles[jobFile['lfn']]:
                            parents.append(p)
            fileString = ','.join(lfns)
            lumiLister = LumiList(lumis = lumis)
            lumiString = lumiLister.getCMSSWString()
            blockString = ','.join(blocks)
            if self.useParent==1:
                pfileString = ','.join(parents)
                common.logger.debug("Files: "+fileString+" with the following parents: "+pfileString)
                list_of_lists.append([fileString, pfileString, str(-1), str(0), lumiString,blockString])
            else:
                list_of_lists.append([fileString, str(-1), str(0), lumiString, blockString])
            list_of_blocks.append(blocks)
            jobDestination.append(locations)
            jobCount += 1
            common.logger.debug('Job %s will run on %s files and %s lumis '
                % (jobCount, len(lfns), len(lumis) ))

    common.logger.info('%s jobs created to run on %s lumis' %
                       (jobCount, lumisCreated))

    # Prepare dict output matching back to non-WMBS job creation
    if self.global_data_service and self.global_data_rewrite:
        for job in list_of_lists:
            GlobalDataService.modifyJobFilenames(job)
    dictOut = {}
    dictOut['params'] = ['InputFiles', 'MaxEvents', 'SkipEvents', 'Lumis','InputBlocks']
    if self.useParent==1:
        dictOut['params']= ['InputFiles','ParentFiles','MaxEvents','SkipEvents','Lumis','InputBlocks']
    dictOut['args'] = list_of_lists
    dictOut['jobDestination'] = jobDestination
    dictOut['njobs'] = jobCount
    self.cacheBlocks(list_of_blocks, jobDestination)

    return dictOut


def cacheBlocks(self, blocks, destinations):
    """Write to ``self.fileBlocks_FileName`` the blocks that still have a site.

    ``blocks[i]`` holds the block name(s) of job ``i`` and ``destinations[i]``
    its sites.  A job's blocks are written, one per line, only when its
    destinations are not emptied by the black list and the white list.

    ``blocks[i]`` may be a list of names (as passed by jobSplittingByLumi)
    or a single name string (as passed by jobSplittingByRun).  The original
    iterated a plain string character by character; a string is now written
    as one block name.
    """
    try:
        stringTypes = basestring  # Python 2
    except NameError:
        stringTypes = str
    lines = []
    for i, entry in enumerate(blocks):
        sites = self.blackWhiteListParser.checkWhiteList(
            self.blackWhiteListParser.checkBlackList(destinations[i]))
        if len(sites) != 0:
            if isinstance(entry, stringTypes):
                entry = [entry]
            for block in entry:
                lines.append(str(block) + '\n')
    writeTXTfile(self, self.fileBlocks_FileName, ''.join(lines))


def Algos(self):
    """
    Define key splittingType matrix

    Returns a dict mapping each splitting-type name to the bound method
    that implements it.
    """
    SplitAlogs = {
        'EventBased' : self.jobSplittingByEvent,
        'RunBased' : self.jobSplittingByRun,
        'LumiBased' : self.jobSplittingByLumi,
        'NoInput' : self.jobSplittingNoInput,
        'ForScript' : self.jobSplittingForScript
    }
    return SplitAlogs
def __init__(self, cfg_params, args):
    """Store the configuration and apply the SE black/white lists.

    Args:
        cfg_params: configuration mapping; the keys read here are
            'GRID.se_white_list', 'GRID.se_black_list',
            'GRID.remove_default_blacklist' and 'CMSSW.fileblocks_file'.
        args: mapping that must contain 'blockSites' (block -> list of
            sites).  Each block whose sites survive the black and white
            lists gets its entry replaced by the filtered list, in place.
    """
    self.cfg_params = cfg_params
    self.args = args
    self.lumisPerJob = -1
    self.totalNLumis = 0
    self.theNumberOfJobs = 0
    self.limitNJobs = False
    self.limitTotalLumis = False
    self.limitJobLumis = False
    #self.maxEvents

    # init BlackWhiteListParser
    # A list given as one comma-separated string is split into its items.
    self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
    if isinstance(self.seWhiteList, str):
        self.seWhiteList = self.seWhiteList.split(',')
    seBlackList = cfg_params.get('GRID.se_black_list', [])
    if isinstance(seBlackList, str):
        seBlackList = seBlackList.split(',')
    if common.scheduler.name().upper() == 'REMOTEGLIDEIN':
        # use central black list
        removeBList = cfg_params.get("GRID.remove_default_blacklist", 0)
        blackAnaOps = None
        if int(removeBList) == 0:
            blacklist = Downloader(
                "http://cmsdoc.cern.ch/cms/LCG/crab/config/")
            result = blacklist.config(
                "site_black_list.conf").strip().split(',')
            # str.split always returns a list, so the original
            # "result != None" test could never fail, and an empty download
            # produced [''] (a blank site name).  Blank names are dropped.
            blackAnaOps = [site for site in result if site.strip()]
            common.logger.debug("Enforced black list: %s " % blackAnaOps)
        else:
            common.logger.info("WARNING: Skipping default black list!")
        if int(removeBList) == 0 and blackAnaOps:
            # Build a new list: "+=" would also modify a list object owned
            # by cfg_params.
            seBlackList = seBlackList + blackAnaOps

    self.blackWhiteListParser = SEBlackWhiteListParser(
        self.seWhiteList, seBlackList, common.logger())

    if seBlackList != []:
        common.logger.info("SE black list applied to data location: %s" %\
                       seBlackList)
    if self.seWhiteList != []:
        common.logger.info("SE white list applied to data location: %s" %\
                       self.seWhiteList)
    # apply BW list
    blockSites = args['blockSites']
    common.logger.debug("List of blocks and used locations (SE):")
    # Iterate over a snapshot because entries are replaced inside the loop
    # (also works where dict.iteritems no longer exists).
    for block, dlsDest in list(blockSites.items()):
        noBsites = self.blackWhiteListParser.checkBlackList(dlsDest)
        sites = self.blackWhiteListParser.checkWhiteList(noBsites)
        if sites:
            blockSites[block] = sites
        # NOTE(review): assumed to log every block, not only those that
        # kept sites - the collapsed source does not show the nesting.
        common.logger.debug("%s : %s" % (block, sites))
    args['blockSites'] = blockSites

    ## check if has been asked for a non default file to store/read analyzed fileBlocks
    defaultName = common.work_space.shareDir() + 'AnalyzedBlocks.txt'
    self.fileBlocks_FileName = os.path.abspath(
        self.cfg_params.get('CMSSW.fileblocks_file', defaultName))
class JobSplitter:
    def __init__(self, cfg_params, args):
        """Store the configuration and apply the SE black/white lists.

        Args:
            cfg_params: configuration mapping; the keys read here are
                'GRID.se_white_list', 'GRID.se_black_list',
                'GRID.remove_default_blacklist' and 'CMSSW.fileblocks_file'.
            args: mapping that must contain 'blockSites' (block -> list of
                sites).  Each block whose sites survive the black and white
                lists gets its entry replaced by the filtered list, in place.
        """
        self.cfg_params = cfg_params
        self.args = args
        self.lumisPerJob = -1
        self.totalNLumis = 0
        self.theNumberOfJobs = 0
        self.limitNJobs = False
        self.limitTotalLumis = False
        self.limitJobLumis = False
        #self.maxEvents

        # init BlackWhiteListParser
        # A list given as one comma-separated string is split into its items.
        self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
        if isinstance(self.seWhiteList, str):
            self.seWhiteList = self.seWhiteList.split(',')
        seBlackList = cfg_params.get('GRID.se_black_list', [])
        if isinstance(seBlackList, str):
            seBlackList = seBlackList.split(',')
        if common.scheduler.name().upper() == 'REMOTEGLIDEIN':
            # use central black list
            removeBList = cfg_params.get("GRID.remove_default_blacklist", 0)
            blackAnaOps = None
            if int(removeBList) == 0:
                blacklist = Downloader(
                    "http://cmsdoc.cern.ch/cms/LCG/crab/config/")
                result = blacklist.config(
                    "site_black_list.conf").strip().split(',')
                # str.split always returns a list, so the original
                # "result != None" test could never fail, and an empty
                # download produced [''] (a blank site name).  Blank names
                # are dropped.
                blackAnaOps = [site for site in result if site.strip()]
                common.logger.debug("Enforced black list: %s " % blackAnaOps)
            else:
                common.logger.info("WARNING: Skipping default black list!")
            if int(removeBList) == 0 and blackAnaOps:
                # Build a new list: "+=" would also modify a list object
                # owned by cfg_params.
                seBlackList = seBlackList + blackAnaOps

        self.blackWhiteListParser = SEBlackWhiteListParser(
            self.seWhiteList, seBlackList, common.logger())

        if seBlackList != []:
            common.logger.info("SE black list applied to data location: %s" %\
                           seBlackList)
        if self.seWhiteList != []:
            common.logger.info("SE white list applied to data location: %s" %\
                           self.seWhiteList)
        # apply BW list
        blockSites = args['blockSites']
        common.logger.debug("List of blocks and used locations (SE):")
        # Iterate over a snapshot because entries are replaced inside the
        # loop (also works where dict.iteritems no longer exists).
        for block, dlsDest in list(blockSites.items()):
            noBsites = self.blackWhiteListParser.checkBlackList(dlsDest)
            sites = self.blackWhiteListParser.checkWhiteList(noBsites)
            if sites:
                blockSites[block] = sites
            # NOTE(review): assumed to log every block, not only those that
            # kept sites - the collapsed source does not show the nesting.
            common.logger.debug("%s : %s" % (block, sites))
        args['blockSites'] = blockSites

        ## check if has been asked for a non default file to store/read analyzed fileBlocks
        defaultName = common.work_space.shareDir() + 'AnalyzedBlocks.txt'
        self.fileBlocks_FileName = os.path.abspath(
            self.cfg_params.get('CMSSW.fileblocks_file', defaultName))

    def checkUserSettings(self):
        """Read the event-splitting settings from ``self.cfg_params``.

        For each of 'CMSSW.events_per_job', 'CMSSW.number_of_jobs' and
        'CMSSW.total_number_of_events' the value is stored as an int and the
        matching ``select*`` flag is set to 1; when the key is absent the
        value is set to -1, 0 and 0 respectively and the flag to 0.

        Raises:
            CrabException: if both total_number_of_events (other than -1)
                and number_of_jobs are given and there are fewer events
                than jobs.
        """
        ## Events per job
        if 'CMSSW.events_per_job' in self.cfg_params:
            self.eventsPerJob = int(self.cfg_params['CMSSW.events_per_job'])
            self.selectEventsPerJob = 1
        else:
            self.eventsPerJob = -1
            self.selectEventsPerJob = 0

        ## number of jobs
        if 'CMSSW.number_of_jobs' in self.cfg_params:
            self.theNumberOfJobs = int(self.cfg_params['CMSSW.number_of_jobs'])
            self.selectNumberOfJobs = 1
        else:
            self.theNumberOfJobs = 0
            self.selectNumberOfJobs = 0

        if 'CMSSW.total_number_of_events' in self.cfg_params:
            self.total_number_of_events = int(
                self.cfg_params['CMSSW.total_number_of_events'])
            self.selectTotalNumberEvents = 1
            if self.selectNumberOfJobs == 1:
                if (self.total_number_of_events != -1) and int(
                        self.total_number_of_events) < int(
                            self.theNumberOfJobs):
                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
                    raise CrabException(msg)
        else:
            self.total_number_of_events = 0
            self.selectTotalNumberEvents = 0

        return

    def checkLumiSettings(self):
        """
        Check to make sure the user has specified enough information to
        perform splitting by Lumis to run the job

        Reads 'CMSSW.lumis_per_job', 'CMSSW.number_of_jobs' and
        'CMSSW.total_number_of_lumis' from ``self.cfg_params`` and sets the
        matching value and ``limit*`` flag for each key that is present.

        Raises:
            CrabException: unless exactly two of the three keys are present.
        """
        settings = 0
        if 'CMSSW.lumis_per_job' in self.cfg_params:
            self.lumisPerJob = int(self.cfg_params['CMSSW.lumis_per_job'])
            self.limitJobLumis = True
            settings += 1

        if 'CMSSW.number_of_jobs' in self.cfg_params:
            self.theNumberOfJobs = int(self.cfg_params['CMSSW.number_of_jobs'])
            self.limitNJobs = True
            settings += 1

        if 'CMSSW.total_number_of_lumis' in self.cfg_params:
            self.totalNLumis = int(
                self.cfg_params['CMSSW.total_number_of_lumis'])
            # -1 means "no limit on the total number of lumis"
            self.limitTotalLumis = (self.totalNLumis != -1)
            settings += 1

        if settings != 2:
            msg = 'When splitting by lumi section you must specify two and only two of:\n'
            msg += ' number_of_jobs, lumis_per_job, total_number_of_lumis'
            raise CrabException(msg)
        if self.limitNJobs and self.limitJobLumis:
            # jobs and lumis per job fix the total, so enforce it as a limit
            self.limitTotalLumis = True
            self.totalNLumis = self.lumisPerJob * self.theNumberOfJobs

        # Has the user specified runselection?
        if 'CMSSW.runselection' in self.cfg_params:
            common.logger.info(
                'You have specified runselection and split by lumi.')
            common.logger.info(
                'Good lumi list will be the intersection of runselection and lumimask or ADS (if any).'
            )
        return

    def ComputeSubBlockSites(self, blockSites):
        """Keep only the blocks that have a site passing the white list.

        Args:
            blockSites: mapping of block -> list of sites.

        Returns:
            A new dict with the entries of ``blockSites`` for which
            ``self.blackWhiteListParser.checkWhiteList`` returns a non-empty
            result; the values are the original, unfiltered site lists.

        Raises:
            CrabException: if no block is kept.
        """
        sub_blockSites = {}
        for k, v in blockSites.items():
            sites = self.blackWhiteListParser.checkWhiteList(v)
            if sites:
                sub_blockSites[k] = v
        if len(sub_blockSites) < 1:
            msg = 'WARNING: the sites %s is not hosting any part of data.' % self.seWhiteList
            raise CrabException(msg)
        return sub_blockSites

    ######################################################################## def jobSplittingByEvent(self): """ Perform job splitting. Jobs run over an integer number of files and no more than one block. ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs, self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs, self.maxEvents, self.filesbyblock SETS: jobDestination - Site destination(s) for each job (a list of lists) self.total_number_of_jobs - Total # of jobs self.list_of_args - File(s) job will run on (a list of lists) """ jobDestination = [] self.checkUserSettings() if ((self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2): msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
raise CrabException(msg) blockSites = self.args['blockSites'] pubdata = self.args['pubdata'] filesbyblock = pubdata.getFiles() self.eventsbyblock = pubdata.getEventsPerBlock() self.eventsbyfile = pubdata.getEventsPerFile() self.parentFiles = pubdata.getParent() ## get max number of events self.maxEvents = pubdata.getMaxEvents() self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0)) noBboundary = int(self.cfg_params.get('CMSSW.no_block_boundary', 0)) if noBboundary == 1: if self.total_number_of_events == -1: msg = 'You are selecting no_block_boundary=1 which does not allow to set total_number_of_events=-1\n' msg += '\tYou shoud get the number of event from DBS web interface and use it for your configuration.' raise CrabException(msg) if len(self.seWhiteList) != 1: msg = 'You are selecting no_block_boundary=1 which requires to choose one and only one site.\n' msg += "\tPlease set se_white_list with the site's storage element name." raise CrabException(msg) blockSites = self.ComputeSubBlockSites(blockSites) # ---- Handle the possible job splitting configurations ---- # if (self.selectTotalNumberEvents): totalEventsRequested = self.total_number_of_events if (self.selectEventsPerJob): eventsPerJobRequested = self.eventsPerJob if (self.selectNumberOfJobs): totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob # If user requested all the events in the dataset if (totalEventsRequested == -1): eventsRemaining = self.maxEvents # If user requested more events than are in the dataset elif (totalEventsRequested > self.maxEvents): eventsRemaining = self.maxEvents common.logger.info("Requested " + str(self.total_number_of_events) + " events, but only " + str(self.maxEvents) + " events are available.") # If user requested less events than are in the dataset else: eventsRemaining = totalEventsRequested # If user requested more events per job than are in the dataset if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents): eventsPerJobRequested = 
self.maxEvents # For user info at end totalEventCount = 0 if (self.selectTotalNumberEvents and self.selectNumberOfJobs): eventsPerJobRequested = int(eventsRemaining / self.theNumberOfJobs) if (self.selectNumberOfJobs): common.logger.info( "May not create the exact number_of_jobs requested.") if (self.theNumberOfJobs < 0): common.logger.info( "ERROR: Negative number_of_jobs requested. Will result in no jobs." ) # old... to remove Daniele totalNumberOfJobs = 999999999 blocks = blockSites.keys() blockCount = 0 # Backup variable in case self.maxEvents counted events in a non-included block numBlocksInDataset = len(blocks) jobCount = 0 list_of_lists = [] # list tracking which jobs are in which jobs belong to which block jobsOfBlock = {} parString = "" pString = "" filesEventCount = 0 msg = '' # ---- Iterate over the blocks in the dataset until ---- # # ---- we've met the requested total # of events ---- # while ((eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)): block = blocks[blockCount] blockCount += 1 if block not in jobsOfBlock.keys(): jobsOfBlock[block] = [] if self.eventsbyblock.has_key(block): numEventsInBlock = self.eventsbyblock[block] common.logger.debug('Events in Block File ' + str(numEventsInBlock)) files = filesbyblock[block] numFilesInBlock = len(files) if (numFilesInBlock <= 0): continue fileCount = 0 if noBboundary == 0: # DD # ---- New block => New job ---- # parString = "" pString = "" # counter for number of events in files currently worked on filesEventCount = 0 # flag if next while loop should touch new file newFile = 1 # job event counter jobSkipEventCount = 0 # ---- Iterate over the files in the block until we've met the requested ---- # # ---- total # of events or we've gone over all the files in this block ---- # msg = '\n' while ((eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs)): file = files[fileCount] if self.useParent == 1: parent = 
self.parentFiles[file] common.logger.log( 10 - 1, "File " + str(file) + " has the following parents: " + str(parent)) if newFile: try: numEventsInFile = self.eventsbyfile[file] common.logger.log( 10 - 1, "File " + str(file) + " has " + str(numEventsInFile) + " events") # increase filesEventCount filesEventCount += numEventsInFile # Add file to current job parString += file + ',' if self.useParent == 1: for f in parent: pString += f + ',' newFile = 0 except KeyError: common.logger.info( "File " + str(file) + " has unknown number of events: skipping") eventsPerJobRequested = min(eventsPerJobRequested, eventsRemaining) # if less events in file remain than eventsPerJobRequested if (filesEventCount - jobSkipEventCount < eventsPerJobRequested): if noBboundary == 1: ## DD newFile = 1 fileCount += 1 else: # if last file in block if (fileCount == numFilesInBlock - 1): # end job using last file, use remaining events in block # close job and touch new file fullString = parString[:-1] if self.useParent == 1: fullParentString = pString[:-1] list_of_lists.append([ fullString, fullParentString, str(-1), str(jobSkipEventCount), block ]) else: list_of_lists.append([ fullString, str(-1), str(jobSkipEventCount), block ]) msg += "Job %s can run over %s events (last file in block).\n" % ( str(jobCount + 1), str(filesEventCount - jobSkipEventCount)) jobDestination.append(blockSites[block]) msg += "Job %s Destination: %s\n" % ( str(jobCount + 1), str(SE2CMS(jobDestination[jobCount]))) # fill jobs of block dictionary jobsOfBlock[block].append(jobCount + 1) # reset counter jobCount = jobCount + 1 totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount jobSkipEventCount = 0 # reset file pString = "" parString = "" filesEventCount = 0 newFile = 1 fileCount += 1 else: # go to next file newFile = 1 fileCount += 1 # if events in file equal to eventsPerJobRequested elif (filesEventCount - jobSkipEventCount == 
eventsPerJobRequested): # close job and touch new file fullString = parString[:-1] if self.useParent == 1: fullParentString = pString[:-1] list_of_lists.append([ fullString, fullParentString, str(eventsPerJobRequested), str(jobSkipEventCount), block ]) else: list_of_lists.append([ fullString, str(eventsPerJobRequested), str(jobSkipEventCount), block ]) msg += "Job %s can run over %s events.\n" % ( str(jobCount + 1), str(eventsPerJobRequested)) jobDestination.append(blockSites[block]) msg += "Job %s Destination: %s\n" % ( str(jobCount + 1), str(SE2CMS(jobDestination[jobCount]))) jobsOfBlock[block].append(jobCount + 1) # reset counter jobCount = jobCount + 1 totalEventCount = totalEventCount + eventsPerJobRequested eventsRemaining = eventsRemaining - eventsPerJobRequested jobSkipEventCount = 0 # reset file pString = "" parString = "" filesEventCount = 0 newFile = 1 fileCount += 1 # if more events in file remain than eventsPerJobRequested else: # close job but don't touch new file fullString = parString[:-1] if self.useParent == 1: fullParentString = pString[:-1] list_of_lists.append([ fullString, fullParentString, str(eventsPerJobRequested), str(jobSkipEventCount), block ]) else: list_of_lists.append([ fullString, str(eventsPerJobRequested), str(jobSkipEventCount), block ]) msg += "Job %s can run over %s events.\n" % ( str(jobCount + 1), str(eventsPerJobRequested)) jobDestination.append(blockSites[block]) msg += "Job %s Destination: %s\n" % ( str(jobCount + 1), str(SE2CMS(jobDestination[jobCount]))) jobsOfBlock[block].append(jobCount + 1) # increase counter jobCount = jobCount + 1 totalEventCount = totalEventCount + eventsPerJobRequested eventsRemaining = eventsRemaining - eventsPerJobRequested # calculate skip events for last file # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest jobSkipEventCount = eventsPerJobRequested - ( filesEventCount - jobSkipEventCount - self.eventsbyfile[file]) # remove all but the last file 
filesEventCount = self.eventsbyfile[file] pString_tmp = '' if self.useParent == 1: for f in parent: pString_tmp += f + ',' pString = pString_tmp parString = file + ',' pass # END if pass # END while (iterate over files in the block) pass # END while (iterate over blocks in the dataset) common.logger.debug(msg) self.ncjobs = self.total_number_of_jobs = jobCount if (eventsRemaining > 0 and jobCount < totalNumberOfJobs): common.logger.info( "Could not run on all requested events because some blocks not hosted at allowed sites." ) common.logger.info( str(jobCount) + " job(s) can run on " + str(totalEventCount) + " events.\n") # skip check on block with no sites DD if noBboundary == 0: self.checkBlockNoSite(blocks, jobsOfBlock, blockSites) # prepare dict output dictOut = {} dictOut['params'] = [ 'InputFiles', 'MaxEvents', 'SkipEvents', 'InputBlocks' ] if self.useParent: dictOut['params'] = [ 'InputFiles', 'ParentFiles', 'MaxEvents', 'SkipEvents', 'InputBlocks' ] dictOut['args'] = list_of_lists dictOut['jobDestination'] = jobDestination dictOut['njobs'] = self.total_number_of_jobs return dictOut # keep trace of block with no sites to print a warning at the end def checkBlockNoSite(self, blocks, jobsOfBlock, blockSites): # screen output screenOutput = "List of jobs and available destination sites:\n\n" noSiteBlock = [] bloskNoSite = [] allBlock = [] blockCounter = 0 saveFblocks = '' for block in blocks: if block in jobsOfBlock.keys(): blockCounter += 1 allBlock.append(blockCounter) sites = self.blackWhiteListParser.checkWhiteList( self.blackWhiteListParser.checkBlackList( blockSites[block], [block]), [block]) screenOutput += "Block %5i: jobs %20s: sites: %s\n" % ( blockCounter, spanRanges(jobsOfBlock[block]), ', '.join( SE2CMS(sites))) if len(sites) == 0: noSiteBlock.append(spanRanges(jobsOfBlock[block])) bloskNoSite.append(blockCounter) else: saveFblocks += str(block) + '\n' writeTXTfile(self, self.fileBlocks_FileName, saveFblocks) common.logger.info(screenOutput) if 
len(noSiteBlock) > 0 and len(bloskNoSite) > 0: msg = 'WARNING: No sites are hosting any part of data for block:\n ' virgola = "" if len(bloskNoSite) > 1: virgola = "," for block in bloskNoSite: msg += ' ' + str(block) + virgola msg += '\n\t\tRelated jobs:\n ' virgola = "" if len(noSiteBlock) > 1: virgola = "," for range_jobs in noSiteBlock: msg += str(range_jobs) + virgola msg += '\n\t\twill not be submitted and this block of data can not be analyzed!\n' if self.cfg_params.has_key('GRID.se_white_list'): msg += '\tWARNING: SE White List: ' + self.cfg_params[ 'GRID.se_white_list'] + '\n' msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n' msg += '\tPlease check if the dataset is available at this site!)' if self.cfg_params.has_key('GRID.ce_white_list'): msg += '\tWARNING: CE White List: ' + self.cfg_params[ 'GRID.ce_white_list'] + '\n' msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n' msg += '\tPlease check if the dataset is available at this site!)\n' common.logger.info(msg) if bloskNoSite == allBlock: msg = 'Requested jobs cannot be Created! 
\n' if self.cfg_params.has_key('GRID.se_white_list'): msg += '\tWARNING: SE White List: ' + self.cfg_params[ 'GRID.se_white_list'] + '\n' msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n' msg += '\tPlease check if the dataset is available at this site!)' if self.cfg_params.has_key('GRID.ce_white_list'): msg += '\tWARNING: CE White List: ' + self.cfg_params[ 'GRID.ce_white_list'] + '\n' msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n' msg += '\tPlease check if the dataset is available at this site!)\n' raise CrabException(msg) return ######################################################################## def jobSplittingByRun(self): """ """ self.checkUserSettings() blockSites = self.args['blockSites'] pubdata = self.args['pubdata'] if self.selectNumberOfJobs == 0: self.theNumberOfJobs = 9999999 blocks = {} runList = [] thefiles = Fileset(name='FilesToSplit') fileList = pubdata.getListFiles() for f in fileList: block = f['Block']['Name'] try: f['Block']['StorageElementList'].extend(blockSites[block]) except: continue wmbsFile = File(f['LogicalFileName']) if not blockSites[block]: msg = 'WARNING: No sites are hosting any part of data for block: %s\n' % block msg += 'Related jobs will not be submitted and this block of data can not be analyzed' common.logger.debug(msg) [wmbsFile['locations'].add(x) for x in blockSites[block]] wmbsFile['block'] = block runNum = f['RunsList'][0]['RunNumber'] runList.append(runNum) myRun = Run(runNumber=runNum) wmbsFile.addRun(myRun) thefiles.addFile(wmbsFile) work = Workflow() subs = Subscription(fileset=thefiles, workflow=work, split_algo='RunBased', type="Processing") splitter = SplitterFactory() jobfactory = splitter(subs) #loop over all runs list_of_lists = [] jobDestination = [] list_of_blocks = [] count = 0 for jobGroup in jobfactory(): if count < self.theNumberOfJobs: res = self.getJobInfo(jobGroup) parString = '' for file in res['lfns']: parString += 
file + ',' list_of_blocks.append(res['block']) fullString = parString[:-1] blockString = ','.join(list_of_blocks) list_of_lists.append( [fullString, str(-1), str(0), blockString]) #need to check single file location jobDestination.append(res['locations']) count += 1 # prepare dict output dictOut = {} dictOut['params'] = [ 'InputFiles', 'MaxEvents', 'SkipEvents', 'InputBlocks' ] dictOut['args'] = list_of_lists dictOut['jobDestination'] = jobDestination dictOut['njobs'] = count self.cacheBlocks(list_of_blocks, jobDestination) return dictOut def getJobInfo(self, jobGroup): res = {} lfns = [] locations = [] tmp_check = 0 for job in jobGroup.jobs: for file in job.getFiles(): lfns.append(file['lfn']) for loc in file['locations']: if tmp_check < 1: locations.append(loc) res['block'] = file['block'] tmp_check = tmp_check + 1 res['lfns'] = lfns res['locations'] = locations return res ######################################################################## def prepareSplittingNoInput(self): """ """ if (self.selectEventsPerJob): common.logger.info('Required ' + str(self.eventsPerJob) + ' events per job ') if (self.selectNumberOfJobs): common.logger.info('Required ' + str(self.theNumberOfJobs) + ' jobs in total ') if (self.selectTotalNumberEvents): common.logger.info('Required ' + str(self.total_number_of_events) + ' events in total ') if (self.total_number_of_events < 0): msg = 'Cannot split jobs per Events with "-1" as total number of events' raise CrabException(msg) if (self.selectEventsPerJob): if (self.selectTotalNumberEvents): self.total_number_of_jobs = int(self.total_number_of_events / self.eventsPerJob) elif (self.selectNumberOfJobs): self.total_number_of_jobs = self.theNumberOfJobs self.total_number_of_events = int(self.theNumberOfJobs * self.eventsPerJob) elif (self.selectNumberOfJobs): self.total_number_of_jobs = self.theNumberOfJobs self.eventsPerJob = int(self.total_number_of_events / self.total_number_of_jobs) def jobSplittingNoInput(self): """ Perform job 
splitting based on number of event per job """ common.logger.debug('Splitting per events') self.checkUserSettings() jobDestination = [] if ((self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2): msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.' raise CrabException(msg) managedGenerators = self.args['managedGenerators'] generator = self.args['generator'] firstLumi = int(self.cfg_params.get('CMSSW.first_lumi', 1)) self.prepareSplittingNoInput() common.logger.debug('N jobs ' + str(self.total_number_of_jobs)) # is there any remainder? check = int(self.total_number_of_events) - ( int(self.total_number_of_jobs) * self.eventsPerJob) common.logger.debug('Check ' + str(check)) common.logger.info( str(self.total_number_of_jobs) + ' jobs can be created, each for ' + str(self.eventsPerJob) + ' for a total of ' + str(self.total_number_of_jobs * self.eventsPerJob) + ' events') if check > 0: common.logger.info( 'Warning: asked ' + str(self.total_number_of_events) + ' but can do only ' + str(int(self.total_number_of_jobs) * self.eventsPerJob)) # argument is seed number.$i self.list_of_args = [] for i in range(self.total_number_of_jobs): ## Since there is no input, any site is good jobDestination.append( [""]) # must be empty to correctly write the XML args = [] if (firstLumi): # Pythia first lumi args.append(str(int(firstLumi) + i)) if (generator in managedGenerators): args.append(generator) if (generator == 'comphep' and i == 0): # COMPHEP is brain-dead and wants event #'s like 1,100,200,300 args.append('1') else: args.append(str(i * self.eventsPerJob)) args.append(str(self.eventsPerJob)) self.list_of_args.append(args) # prepare dict output dictOut = {} dictOut['params'] = ['MaxEvents'] if (firstLumi): dictOut['params'] = ['FirstLumi', 'MaxEvents'] if (generator in managedGenerators): dictOut['params'] = [ 'FirstLumi', 'Generator', 'FirstEvent', 'MaxEvents' ] else: if (generator in managedGenerators): 
dictOut['params'] = ['Generator', 'FirstEvent', 'MaxEvents'] dictOut['args'] = self.list_of_args dictOut['jobDestination'] = jobDestination dictOut['njobs'] = self.total_number_of_jobs return dictOut def jobSplittingForScript(self): """ Perform job splitting based on number of job """ self.checkUserSettings() if (self.selectNumberOfJobs == 0): msg = 'must specify number_of_jobs.' raise crabexception(msg) jobDestination = [] common.logger.debug('Splitting per job') common.logger.info('Required ' + str(self.theNumberOfJobs) + ' jobs in total ') # self.total_number_of_jobs = self.theNumberOfJobs self.prepareSplittingNoInput() common.logger.debug('N jobs ' + str(self.total_number_of_jobs)) common.logger.info( str(self.total_number_of_jobs) + ' jobs can be created') # argument is seed number.$i self.list_of_args = [] for i in range(self.total_number_of_jobs): args = [] jobDestination.append([""]) if self.eventsPerJob != 0: args.append(str(self.eventsPerJob)) self.list_of_args.append(args) # prepare dict output dictOut = {} dictOut['params'] = ['MaxEvents'] dictOut['args'] = self.list_of_args dictOut['jobDestination'] = jobDestination dictOut['njobs'] = self.total_number_of_jobs return dictOut def jobSplittingByLumi(self): """ Split task into jobs by Lumi section paying attention to which lumis should be run (according to the analysis dataset). 
This uses WMBS job splitting which does not split files over jobs so the job will have AT LEAST as many lumis as requested, perhaps more """ self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0)) common.logger.debug('Splitting by Lumi') self.checkLumiSettings() blockSites = self.args['blockSites'] pubdata = self.args['pubdata'] lumisPerFile = pubdata.getLumis() self.parentFiles = pubdata.getParent() # Make the list of WMBS files for job splitter fileList = pubdata.getListFiles() wmFileList = [] for jobFile in fileList: block = jobFile['Block']['Name'] try: jobFile['Block']['StorageElementList'].extend( blockSites[block]) except: continue wmbsFile = File(jobFile['LogicalFileName']) if not blockSites[block]: msg = 'WARNING: No sites are hosting any part of data for block: %s\n' % block msg += 'Related jobs will not be submitted and this block of data can not be analyzed' common.logger.debug(msg) # wmbsFile['locations'].add('Nowhere') [wmbsFile['locations'].add(x) for x in blockSites[block]] wmbsFile['block'] = block for lumi in lumisPerFile[jobFile['LogicalFileName']]: wmbsFile.addRun(Run(lumi[0], lumi[1])) wmFileList.append(wmbsFile) fileSet = set(wmFileList) thefiles = Fileset(name='FilesToSplit', files=fileSet) # Create the factory and workflow work = Workflow() subs = Subscription(fileset=thefiles, workflow=work, split_algo='LumiBased', type="Processing") splitter = SplitterFactory() jobFactory = splitter(subs) list_of_lists = [] jobDestination = [] jobCount = 0 lumisCreated = 0 list_of_blocks = [] if not self.limitJobLumis: if self.totalNLumis > 0: self.lumisPerJob = max( self.totalNLumis // self.theNumberOfJobs, 1) else: self.lumisPerJob = pubdata.getMaxLumis( ) // self.theNumberOfJobs + 1 common.logger.info('Each job will process about %s lumis.' 
% self.lumisPerJob) for jobGroup in jobFactory(lumis_per_job=self.lumisPerJob): for job in jobGroup.jobs: if (self.limitNJobs and jobCount >= self.theNumberOfJobs): common.logger.info('Requested number of jobs reached.') break if (self.limitTotalLumis and lumisCreated >= self.totalNLumis): common.logger.info('Requested number of lumis reached.') break lumis = [] lfns = [] if self.useParent == 1: parentlfns = [] pString = "" locations = [] blocks = [] firstFile = True # Collect information from all the files for jobFile in job.getFiles(): doFile = False if firstFile: # Get locations from first file in the job for loc in jobFile['locations']: locations.append(loc) blocks.append(jobFile['block']) firstFile = False # Accumulate Lumis from all files for lumiList in jobFile['runs']: theRun = lumiList.run for theLumi in list(lumiList): if (not self.limitTotalLumis) or \ (lumisCreated < self.totalNLumis): doFile = True lumisCreated += 1 lumis.append((theRun, theLumi)) if doFile: lfns.append(jobFile['lfn']) if self.useParent == 1: parent = self.parentFiles[jobFile['lfn']] for p in parent: pString += p + ',' fileString = ','.join(lfns) lumiLister = LumiList(lumis=lumis) lumiString = lumiLister.getCMSSWString() blockString = ','.join(blocks) if self.useParent == 1: common.logger.debug("Files: " + fileString + " with the following parents: " + pString[:-1]) pfileString = pString[:-1] list_of_lists.append([ fileString, pfileString, str(-1), str(0), lumiString, blockString ]) else: list_of_lists.append( [fileString, str(-1), str(0), lumiString, blockString]) list_of_blocks.append(blocks) jobDestination.append(locations) jobCount += 1 common.logger.debug( 'Job %s will run on %s files and %s lumis ' % (jobCount, len(lfns), len(lumis))) common.logger.info('%s jobs created to run on %s lumis' % (jobCount, lumisCreated)) # Prepare dict output matching back to non-WMBS job creation dictOut = {} dictOut['params'] = [ 'InputFiles', 'MaxEvents', 'SkipEvents', 'Lumis', 'InputBlocks' ] if 
self.useParent == 1: dictOut['params'] = [ 'InputFiles', 'ParentFiles', 'MaxEvents', 'SkipEvents', 'Lumis', 'InputBlocks' ] dictOut['args'] = list_of_lists dictOut['jobDestination'] = jobDestination dictOut['njobs'] = jobCount self.cacheBlocks(list_of_blocks, jobDestination) return dictOut def cacheBlocks(self, blocks, destinations): saveFblocks = '' for i in range(len(blocks)): sites = self.blackWhiteListParser.checkWhiteList( self.blackWhiteListParser.checkBlackList(destinations[i])) if len(sites) != 0: for block in blocks[i]: saveFblocks += str(block) + '\n' writeTXTfile(self, self.fileBlocks_FileName, saveFblocks) def Algos(self): """ Define key splittingType matrix """ SplitAlogs = { 'EventBased': self.jobSplittingByEvent, 'RunBased': self.jobSplittingByRun, 'LumiBased': self.jobSplittingByLumi, 'NoInput': self.jobSplittingNoInput, 'ForScript': self.jobSplittingForScript } return SplitAlogs
class JdlWriter(Actor):
    """Actor that writes the scheduler JDL of a set of jobs to the share dir."""

    def __init__(self, cfg_params, jobs):
        """Store the configuration and the ids of the jobs to describe.

        Args:
            cfg_params: configuration mapping; 'CMSSW.datasetpath' is
                required, the GRID.se_white_list / GRID.se_black_list
                entries are optional.
            jobs: ids of the jobs for which JDL has to be written.
        """
        self.cfg_params = cfg_params
        self.nj_list = jobs
        from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
        seWhiteList = cfg_params.get('GRID.se_white_list', [])
        seBlackList = cfg_params.get('GRID.se_black_list', [])
        self.blackWhiteListParser = SEBlackWhiteListParser(
            seWhiteList, seBlackList, common.logger())
        self.datasetpath = self.cfg_params['CMSSW.datasetpath']
        # the literal string "none" (any case) means "no input dataset"
        if string.lower(self.datasetpath) == 'none':
            self.datasetpath = None
        return

    def run(self):
        """
        The main method of the class: write JDL for jobs in range self.nj_list
        """
        common.logger.debug("JdlWriter::run() called")
        start = time.time()
        jobs_L = self.listOfjobs()
        self.writer(jobs_L)
        stop = time.time()
        common.logger.log(10 - 1, "JDL writing time :" + str(stop - start))
        return

    def listOfjobs(self):
        """Group the jobs of ``self.nj_list`` by distinct destination list.

        Destinations that are empty after black/white listing are skipped
        when a dataset is set.

        Returns:
            list of job-id lists (also stored in ``self.sub_jobs``).
        """
        ### define here the list of distinct destinations sites list
        distinct_dests = common._db.queryDistJob_Attr('dlsDestination',
                                                      'jobId', self.nj_list)
        ### define here the list of jobs Id for each distinct list of sites
        self.sub_jobs = []  # list of jobs Id list to submit
        for distDest in distinct_dests:
            dest = self.blackWhiteListParser.cleanForBlackWhiteList(distDest)
            if not dest and self.datasetpath:
                common.logger.info(
                    'No destination available: will not create jdl \n')
                continue
            jobs_at_dest = common._db.queryAttrJob(
                {'dlsDestination': distDest}, 'jobId')
            sub_jobs_temp = [i for i in self.nj_list if i in jobs_at_dest]
            if len(sub_jobs_temp) > 0:
                self.sub_jobs.append(sub_jobs_temp)
        return self.sub_jobs

    def writer(self, list):
        """
        Materialize JDL into file

        Args:
            list: job-id lists as returned by listOfjobs(). The name shadows
                the builtin but is kept for callers passing it by keyword.
        """
        import os  # local import: the module header is outside this chunk

        if len(list) == 0:
            common.logger.info(
                'No destination available for any job: will not create jdl \n')
            # Nothing to write: stop here rather than claim below that
            # JDL files were written.
            return
        task = common._db.getTask()
        share_dir = common.work_space.shareDir()
        c1 = 1
        # c2 is intentionally left as in the original: it is never reset, so
        # the second index keeps growing across groups.
        c2 = 1
        for sub_list in list:
            jdl = common.scheduler.writeJDL(sub_list, task)
            for stri in jdl:
                self.jdlFile = 'File-' + str(c1) + '_' + str(c2) + '.jdl'
                j_file = open(os.path.join(share_dir, self.jdlFile), 'w')
                try:
                    j_file.write(stri)
                finally:
                    # close the handle even if the write fails
                    j_file.close()
                c2 += 1
            c1 += 1
        # Build the reported pattern the same way as the real path so the
        # directory separator is not lost.
        common.logger.info('JDL files are written to ' +
                           os.path.join(str(share_dir), 'File-*.jdl') + ' \n')
        return
def configure(self, cfg_params):
    """Read the scheduler options from *cfg_params* and validate them.

    Configures the underlying boss object, stores the GRID.* / USER.* /
    CRAB.* / CMSSW.* settings on the instance, rejects inconsistent
    return_data / copy_data / local_stage_out / publish_data combinations
    and finally calls ``self.checkProxy()``.

    Raises:
        CrabException: on a missing storage element or an inconsistent
            combination of output options.
    """
    self._boss.configure(cfg_params)

    option = cfg_params.get

    self.CRAB_useServer = option('CRAB.use_server', 0)
    self.CRAB_serverName = option('CRAB.server_name', None)
    white_list = option('GRID.se_white_list', [])
    black_list = option('GRID.se_black_list', [])
    self.dontCheckMyProxy = int(option("GRID.dont_check_myproxy", 0))
    self.EDG_requirements = option('GRID.requirements', None)

    # additional JDL parameters arrive as one ';'-separated string
    extra_jdl = option('GRID.additional_jdl_parameters', None)
    self.EDG_addJdlParam = extra_jdl
    if extra_jdl:
        self.EDG_addJdlParam = string.split(extra_jdl, ';')

    self.pset = option('CMSSW.pset', None)
    self.blackWhiteListParser = SEBlackWhiteListParser(
        white_list, black_list, common.logger())

    self.return_data = int(option('USER.return_data', 0))
    self.copy_data = int(option('USER.copy_data', 0))
    # publish_data is stored as read; it is converted where it is tested
    self.publish_data = option("USER.publish_data", 0)
    self.local_stage = int(option('USER.local_stage_out', 0))
    self.check_RemoteDir = int(option('USER.check_user_remote_dir', 1))

    copying = self.copy_data == 1
    not_copying = self.copy_data == 0

    if copying:
        self.SE = option('USER.storage_element', None)
        if not self.SE:
            msg = "Error. The [USER] section does not have 'storage_element'"
            common.logger.info(msg)
            raise CrabException(msg)

    if not_copying and self.return_data == 0:
        raise CrabException(
            'Error: return_data = 0 and copy_data = 0 ==> your exe output will be lost\n'
            + 'Please modify return_data and copy_data value in your crab.cfg file\n'
        )

    if copying and self.return_data == 1:
        raise CrabException(
            'Error: return_data and copy_data cannot be set both to 1\n' +
            'Please modify return_data or copy_data value in your crab.cfg file\n'
        )

    if not_copying and self.local_stage == 1:
        raise CrabException(
            'Error: copy_data = 0 and local_stage_out = 1.\n' +
            'To enable local stage out the copy_data value has to be = 1\n' +
            'Please modify copy_data value in your crab.cfg file\n')

    # converted only here so a bad value cannot mask the errors above
    publishing = int(self.publish_data) == 1

    if not_copying and publishing:
        msg = ('Error: publish_data = 1 must be used with copy_data = 1\n' +
               'Please modify copy_data value in your crab.cfg file\n')
        common.logger.info(msg)
        raise CrabException(msg)

    if publishing and self._name == 'lsf':
        msg = 'Error: data publication is not allowed with lsf scheduler, but only with grid scheduler or caf\n'
        common.logger.info(msg)
        raise CrabException(msg)

    self.debug_wrapper = int(option('USER.debug_wrapper', 0))
    # local stage out also turns the wrapper debug flag on
    self.debugWrap = ''
    self.loc_stage_out = ''
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'
    if self.local_stage == 1:
        self.debugWrap = '--debug'
        self.loc_stage_out = '--local_stage'

    # Time padding for minimal job duration.
    self.minimal_job_duration = 10

    self.proxyValid = 0
    self.dontCheckProxy = int(option("GRID.dont_check_proxy", 0))
    self.proxyServer = 'myproxy.cern.ch'
    self.group = option("GRID.group", None)
    self.role = option("GRID.role", None)

    self.checkProxy()
    return
class Submitter(Actor):
    """Actor that selects created jobs, matches them to sites and submits them."""

    def __init__(self, cfg_params, parsed_range, val):
        """Interpret the submission option.

        Args:
            cfg_params: configuration mapping ('CMSSW.datasetpath' required).
            parsed_range: job ids already parsed from the user's range.
            val: raw option: 'range', 'all', a positive integer, or an
                expression evaluating to a tuple / negative integer (in
                which case *parsed_range* is used).

        Raises:
            CrabException: if *val* is none of the above.
        """
        self.cfg_params = cfg_params
        self.limitJobs = True
        # get user request
        self.nsjobs = -1
        self.chosenJobsList = None
        if val:
            if val == 'range':  # for Resubmitter
                self.chosenJobsList = parsed_range
            elif val == 'all':
                pass
            else:
                # NOTE(security): eval() on a user-supplied option is kept for
                # backward compatibility; it executes arbitrary expressions.
                # It is now evaluated once, and a malformed option yields the
                # CrabException below instead of a raw NameError/SyntaxError.
                try:
                    requested = eval(val)
                except Exception:
                    requested = None
                if (type(requested) is int) and requested > 0:
                    # positive number
                    self.nsjobs = requested
                elif (type(requested) is tuple) or (type(requested) is int
                                                    and requested < 0):
                    self.chosenJobsList = parsed_range
                    self.nsjobs = len(self.chosenJobsList)
                else:
                    msg = 'Bad submission option <' + str(val) + '>\n'
                    msg += ' Must be an integer or "all"'
                    msg += ' Generic range is not allowed"'
                    raise CrabException(msg)
            pass
        self.seWhiteList = cfg_params.get('GRID.se_white_list', [])
        self.seBlackList = cfg_params.get('GRID.se_black_list', [])
        self.datasetPath = self.cfg_params['CMSSW.datasetpath']
        # the literal string "none" (any case) means "no input dataset"
        if string.lower(self.datasetPath) == 'none':
            self.datasetPath = None
        self.scram = Scram.Scram(cfg_params)
        return

    #wmbs
    def BuildJobList(self, type=0):
        """Fill ``self.nj_list`` with the ids of the jobs to submit.

        With ``type == 1`` the chosen job list (if any) is used as is.
        Otherwise jobs in state 'Created' are taken, skipping jobs without
        a destination after black/white listing (unless no dataset is
        set), up to ``self.nsjobs`` jobs when that is positive.

        Raises:
            CrabException: if job limiting is on and more than 500 jobs
                were selected.
        """
        # total jobs
        nj_list = []
        self.complete_List = common._db.nJobs('list')
        if type == 1:
            self.nj_list = []
            if self.chosenJobsList:
                self.nj_list = self.chosenJobsList
            return
        # build job list
        from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
        self.blackWhiteListParser = SEBlackWhiteListParser(
            self.seWhiteList, self.seBlackList, common.logger())
        common.logger.debug('nsjobs ' + str(self.nsjobs))
        # get the first not already submitted
        common.logger.debug('Total jobs ' + str(len(self.complete_List)))
        jobSetForSubmission = 0
        jobSkippedInSubmission = []
        tmp_jList = self.complete_List
        if self.chosenJobsList is not None:
            tmp_jList = self.chosenJobsList
        for job in common._db.getTask(tmp_jList).jobs:
            cleanedBlackWhiteList = self.blackWhiteListParser.cleanForBlackWhiteList(
                job['dlsDestination'])
            if (cleanedBlackWhiteList != '') or (self.datasetPath is None):
                if (job.runningJob['state'] in ['Created']):
                    jobSetForSubmission += 1
                    nj_list.append(job['id'])
                else:
                    continue
            else:
                jobSkippedInSubmission.append(job['id'])
            if self.nsjobs > 0 and self.nsjobs == jobSetForSubmission:
                break
        if self.nsjobs > jobSetForSubmission:
            common.logger.info('asking to submit ' + str(self.nsjobs) +
                               ' jobs, but only ' +
                               str(jobSetForSubmission) +
                               ' left: submitting those')
        if len(jobSkippedInSubmission) > 0:
            # every id is followed by a comma, including the last one
            mess = "".join([str(jobs) + "," for jobs in jobSkippedInSubmission])
            common.logger.info(
                "Jobs: " + str(mess) +
                "\n\tskipped because no sites are hosting this data\n")
            self.submissionError()
        # submit N from last submitted job
        common.logger.debug('nj_list ' + str(nj_list))
        self.nj_list = nj_list
        if self.limitJobs and len(self.nj_list) > 500:
            ###### FEDE FOR BUG 85243 ##############
            msg = "The CRAB client will not submit task with more than 500 jobs.\n"
            msg += " Use the server mode or submit your jobs in smaller groups"
            raise CrabException(msg)
            ########################################
        return

    def run(self):
        """
        The main method of the class: submit jobs in range self.nj_list
        """
        common.logger.debug("Submitter::run() called")
        start = time.time()
        self.BuildJobList()
        check = self.checkIfCreate()
        if check == 0:
            self.SendMLpre()
            list_matched, task = self.performMatch()
            njs = self.perfromSubmission(list_matched, task)
            stop = time.time()
            common.logger.debug("Submission Time: " + str(stop - start))
            msg = 'Total of %d jobs submitted' % njs
            if njs != len(self.nj_list):
                msg += ' (from %d requested).' % (len(self.nj_list))
            else:
                msg += '.'
            common.logger.info(msg)
            if (njs < len(self.nj_list) or len(self.nj_list) == 0):
                self.submissionError()

    #wmbs
    def checkIfCreate(self, type=0):
        """Check that there is something to submit.

        Returns:
            0 when submission can proceed, 1 otherwise (a message is logged).
        """
        code = 0
        task = common._db.getTask()
        if type == 1 and len(task.jobs) == 0:
            if task['jobType'] == 'Submitted':
                common.logger.info(
                    "No Request to be submitted: first create it.\n")
                code = 1
        else:
            totalCreatedJobs = 0
            for job in task.jobs:
                if job.runningJob['state'] == 'Created':
                    totalCreatedJobs += 1
            if (totalCreatedJobs == 0):
                common.logger.info(
                    "No jobs to be submitted: first create them")
                code = 1
        return code

    def performMatch(self):
        """Group the selected jobs by destination and list-match each group.

        Returns:
            (matched, task): indices into ``self.sub_jobs`` of the groups
            with at least one compatible site, and the task object.
        """
        common.logger.info("Checking available resources...")
        ### define here the list of distinct destinations sites list
        distinct_dests = common._db.queryDistJob_Attr('dlsDestination',
                                                      'jobId', self.nj_list)
        ### define here the list of jobs Id for each distinct list of sites
        self.sub_jobs = []  # list of jobs Id list to submit
        group_dests = []  # destination of each entry of self.sub_jobs
        jobs_to_match = []  # list of jobs Id to match
        for distDest in distinct_dests:
            jobs_at_dest = common._db.queryAttrJob(
                {'dlsDestination': distDest}, 'jobId')
            sub_jobs_temp = [i for i in self.nj_list if i in jobs_at_dest]
            if len(sub_jobs_temp) > 0:
                # Keep the three lists aligned. The original indexed
                # self.sub_jobs and distinct_dests with a counter that also
                # advanced for empty groups.
                self.sub_jobs.append(sub_jobs_temp)
                group_dests.append(distDest)
                jobs_to_match.append(sub_jobs_temp[0])
        matched = []
        task = common._db.getTask()
        for sel in range(len(jobs_to_match)):
            id_job = jobs_to_match[sel]
            match = common.scheduler.listMatch(group_dests[sel], False)
            if len(match) > 0:
                common.logger.info("Found compatible site(s) for job " +
                                   str(id_job))
                matched.append(sel)
            else:
                common.logger.info(
                    "No compatible site found, will not submit jobs " +
                    str(self.sub_jobs[sel]))
                self.submissionError()
        return matched, task

    def perfromSubmission(self, matched, task):
        """Submit the job groups whose indices are in *matched*.

        Returns:
            number of jobs that got a non-empty scheduler id.

        Raises:
            CrabException: if the scheduler refuses a group.
        """
        njs = 0
        ### Progress Bar indicator, deactivate for debug
        if common.debugLevel == 0:
            term = TerminalController()
        if len(matched) > 0:
            common.logger.info(
                str(len(matched)) + " blocks of jobs will be submitted")
            common.logger.debug("Delegating proxy ")
            try:
                common.scheduler.delegateProxy()
            except CrabException:
                # best effort: submission is attempted anyway
                common.logger.debug("Proxy delegation failed ")
            for ii in matched:
                common.logger.debug('Submitting jobs ' +
                                    str(self.sub_jobs[ii]))
                # fix arguments for unique naming of the output
                common._db.updateResubAttribs(self.sub_jobs[ii])
                try:
                    common.scheduler.submit(self.sub_jobs[ii], task)
                except CrabException:
                    common.logger.debug(
                        'common.scheduler.submit exception. Job(s) possibly not submitted'
                    )
                    raise CrabException("Job not submitted")
                if common.debugLevel == 0:
                    # the progress bar is cosmetic: never let it stop the run
                    try:
                        pbar = ProgressBar(
                            term, 'Submitting ' +
                            str(len(self.sub_jobs[ii])) + ' jobs')
                    except Exception:
                        pbar = None
                    if pbar:
                        pbar.update(
                            float(ii + 1) / float(len(self.sub_jobs)),
                            'please wait')
                ### check the if the submission succeded Maybe not needed or at least simplified
                sched_Id = common._db.queryRunJob('schedulerId',
                                                  self.sub_jobs[ii])
                listId = []
                run_jobToSave = {'status': 'S'}
                listRunField = []
                for j in range(len(self.sub_jobs[ii])):
                    if str(sched_Id[j]) != '':
                        listId.append(self.sub_jobs[ii][j])
                        listRunField.append(run_jobToSave)
                        common.logger.debug("Submitted job # " +
                                            str(self.sub_jobs[ii][j]))
                        njs += 1
                common._db.updateRunJob_(listId, listRunField)
                self.stateChange(listId, "SubSuccess")
                self.SendMLpost(self.sub_jobs[ii])
        else:
            common.logger.info(
                "The whole task doesn't found compatible site ")
        return njs

    def submissionError(self):
        """Log the site requirements in use, as a hint after a partial submission."""
        ## add some more verbose message in case submission is not complete
        msg = 'Submission performed using the Requirements: \n'
        ### TODO_ DS--BL
        if self.cfg_params.has_key('GRID.se_white_list'):
            msg += '\tSE White List: ' + self.cfg_params[
                'GRID.se_white_list'] + '\n'
        if self.cfg_params.has_key('GRID.se_black_list'):
            msg += '\tSE Black List: ' + self.cfg_params[
                'GRID.se_black_list'] + '\n'
        if self.cfg_params.has_key('GRID.ce_white_list'):
            msg += '\tCE White List: ' + self.cfg_params[
                'GRID.ce_white_list'] + '\n'
        if self.cfg_params.has_key('GRID.ce_black_list'):
            msg += '\tCE Black List: ' + self.cfg_params[
                'GRID.ce_black_list'] + '\n'
        removeDefBL = self.cfg_params.get('GRID.remove_default_blacklist', 0)
        # Compare as strings: the default is the int 0, a configured value is
        # tested against '0'. The original ``== '0'`` never matched the
        # default, so the note was lost when the option was unset.
        if str(removeDefBL) == '0':
            msg += '\tNote: All CMS T1s are BlackListed by default \n'
        msg += '\t(Hint: By whitelisting you force the job to run at this particular site(s).\n'
        msg += '\tPlease check if:\n'
        msg += '\t\t -- the dataset is available at this site\n'
        msg += '\t\t -- the CMSSW version is available at this site\n'
        msg += '\t\t -- grid submission to CERN & FNAL CAFs is not allowed)\n'
        msg += '\tPlease also look at the Site Status Page for CMS sites,\n'
        msg += '\t to check if the sites hosting your data are ok\n'
        msg += '\t http://dashb-ssb.cern.ch/dashboard/request.py/siteviewhome\n'
        common.logger.info(msg)
        return

    def collect_MLInfo(self):
        """
        Prepare DashBoard information

        Returns:
            dict of task-level parameters sent with every report.
        """
        taskId = common._db.queryTask('name')
        gridName = string.strip(common.scheduler.userName())
        gridScheduler = common.scheduler.name()
        if gridScheduler.upper() == 'REMOTEGLIDEIN':
            gridScheduler = 'GLIDEIN'
        common.logger.debug("GRIDNAME: %s " % gridName)
        #### FEDE for taskType (savannah 76950)
        taskType = self.cfg_params.get('USER.tasktype', 'analysis')
        self.executable = self.cfg_params.get('CMSSW.executable', 'cmsRun')
        VO = self.cfg_params.get('GRID.virtual_organization', 'cms')
        params = {
            'tool': common.prog_name,
            'SubmissionType': 'direct',
            'JSToolVersion': common.prog_version_str,
            'tool_ui': os.environ.get('HOSTNAME', ''),
            'scheduler': gridScheduler,
            'GridName': gridName,
            'ApplicationVersion': self.scram.getSWVersion(),
            'taskType': taskType,
            'vo': VO,
            'CMSUser': getUserName(),
            'user': getUserName(),
            'taskId': str(taskId),
            'datasetFull': self.datasetPath,
            'resubmitter': 'user',
            'exe': self.executable
        }
        return params

    def SendMLpre(self):
        """
        Send Pre info to ML
        """
        params = self.collect_MLInfo()
        params['jobId'] = 'TaskMeta'
        common.apmon.sendToML(params)
        common.logger.debug(
            'Submission DashBoard Pre-Submission report: %s' % str(params))
        return

    def SendMLpost(self, allList):
        """
        Send post-submission info to ML

        One report is sent per job in *allList*; the job id format depends
        on the scheduler in use.
        """
        task = common._db.getTask(allList)
        params = dict(self.collect_MLInfo())
        msg = ''
        Sub_Type = 'Direct'
        for job in task.jobs:
            jj = job['jobId']
            jobId = ''
            localId = ''
            jid = str(job.runningJob['schedulerId'])
            sched = common.scheduler.name().upper()
            if sched == 'CONDOR_G':
                rb = 'OSG'
                taskHash = sha1(common._db.queryTask('name')).hexdigest()
                jobId = str(jj) + '_https://' + common.scheduler.name(
                ) + '/' + taskHash + '/' + str(jj)
                msg += (
                    'JobID for ML monitoring is created for CONDOR_G scheduler: %s \n'
                    % str(jobId))
            elif sched == 'GLIDEIN':
                rb = common.scheduler.name()
                jobId = str(jj) + '_https://' + str(jid)
                msg += (
                    'JobID for ML monitoring is created for GLIDEIN scheduler: %s \n'
                    % str(jobId))
            elif sched == 'REMOTEGLIDEIN':
                rb = str(task['serverName'])
                jobId = str(jj) + '_https://' + str(jid)
                msg += (
                    'JobID for ML monitoring is created for REMOTEGLIDEIN scheduler: %s\n'
                    % str(jobId))
            elif sched in ['LSF', 'CAF', 'PBS']:
                jobId = str(jj) + "_https://" + sched + ":/" + jid + "-" + \
                    string.replace(str(task['name']), "_", "-")
                msg += (
                    'JobID for ML monitoring is created for %s scheduler: %s\n'
                    % (sched, str(jobId)))
                rb = common.scheduler.name()
                localId = jid
            elif sched == 'CONDOR':
                taskHash = sha1(common._db.queryTask('name')).hexdigest()
                jobId = str(jj) + '_https://' + socket.gethostname(
                ) + '/' + taskHash + '/' + str(jj)
                rb = common.scheduler.name()
                msg += (
                    'JobID for ML monitoring is created for CONDOR scheduler: %s\n'
                    % str(jobId))
            elif sched == 'ARC':
                jobId = str(jj) + '_' + str(jid)
                msg += (
                    'JobID for ML monitoring is created for ARC scheduler: %s\n'
                    % str(jobId))
                rb = 'ARC'
            else:
                jobId = str(jj) + '_' + str(jid)
                msg += (
                    'JobID for ML monitoring is created for gLite scheduler %s\n'
                    % str(jobId))
                rb = str(job.runningJob['service'])
            # up to two destinations are spelled out, more are only counted
            dlsDest = job['dlsDestination']
            if len(dlsDest) == 1:
                T_SE = str(dlsDest[0])
            elif len(dlsDest) == 2:
                T_SE = str(dlsDest[0]) + ',' + str(dlsDest[1])
            else:
                T_SE = str(len(dlsDest)) + '_Selected_SE'
            params.update({
                'jobId': jobId,
                'sid': jid,
                'broker': rb,
                'bossId': jj,
                'SubmissionType': Sub_Type,
                'TargetSE': T_SE,
                'localId': localId
            })
            msg += ('Submission DashBoard report: %s\n' % str(params))
            common.apmon.sendToML(params)
        #common.logger.debug(msg)
        return
class JdlWriter( Actor ):
    """
    Actor that writes the JDL of the selected jobs into files in the
    share directory of the working area.
    """

    def __init__(self, cfg_params, jobs):
        """
        cfg_params -- configuration mapping; 'CMSSW.datasetpath' is
                      required, 'GRID.se_white_list' and
                      'GRID.se_black_list' default to an empty list
        jobs       -- list of job ids to write the JDL for
        """
        self.cfg_params = cfg_params
        self.nj_list = jobs
        from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
        seWhiteList = cfg_params.get('GRID.se_white_list', [])
        seBlackList = cfg_params.get('GRID.se_black_list', [])
        self.blackWhiteListParser = SEBlackWhiteListParser(seWhiteList, seBlackList, common.logger())
        # A dataset path spelled 'none' (in any case) means "no dataset".
        self.datasetpath = self.cfg_params['CMSSW.datasetpath']
        if self.datasetpath.lower() == 'none':
            self.datasetpath = None
        return

    def run(self):
        """
        The main method of the class: write JDL for jobs in range self.nj_list
        """
        common.logger.debug( "JdlWriter::run() called")

        start = time.time()
        jobs_L = self.listOfjobs()
        self.writer(jobs_L)
        stop = time.time()

        # numeric log level 9 (10-1)
        common.logger.log(10-1,"JDL writing time :"+str(stop - start))
        return

    def listOfjobs(self):
        """
        Group the jobs of self.nj_list by 'dlsDestination'.

        For every value returned by common._db.queryDistJob_Attr() the jobs
        of self.nj_list that have that destination are collected, in the
        order they have in self.nj_list.  A destination for which the
        black/white list parser returns nothing is skipped when a dataset
        path is set.

        Returns the list of non-empty job id lists; the same list is stored
        in self.sub_jobs.
        """
        ### define here the list of distinct destinations sites list
        distinct_dests = common._db.queryDistJob_Attr('dlsDestination', 'jobId' ,self.nj_list)

        ### define here the list of jobs Id for each distinct list of sites
        self.sub_jobs = [] # list of jobs Id list to submit
        for distDest in distinct_dests:
            dest = self.blackWhiteListParser.cleanForBlackWhiteList(distDest)
            if not dest and self.datasetpath:
                common.logger.info('No destination available: will not create jdl \n' )
                continue

            # ids of the jobs that have this destination
            dest_jobs = common._db.queryAttrJob({'dlsDestination':distDest},'jobId')
            selected = [i for i in self.nj_list if i in dest_jobs]
            if selected:
                self.sub_jobs.append(selected)
        return self.sub_jobs

    def writer(self,list):
        """
        Materialize JDL into file

        list -- list of job id lists, as returned by listOfjobs()

        Every string returned by common.scheduler.writeJDL() for a job id
        list is written to 'File-<c1>_<c2>.jdl' in the share directory.
        c1 is the 1-based position of the job id list; c2 counts the files
        and is not reset between job id lists.  self.jdlFile holds the name
        of the last file written.
        """
        if len(list)==0:
            common.logger.info('No destination available for any job: will not create jdl \n' )

        task = common._db.getTask()
        c1 = 1
        c2 = 1
        for sub_list in list:
            jdl = common.scheduler.writeJDL(sub_list, task)

            for stri in jdl:
                self.jdlFile = 'File-'+str(c1)+'_'+str(c2)+'.jdl'
                j_file = open(common.work_space.shareDir()+'/'+self.jdlFile, 'w')
                # close the file even when the write fails
                try:
                    j_file.write( stri )
                finally:
                    j_file.close()
                c2 += 1
            c1 += 1

        # c2 is still 1 when no file was written: do not claim otherwise.
        if c2 > 1:
            common.logger.info('JDL files are written to '+str(common.work_space.shareDir())+'File-*.jdl \n' )

        return