def _validateDatatier(datatier, dbsUrl, expiration=3600):
    """
    _validateDatatier_

    Check that every datatier in `datatier` is part of the tier list
    obtained for `dbsUrl`.

    The tier list is read through GenericDataCache under a cache name made of
    "dataTierList_" plus the md5 hex digest of the DBS url. If no cache with
    that name exists yet, a MemoryCacheStruct wrapping getDataTiers is
    registered first.

    :param datatier: iterable with the datatier names to check
    :param dbsUrl: DBS url; hashed for the cache name and handed to getDataTiers
    :param expiration: first argument given to MemoryCacheStruct
        (presumably the cache lifetime in seconds - confirm against that class)
    :raises InvalidSpecParameterValue: when at least one datatier is missing
        from the cached tier list
    """
    urlDigest = md5(encodeUnicodeToBytesConditional(dbsUrl, condition=PY3)).hexdigest()
    cacheName = "dataTierList_" + urlDigest

    # Register the cache lazily, only the first time this DBS url is seen
    if not GenericDataCache.cacheExists(cacheName):
        tierCache = MemoryCacheStruct(expiration, getDataTiers, kwargs={'dbsUrl': dbsUrl})
        GenericDataCache.registerCache(cacheName, tierCache)

    dbsTiers = GenericDataCache.getCacheData(cacheName).getData()
    badTiers = list(set(datatier) - set(dbsTiers))
    if not badTiers:
        return
    raise InvalidSpecParameterValue("Bad datatier(s): %s not available in DBS." % badTiers)
def handleDQMFileSaver(self):
    """
    _handleDQMFileSaver_

    Build the command line for the cmssw_handle_dqm_filesaver.py script and
    run it through self.scramRun.

    Harvesting jobs have the dqmFileSaver EDAnalyzer that must be tweaked
    with the dataset name in order to store it properly in the DQMGUI;
    other tweaks (multiRun / runLimits / runIsComplete, read from the job
    baggage) are passed along as well.
    """
    jobBag = self.jobBag
    runIsComplete = getattr(jobBag, "runIsComplete", False)
    multiRun = getattr(jobBag, "multiRun", False)
    runLimits = getattr(jobBag, "runLimits", "")
    self.logger.info("DQMFileSaver set to multiRun: %s, runIsComplete: %s, runLimits: %s",
                     multiRun, runIsComplete, runLimits)

    procScript = "cmssw_handle_dqm_filesaver.py"
    # The same pickle file is used as input and as output of the script
    picklePath = os.path.join(self.stepSpace.location, self.configPickle)
    fragments = ["%s --input_pkl %s --output_pkl %s" % (procScript, picklePath, picklePath)]

    stepConfig = self.step.data.application.configuration
    if hasattr(stepConfig, "pickledarguments"):
        rawArgs = encodeUnicodeToBytesConditional(stepConfig.pickledarguments, condition=PY3)
        datasetName = pickle.loads(rawArgs).get('datasetName', None)
        if datasetName:
            fragments.append(" --datasetName %s" % (datasetName))

    if multiRun and runLimits:
        fragments.append(" --multiRun --runLimits=%s" % (runLimits))
    if runIsComplete:
        fragments.append(" --runIsComplete")

    self.scramRun("".join(fragments))
    return
def createTopLevelFileset(self, topLevelFilesetName=None):
    """
    _createTopLevelFileset_

    Create the top level fileset for the workflow and store it in
    self.topLevelFileset.

    If no name is given, one is built as "<spec name>-<top level task name>",
    followed by "-<block>" when self.block is set and by "-<md5 of the mask>"
    when self.mask is set. The mask is serialised as comma separated
    "key=value" pairs in sorted key order before hashing.
    """
    filesetName = topLevelFilesetName
    if filesetName is None:
        nameParts = ["%s-%s" % (self.wmSpec.name(),
                                self.wmSpec.getTopLevelTask()[0].name())]
        if self.block:
            nameParts.append("-%s" % self.block)
        if self.mask:
            from hashlib import md5
            maskItems = ["%s=%s" % (key, self.mask[key]) for key in sorted(self.mask)]
            maskBytes = encodeUnicodeToBytesConditional(",".join(maskItems), condition=PY3)
            nameParts.append("-%s" % md5(maskBytes).hexdigest())
        filesetName = "".join(nameParts)

    self.topLevelFileset = Fileset(filesetName)
    self.topLevelFileset.create()
    return
def id(self):
    """Generate id for element

    id is deterministic and can be used to identify duplicate elements.
    Calculation only includes fields which affect the workflow and input data.
    Result is an md5 hash of a ';' separated list of:
    workflow name, task name, list of inputs, mask, ACDC info, Dbs instance.

    Parent file info not accounted.

    The value is computed once and then kept in self._id.

    Example:
    >>> WorkQueueElement(RequestName = 'a', TaskName = 'b').id
    '9ef03a6ad8f16d74fb5ba44df92bf1ef'

    Warning: Any change to this function may prevent identical existing and
    new elements from appearing equivalent, thus in the case of expanding
    work subscriptions work duplication can occur. Care must be taken
    if any modification is made.
    """
    if not self._id:
        spacer = ';'  # character not present in any field

        # Fields are collected in the exact order in which they are hashed
        fields = [
            self['RequestName'],
            # Task will be None in global inbox
            repr(self['TaskName']),
            ",".join(sorted(self['Inputs'].keys())),
        ]
        # Check repr is reproducible - should be
        if self['Mask']:
            fields.append(",".join(["%s=%s" % (key, value)
                                    for key, value in viewitems(self['Mask'])]))
        else:
            fields.append("None")
        # Check ACDC is deterministic and all params relevant
        fields.append(",".join(["%s=%s" % (key, value)
                                for key, value in viewitems(self['ACDC'])]))
        fields.append(repr(self['Dbs']))

        # Assume md5 is good enough for now
        digest = md5()
        for field in fields:
            digest.update(encodeUnicodeToBytesConditional(field + spacer, condition=PY3))
        self._id = digest.hexdigest()
    return self._id
def execute(self, emulator=None):
    """
    _execute_

    Run the CMSSW step of this job.

    The sequence is: bootstrap the SCRAM project and runtime, run the
    pre-scripts (plain and within scram), write the wrapper script
    (CONFIG_BLOB) and run it through /bin/bash, then parse the job report
    and decorate it with workflow information and analysis files.

    :param emulator: optional object; when given, nothing is executed and
        the result of emulator.emulate(self.step, self.job) is returned
    :return: None (or the emulator result)
    :raises WMExecutionFailure: 99108 if a previous cmsRun step failed,
        50513 on scram or pre-script failures, the cmsRun/linux exit code
        when the wrapper script fails, 50115 when the job report cannot be
        parsed after a successful run
    """
    if emulator is not None:
        return emulator.emulate(self.step, self.job)

    logging.info("Steps.Executors.%s.execute called", self.__class__.__name__)

    stepModule = "WMTaskSpace.%s" % self.stepName

    overrides = {}
    if hasattr(self.step, 'override'):
        overrides = self.step.override.dictionary_()
    self.failedPreviousStep = overrides.get('previousCmsRunFailure', False)

    if self.failedPreviousStep:
        # the previous cmsRun step within this task failed
        # don't bother executing anything else then
        msg = WM_JOB_ERROR_CODES[99108]
        logging.critical(msg)
        self._setStatus(99108, msg)
        raise WMExecutionFailure(99108, "CmsRunFailure", msg)

    # write the wrapper script to a temporary location
    # I don't pass it directly through os.system because I don't
    # trust that there won't be shell-escape shenanigans with
    # arbitrary input files
    scramSetup = self.step.application.setup.softwareEnvironment
    scramCommand = self.step.application.setup.scramCommand
    scramProject = self.step.application.setup.scramProject
    scramArch = self.step.application.setup.scramArch
    cmsswVersion = self.step.application.setup.cmsswVersion
    jobReportXML = self.step.output.jobReport
    cmsswCommand = self.step.application.command.executable
    cmsswConfig = self.step.application.command.configuration
    cmsswArguments = self.step.application.command.arguments
    userTarball = ','.join(self.step.user.inputSandboxes)
    userFiles = ','.join(self.step.user.userFiles)
    logging.info('User files are %s', userFiles)
    logging.info('User sandboxes are %s', userTarball)

    scramArch = getSingleScramArch(scramArch)

    # Multicore and GPU settings are only logged here
    try:
        multicoreSettings = self.step.application.multicore
        logging.info("CMSSW configured for %s cores and %s event streams",
                     multicoreSettings.numberOfCores, multicoreSettings.eventStreams)
    except AttributeError:
        logging.info("No value set for multicore numberOfCores or eventStreams")

    try:
        gpuSettings = self.step.application.gpu
        logging.info("CMSSW configured for GPU required: %s, with these settings: %s",
                     gpuSettings.gpuRequired, gpuSettings.gpuRequirements)
    except AttributeError:
        logging.info("No value set for GPU gpuRequired and/or gpuRequirements")

    logging.info("Executing CMSSW step")

    #
    # set any global environment variables
    #
    try:
        os.environ['FRONTIER_ID'] = 'wmagent_%s' % (self.report.data.workload)
    except Exception as ex:
        # best effort on purpose: the job goes on without FRONTIER_ID
        logging.error('Have critical error in setting FRONTIER_ID: %s', str(ex))
        logging.error('Continuing, as this is not a critical function yet.')

    #
    # scram bootstrap
    #
    scram = Scram(
        command=scramCommand,
        version=cmsswVersion,
        initialise=self.step.application.setup.softwareEnvironment,
        directory=self.step.builder.workingDir,
        architecture=scramArch,
    )

    logging.info("Runing SCRAM")
    try:
        projectOutcome = scram.project()
    except Exception as ex:
        msg = WM_JOB_ERROR_CODES[50513]
        msg += "\nDetails: %s" % str(ex)
        logging.critical(msg)
        raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

    if projectOutcome > 0:
        msg = WM_JOB_ERROR_CODES[50513]
        msg += "\nDetails: %s" % str(scram.diagnostic())
        logging.critical(msg)
        raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

    runtimeOutcome = scram.runtime()
    if runtimeOutcome > 0:
        msg = WM_JOB_ERROR_CODES[50513]
        msg += "\nDetails: %s" % str(scram.diagnostic())
        logging.critical(msg)
        raise WMExecutionFailure(50513, "ScramSetupFailure", msg)

    #
    # pre scripts
    #
    logging.info("Running PRE scripts")
    for script in self.step.runtime.preScripts:
        # TODO: Exception handling and error handling & logging
        # The command is fed to a bash process through its stdin
        scriptProcess = subprocess.Popen(
            ["/bin/bash"],
            shell=True,
            cwd=self.step.builder.workingDir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
        )
        # BADPYTHON
        invokeCommand = "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH\n"
        invokeCommand += "{} -m WMCore.WMRuntime.ScriptInvoke {} {} \n".format(
            sys.executable, stepModule, script)
        logging.info(" Invoking command:\n%s", invokeCommand)
        scriptProcess.stdin.write(encodeUnicodeToBytesConditional(invokeCommand, condition=PY3))
        stdout, stderr = scriptProcess.communicate()
        retCode = scriptProcess.returncode
        if retCode > 0:
            msg = "Error running command\n%s\n" % invokeCommand
            msg += "%s\n %s\n %s\n" % (retCode, stdout, stderr)
            logging.critical("Error running command")
            logging.critical(msg)
            raise WMExecutionFailure(50513, "PreScriptFailure", msg)

    #
    # pre scripts with scram
    #
    logging.info("RUNNING SCRAM SCRIPTS")
    for script in self.step.runtime.scramPreScripts:
        # invoke scripts with scram()
        runtimeDir = getattr(self.step.runtime, 'scramPreDir', None)
        invokeCommand = self.step.runtime.invokeCommand if hasattr(self.step.runtime, 'invokeCommand') else \
            "%s -m WMCore.WMRuntime.ScriptInvoke %s" % (sys.executable, stepModule)
        invokeCommand += " %s \n" % script
        retCode = scram(invokeCommand, runtimeDir=runtimeDir)
        if retCode > 0:
            msg = "Error running command\n%s\n" % invokeCommand
            msg += "%s\n " % retCode
            msg += scram.diagnostic()
            logging.critical(msg)
            raise WMExecutionFailure(50513, "PreScriptScramFailure", msg)

    configPath = "%s/%s-main.sh" % (self.step.builder.workingDir, self.stepName)
    with open(configPath, 'w') as handle:
        handle.write(CONFIG_BLOB)

    # spawn this new process
    # the script looks for:
    # <SCRAM_COMMAND> <SCRAM_PROJECT> <CMSSW_VERSION> <JOB_REPORT> <EXECUTABLE> <CONFIG>
    args = [
        '/bin/bash', configPath, scramSetup, scramArch, scramCommand,
        scramProject, cmsswVersion, jobReportXML, cmsswCommand,
        cmsswConfig, userTarball, userFiles, cmsswArguments
    ]
    logging.info("Executing CMSSW. args: %s", args)

    # possibly needed environment overrides for CMSSW call go here
    envOverride = {}
    # Do not pass WM PYTHONPATH to CMSSW environment
    pythonPath = os.environ.get('PYTHONPATH', '')
    envOverride['PYTHONPATH'] = ""
    # work around problem with GSI authentication plugin and EOS at CERN
    if socket.getfqdn().endswith("cern.ch"):
        envOverride['XRD_LOADBALANCERTTL'] = "86400"
    # some libraries linked with CMSSW need HOME in the environment
    if 'HOME' not in os.environ:
        envOverride['HOME'] = os.environ.get('PWD', "/")

    # open the output files; the "with" block guarantees that both handles
    # are closed (and flushed) on every path, including the failure path
    # below which raises before reaching the end of this method
    with open(self.step.output.stdout, 'w') as stdoutHandle, \
            open(self.step.output.stderr, 'w') as stderrHandle:
        os.environ.update(envOverride)
        try:
            returnCode = subprocess.call(args, stdout=stdoutHandle, stderr=stderrHandle)
        finally:
            # Return PYTHONPATH to its original value, as this
            # is needed for stepChain workflows, so other prescripts
            # are able to find WMCore modules. Done in "finally" so that
            # it also happens when the subprocess call itself raises.
            envOverride['PYTHONPATH'] = pythonPath
            os.environ.update(envOverride)

    returnMessage = None

    if returnCode != 0:
        argsDump = {'arguments': args}
        msg = "Error running cmsRun\n%s\n" % argsDump
        try:
            self.report.parse(jobReportXML, stepName=self.stepName)
            (returnCode, returnMessage) = self.report.getStepExitCodeAndMessage(stepName=self.stepName)
            msg += "CMSSW Return code: %s\n" % returnCode
        except Exception:
            # If report parsing fails, report linux exit code
            msg += "Linux Return code: %s\n" % returnCode
        finally:
            # Whatever happened above, the step is reported as failed
            logging.critical(msg)
            logging.critical("Error message: %s", returnMessage)
            self._setStatus(returnCode, returnMessage)
            raise WMExecutionFailure(returnCode, "CmsRunFailure", msg)
    else:
        self._setStatus(returnCode, returnMessage)

    try:
        self.report.parse(jobReportXML, stepName=self.stepName)
    except Exception as ex:
        msg = WM_JOB_ERROR_CODES[50115]
        msg += "\nDetails: %s" % str(ex)
        raise WMExecutionFailure(50115, "BadJobReportXML", msg)

    # Step level values take precedence over the task level ones
    stepHelper = WMStepHelper(self.step)
    typeHelper = stepHelper.getTypeHelper()

    acquisitionEra = typeHelper.getAcqEra() or self.task.getAcquisitionEra()
    processingVer = typeHelper.getProcVer() or self.task.getProcessingVersion()
    processingStr = typeHelper.getProcStr() or self.task.getProcessingString()
    prepID = typeHelper.getPrepId() or self.task.getPrepID()
    globalTag = typeHelper.getGlobalTag()

    validStatus = self.workload.getValidStatus()
    inputPath = self.task.getInputDatasetPath()
    campaign = self.workload.getCampaign()
    cacheUrl, cacheDB, configID = stepHelper.getConfigInfo()

    self.report.setValidStatus(validStatus=validStatus)
    self.report.setGlobalTag(globalTag=globalTag)
    self.report.setCampaign(campaign)
    self.report.setPrepID(prepID)
    self.report.setInputDataset(inputPath=inputPath)
    self.report.setAcquisitionProcessing(acquisitionEra=acquisitionEra,
                                         processingVer=processingVer,
                                         processingStr=processingStr)
    self.report.setConfigURL(configURL="%s;;%s;;%s" % (cacheUrl, cacheDB, configID))

    # Attach info to files
    self.report.addInfoToOutputFilesForStep(stepName=self.stepName, step=self.step)

    self.report.checkForOutputFiles(stepName=self.stepName)
    self.report.checkForAdlerChecksum(stepName=self.stepName)
    self.report.checkForRunLumiInformation(stepName=self.stepName)

    # Any value that does not compare equal to True discards the output
    if self.step.output.keep != True:
        self.report.killOutput()
    else:
        # Check that we only keep the desired output
        for module in stepHelper.getIgnoredOutputModules():
            self.report.deleteOutputModuleForStep(stepName=self.stepName, moduleName=module)

    # Add stageout LFN to existing TFileService files
    reportAnalysisFiles = self.report.getAnalysisFilesFromStep(self.stepName)
    for reportAnalysisFile in reportAnalysisFiles:
        newLFN = analysisFileLFN(reportAnalysisFile.fileName, self.step.user.lfnBase, self.job)
        addAttributesToFile(reportAnalysisFile, pfn=reportAnalysisFile.fileName,
                            lfn=newLFN, validate=False)

    # Add analysis file entries for additional files listed in workflow
    for fileName in stepHelper.listAnalysisFiles():
        analysisFile = stepHelper.getAnalysisFile(fileName)
        if os.path.isfile(analysisFile.fileName):
            newLFN = analysisFileLFN(analysisFile.fileName, analysisFile.lfnBase, self.job)
            self.report.addAnalysisFile(analysisFile.fileName,
                                        lfn=newLFN,
                                        Source='UserDefined',
                                        pfn=os.path.join(os.getcwd(), analysisFile.fileName),
                                        validate=False)

    return
def stream_maybe_etag(size_limit, etag, reply):
    """Maybe generate ETag header for the response, and handle If-Match
    and If-None-Match request headers. Consumes the reply until at most
    `size_limit` bytes. If the response fits into that size, adds the
    ETag header and matches it against any If-Match / If-None-Match
    request headers and replies appropriately.

    If the response is fully buffered, and the `reply` generator actually
    results in an error and sets X-Error-HTTP / X-Error-Detail headers,
    converts that error back into a real HTTP error response. Otherwise
    responds with the fully buffered body directly, without generator
    and chunking. In other words, responses smaller than `size_limit`
    are always fully buffered and replied immediately without chunking.
    If the response is not fully buffered, it's guaranteed to be output
    at original chunk boundaries.

    Note that if this function is fed the output from `stream_compress()`
    as it normally would be, the `size_limit` constrains the compressed
    size, and chunk boundaries correspond to compressed chunks.

    :param size_limit: maximum number of chunk units buffered before
        falling back to streaming
    :param etag: object accumulating the ETag; only its value() method is
        called here, it is otherwise handed to _etag_tail
    :param reply: iterable producing the response chunks
    :return: the fully buffered body, or the result of _etag_tail when
        the reply is streamed
    :raises cherrypy.HTTPError: when the buffered reply carries an
        X-Error-HTTP header
    """
    req = cherrypy.request
    res = cherrypy.response
    match = [str(x) for x in (req.headers.elements('If-Match') or [])]
    nomatch = [str(x) for x in (req.headers.elements('If-None-Match') or [])]

    # If ETag is already set, match conditions and output without buffering.
    etagval = res.headers.get('ETag', None)
    if etagval:
        _etag_match(res.status or 200, etagval, match, nomatch)
        res.headers['Trailer'] = 'X-REST-Status'
        return _etag_tail([], reply, None)

    # Buffer up to size_limit bytes internally. This internally builds up the
    # ETag value inside 'etag'. In case of exceptions the ETag invalidates.
    # If we exceed the limit, fall back to streaming without checking ETag
    # against If-Match/If-None-Match. We'll still set the ETag in the trailer
    # headers, so clients which understand trailers will get the value; most
    # clients including browsers will ignore them.
    size = 0
    result = []
    for chunk in reply:
        result.append(chunk)
        size += len(chunk)
        if size > size_limit:
            res.headers['Trailer'] = 'X-REST-Status'
            return _etag_tail(result, reply, etag)

    # We've buffered the entire response, but it may be an error reply. The
    # generator code does not know if it's allowed to raise exceptions, so
    # it swallows all errors and converts them into X-* headers. We recover
    # the original HTTP response code and message from X-Error-{HTTP,Detail}
    # headers, if any are present.
    err = res.headers.get('X-Error-HTTP', None)
    if err:
        message = res.headers.get('X-Error-Detail', 'Original error lost')
        raise cherrypy.HTTPError(int(err), message)

    # OK, we buffered the entire reply and it's ok. Check ETag match criteria.
    # The original stream generator must guarantee that if it fails it resets
    # the 'etag' value, even if the error handlers above didn't run.
    etagval = etag.value()
    if etagval:
        res.headers['ETag'] = etagval
        _etag_match(res.status or 200, etagval, match, nomatch)

    # OK, respond with the buffered reply as a plain string.
    # TODO investigate why `result` is a list of bytes strings in py3
    # The chunks are joined in one pass (no repeated string concatenation).
    emptyBody = b"" if PY3 else ""
    resp = emptyBody.join(encodeUnicodeToBytesConditional(item, condition=PY3)
                          for item in result)
    # Content-Length must describe what is actually sent: measure the body
    # after encoding, since a text chunk with non-ASCII characters encodes
    # to more bytes than len(chunk) counted above.
    res.headers['Content-Length'] = len(resp)
    return resp
def gen_color(val):
    "Generate a color code for the given string value: '#' plus the first 6 hex digits of its md5"
    valBytes = encodeUnicodeToBytesConditional(val, condition=PY3)
    hexDigest = hashlib.md5(valBytes).hexdigest()
    return '#%s' % hexDigest[:6]
def getJobParameters(self, jobList):
    """
    _getJobParameters_

    Return a list of dictionaries with submit parameters per job.

    :param jobList: iterable of job dictionaries; the keys read here include
        'cache_dir', 'sandbox', 'packageDir', 'id', 'retry_count',
        'request_name', 'jobid', 'task_name', 'task_type', 'activity',
        'requestType', 'requiresGPU' and 'task_id', plus a number of
        optional ones accessed through job.get()
    :return: list with one dictionary per job, in the same order as jobList,
        mapping submit attribute names to their string values
    """
    undefined = 'UNDEFINED'
    # Same pattern for every job: compile it once, outside of the loop
    groupRegex = re.compile("^[a-zA-Z0-9_]+_([a-zA-Z0-9]+)-")

    jobParameters = []
    for job in jobList:
        ad = {}

        ad['initial_Dir'] = encodeUnicodeToBytesConditional(job['cache_dir'], condition=PY2)
        ad['transfer_input_files'] = "%s,%s/%s,%s" % (job['sandbox'], job['packageDir'],
                                                      'JobPackage.pkl', self.unpacker)
        ad['Arguments'] = "%s %i %s" % (os.path.basename(job['sandbox']), job['id'], job["retry_count"])
        ad['transfer_output_files'] = "Report.%i.pkl,wmagentJob.log" % job["retry_count"]

        # Dictionary keys need to be consistent across all jobs within the same
        # clusterId when working with queue_with_itemdata()
        # Initialize 'Requirements' to an empty string for all jobs.
        # See issue: https://htcondor-wiki.cs.wisc.edu/index.cgi/tktview?tn=7715
        ad['Requirements'] = ''
        # Do not define custom Requirements for Volunteer resources
        if self.reqStr is not None:
            ad['Requirements'] = self.reqStr

        ad['My.x509userproxy'] = classad.quote(self.x509userproxy)
        sites = ','.join(sorted(job.get('possibleSites')))
        ad['My.DESIRED_Sites'] = classad.quote(str(sites))
        sites = ','.join(sorted(job.get('potentialSites')))
        ad['My.ExtDESIRED_Sites'] = classad.quote(str(sites))
        ad['My.CMS_JobRetryCount'] = str(job['retry_count'])
        ad['My.WMAgent_RequestName'] = classad.quote(
            encodeUnicodeToBytesConditional(job['request_name'], condition=PY2))
        # The group is the token between the last '_' and the first '-'
        # of the leading part of the request name
        match = groupRegex.match(job['request_name'])
        if match:
            ad['My.CMSGroups'] = classad.quote(match.groups()[0])
        else:
            ad['My.CMSGroups'] = undefined
        ad['My.WMAgent_JobID'] = str(job['jobid'])
        ad['My.WMAgent_SubTaskName'] = classad.quote(
            encodeUnicodeToBytesConditional(job['task_name'], condition=PY2))
        ad['My.CMS_JobType'] = classad.quote(
            encodeUnicodeToBytesConditional(job['task_type'], condition=PY2))
        ad['My.CMS_Type'] = classad.quote(activityToType(job['activity']))
        ad['My.CMS_RequestType'] = classad.quote(job['requestType'])

        # Handling for AWS, cloud and opportunistic resources
        ad['My.AllowOpportunistic'] = str(job.get('allowOpportunistic', False))
        if job.get('inputDataset'):
            ad['My.DESIRED_CMSDataset'] = classad.quote(
                encodeUnicodeToBytesConditional(job['inputDataset'], condition=PY2))
        else:
            ad['My.DESIRED_CMSDataset'] = undefined
        if job.get('inputDatasetLocations'):
            sites = ','.join(sorted(job['inputDatasetLocations']))
            ad['My.DESIRED_CMSDataLocations'] = classad.quote(str(sites))
        else:
            ad['My.DESIRED_CMSDataLocations'] = undefined
        if job.get('inputPileup'):
            cmsPileups = ','.join(sorted(job['inputPileup']))
            ad['My.DESIRED_CMSPileups'] = classad.quote(str(cmsPileups))
        else:
            ad['My.DESIRED_CMSPileups'] = undefined

        # HighIO
        ad['My.Requestioslots'] = str(1 if job['task_type'] in ["Merge", "Cleanup", "LogCollect"] else 0)

        # GPU resource handling
        # while we do not support a third option for RequiresGPU, make a binary decision
        if job['requiresGPU'] == "required":
            ad['My.RequiresGPU'] = "1"
            ad['request_GPUs'] = "1"
        else:
            ad['My.RequiresGPU'] = "0"
            ad['request_GPUs'] = "0"
        if job.get('gpuRequirements', None):
            ad['My.GPUMemoryMB'] = str(job['gpuRequirements']['GPUMemoryMB'])
            cudaCapabilities = ','.join(sorted(job['gpuRequirements']['CUDACapabilities']))
            ad['My.CUDACapability'] = classad.quote(str(cudaCapabilities))
            ad['My.CUDARuntime'] = classad.quote(job['gpuRequirements']['CUDARuntime'])
        else:
            ad['My.GPUMemoryMB'] = undefined
            ad['My.CUDACapability'] = undefined
            ad['My.CUDARuntime'] = undefined

        # Performance and resource estimates (including JDL magic tweaks)
        origCores = job.get('numberOfCores', 1)
        # estimatedJobTime is divided by 60 to get minutes; default is 12 hours
        estimatedMins = int(job['estimatedJobTime'] / 60.0) if job.get('estimatedJobTime') else 12 * 60
        estimatedMinsSingleCore = estimatedMins * origCores
        # For now, assume a 15 minute job startup overhead -- condor will round this up further
        ad['My.EstimatedSingleCoreMins'] = str(estimatedMinsSingleCore)
        ad['My.OriginalMaxWallTimeMins'] = str(estimatedMins)
        ad['My.MaxWallTimeMins'] = 'WMCore_ResizeJob ? (EstimatedSingleCoreMins/RequestCpus + 15) : OriginalMaxWallTimeMins'

        requestMemory = int(job['estimatedMemoryUsage']) if job.get('estimatedMemoryUsage', None) else 1000
        ad['My.OriginalMemory'] = str(requestMemory)
        ad['My.ExtraMemory'] = str(self.extraMem)
        ad['request_memory'] = 'OriginalMemory + ExtraMemory * (WMCore_ResizeJob ? (RequestCpus-OriginalCpus) : 0)'

        requestDisk = int(job['estimatedDiskUsage']) if job.get('estimatedDiskUsage', None) \
            else 20 * 1000 * 1000 * origCores
        ad['request_disk'] = str(requestDisk)

        # Set up JDL for multithreaded jobs.
        # By default, RequestCpus will evaluate to whatever CPU request was in the workflow.
        # If the job is labelled as resizable, then the logic is more complex:
        # - If the job is running in a slot with N cores, this should evaluate to N
        # - If the job is being matched against a machine, match all available CPUs, provided
        # they are between min and max CPUs.
        # - Otherwise, just use the original CPU count.
        # Floor division: a core count has to be an integer, while "/" gives
        # a float under python3 (e.g. "2.0" or "1.5" for 4 or 3 cores)
        ad['My.MinCores'] = str(job.get('minCores', max(1, origCores // 2)))
        ad['My.MaxCores'] = str(max(int(job.get('maxCores', origCores)), origCores))
        ad['My.OriginalCpus'] = str(origCores)
        # Prefer slots that are closest to our MaxCores without going over.
        # If the slot size is _greater_ than our MaxCores, we prefer not to
        # use it - we might unnecessarily fragment the slot.
        ad['Rank'] = 'isUndefined(Cpus) ? 0 : ifThenElse(Cpus > MaxCores, -Cpus, Cpus)'
        # Record the number of CPUs utilized at match time. We'll use this later
        # for monitoring and accounting. Defaults to 0; once matched, it'll
        # put an attribute in the job MATCH_EXP_JOB_GLIDEIN_Cpus = 4
        ad['My.JOB_GLIDEIN_Cpus'] = classad.quote("$$(Cpus:0)")
        # Make sure the resize request stays within MinCores and MaxCores.
        ad['My.RequestResizedCpus'] = '(Cpus>MaxCores) ? MaxCores : ((Cpus < MinCores) ? MinCores : Cpus)'
        # If the job is running, then we should report the matched CPUs in RequestCpus - but only if there are sane
        # values. Otherwise, we just report the original CPU request
        ad['My.JobCpus'] = ('((JobStatus =!= 1) && (JobStatus =!= 5) && !isUndefined(MATCH_EXP_JOB_GLIDEIN_Cpus) '
                            '&& (int(MATCH_EXP_JOB_GLIDEIN_Cpus) isnt error)) ? int(MATCH_EXP_JOB_GLIDEIN_Cpus) : OriginalCpus')
        # Cpus is taken from the machine ad - hence it is only defined when we are doing negotiation.
        # Otherwise, we use either the cores in the running job (if available) or the original cores.
        ad['request_cpus'] = 'WMCore_ResizeJob ? (!isUndefined(Cpus) ? RequestResizedCpus : JobCpus) : OriginalCpus'
        ad['My.WMCore_ResizeJob'] = str(job.get('resizeJob', False))

        taskPriority = int(job.get('taskPriority', 1))
        priority = int(job.get('wf_priority', 0))
        ad['My.JobPrio'] = str(int(priority + taskPriority * self.maxTaskPriority))
        ad['My.PostJobPrio1'] = str(int(-1 * len(job.get('potentialSites', []))))
        ad['My.PostJobPrio2'] = str(int(-1 * job['task_id']))

        # Add OS requirements for jobs
        requiredOSes = self.scramArchtoRequiredOS(job.get('scramArch'))
        ad['My.REQUIRED_OS'] = classad.quote(encodeUnicodeToBytesConditional(requiredOSes, condition=PY2))
        cmsswVersions = ','.join(job.get('swVersion'))
        ad['My.CMSSW_Versions'] = classad.quote(encodeUnicodeToBytesConditional(cmsswVersions, condition=PY2))
        requiredArch = self.scramArchtoRequiredArch(job.get('scramArch'))
        # NOTE(review): both branches below replace whatever was stored in
        # ad['Requirements'] above (including self.reqStr) - confirm that
        # dropping the custom requirements string is intended.
        if not requiredArch:  # only Cleanup jobs should not have ScramArch defined
            ad['Requirements'] = '(TARGET.Arch =!= Undefined)'
        else:
            ad['Requirements'] = '(TARGET.Arch =?= "{}")'.format(requiredArch)

        jobParameters.append(ad)

    return jobParameters
newWriter.__name__ = func.__name__ newWriter.__doc__ = func.__doc__ newWriter.__dict__.update(func.__dict__) subproc = args[0] line = args[1] escapedLine = "echo \"%s\"\n" % line func(subproc, escapedLine) return newWriter # // # // Interceptable function to push commands to the subshell, used to # // enable test mode. procWriter = lambda s, l: s.stdin.write( encodeUnicodeToBytesConditional(l, condition=PY3)) class Scram(object): """ _Scram_ Object to encapsulate a scram "session" that can be used to create a project area, bootstrap the environment and then use that to execute commands Simple enumeration of scram errors is performed, to allow to be mapped to standard exception/error conditions """ def __init__(self, **options):
def newWriter(*args): newWriter.__name__ = func.__name__ newWriter.__doc__ = func.__doc__ newWriter.__dict__.update(func.__dict__) subproc = args[0] line = args[1] escapedLine = "echo \"%s\"\n" % line func(subproc, escapedLine) return newWriter # // # // Interceptable function to push commands to the subshell, used to # // enable test mode. procWriter = lambda s, l: s.stdin.write(encodeUnicodeToBytesConditional(l, condition=PY3)) class Scram(object): """ _Scram_ Object to encapsulate a scram "session" that can be used to create a project area, bootstrap the environment and then use that to execute commands Simple enumeration of scram errors is performed, to allow to be mapped to standard exception/error conditions """
def __call__(self):
    """
    _call_

    Examine the step configuration and construct a PSet from that.

    Depending on whether a scenario is configured, the process is either
    created from the scenario/function (and loaded back from the pickle
    file) or loaded through self.loadPSet(). Task, job, pileup and output
    module tweaks are then applied, followed by a series of handle*()
    adjustments, and finally a small python file that unpickles the
    process is written to the step working directory.

    :return: 0 on success
    :raises RuntimeError: if the loaded process has no (or a None) source
    :raises Exception: errors from process creation/loading and from writing
        out the PSet are logged and re-raised unchanged
    """
    self.logger.info("Executing SetupCMSSWPSet...")
    self.jobBag = self.job.getBaggage()
    self.configPickle = getattr(self.step.data.application.command, "configurationPickle", "PSet.pkl")
    self.psetFile = getattr(self.step.data.application.command, "configuration", "PSet.py")
    self.scram = self.createScramEnv()

    scenario = getattr(self.step.data.application.configuration, "scenario", None)
    funcName = getattr(self.step.data.application.configuration, "function", None)
    if scenario is not None and scenario != "":
        self.logger.info("Setting up job scenario/process")
        if getattr(self.step.data.application.configuration, "pickledarguments", None) is not None:
            pklArgs = encodeUnicodeToBytesConditional(
                self.step.data.application.configuration.pickledarguments,
                condition=PY3)
            funcArgs = pickle.loads(pklArgs)
        else:
            funcArgs = {}

        # Create process
        try:
            self.createProcess(scenario, funcName, funcArgs)
        except Exception:
            self.logger.exception("Error creating process for Config/DataProcessing:")
            # bare raise: re-raise the same exception with its original traceback
            raise

        # Now, load the new picked process
        try:
            with open(os.path.join(self.stepSpace.location, self.configPickle), 'rb') as f:
                self.process = Unpickler(f).load()
        except ImportError as ex:
            msg = "Unable to import pset from %s:\n" % self.psetFile
            msg += str(ex)
            self.logger.error(msg)
            raise

        if funcName == "repack":
            self.handleRepackSettings()

        if funcName in ["merge", "alcaHarvesting"]:
            self.handleSingleCoreOverride()

        if socket.getfqdn().endswith("cern.ch"):
            self.handleSpecialCERNMergeSettings(funcName)
    else:
        self.logger.info("DEBUG: Now in the none scenario to load PSET")
        try:
            self.loadPSet()
        except Exception:
            self.logger.exception("Error loading PSet:")
            raise

    # Check process.source exists
    self.logger.info("Debug: Self.process")
    self.logger.info(dir(self.process))
    if getattr(self.process, "source", None) is None and \
            getattr(self.process, "_Process__source", None) is None:
        msg = "Error in CMSSW PSet: process is missing attribute 'source'"
        msg += " or process.source is defined with None value."
        self.logger.error(msg)
        raise RuntimeError(msg)

    self.handleCondorStatusService()
    self.fixupProcess()

    # In case of CRAB3, the number of threads in the PSet should not be overridden
    if not self.crabPSet:
        try:
            self.makeThreadsStreamsTweak()
        except AttributeError as ex:
            # not fatal: the setup goes on without the threads/streams tweak
            self.logger.error("Failed to override numberOfThreads: %s", str(ex))

    # Apply task level tweaks
    makeTaskTweak(self.step.data, self.tweak)
    self.applyPsetTweak(self.tweak, cleanupTweak=True)

    # Check if chained processing is enabled
    # If not - apply the per job tweaks
    # If so - create an override TFC (like done in PA) and then modify thePSet accordingly
    if hasattr(self.step.data.input, "chainedProcessing") and self.step.data.input.chainedProcessing:
        self.logger.info("Handling Chain processing tweaks")
        self.handleChainedProcessingTweak()
    else:
        self.logger.info("Creating job level tweaks")
        makeJobTweak(self.job, self.tweak)
    self.applyPsetTweak(self.tweak, cleanupTweak=True)

    # check for pileup settings presence, pileup support implementation
    # and if enabled, process pileup configuration / settings
    if hasattr(self.step.data, "pileup"):
        self.handlePileup()

    # Apply per output module PSet Tweaks
    self.logger.info("Output module section")
    cmsswStep = self.step.getTypeHelper()
    for om in cmsswStep.listOutputModules():
        mod = cmsswStep.getOutputModule(om)
        modName = mod.getInternalName()
        if funcName == 'merge':
            # Do not use both Merged output label unless useErrorDataset is False
            # Do not use both MergedError output label unless useErrorDataset is True
            useErrorDataset = getattr(self.jobBag, "useErrorDataset", False)
            if useErrorDataset and modName != 'MergedError':
                continue
            if not useErrorDataset and modName == 'MergedError':
                continue
        makeOutputTweak(mod, self.job, self.tweak)
    # allow failed tweaks in this case, to replicate the previous implementation, where it would ignore
    # and continue if it found an output module that doesn't exist and don't want in the pset like: process.Sqlite
    self.applyPsetTweak(self.tweak, allowFailedTweaks=True, cleanupTweak=True)

    # revlimiter for testing
    if getattr(self.step.data.application.command, "oneEventMode", False):
        self.tweak.addParameter('process.maxEvents.input', "customTypeCms.untracked.int32(1)")

    # check for random seeds and the method of seeding which is in the job baggage
    self.handleSeeding()

    # make sure default parametersets for perf reports are installed
    self.handlePerformanceSettings()

    # fixup the dqmFileSaver
    self.handleDQMFileSaver()

    # tweak for jobs reading LHE articles from CERN
    self.handleLHEInput()

    # tweak jobs for enforceGUIDInFileName
    self.handleEnforceGUIDInFileName()

    # Check if we accept skipping bad files
    if hasattr(self.step.data.application.configuration, "skipBadFiles"):
        self.tweak.addParameter(
            "process.source.skipBadFiles",
            "customTypeCms.untracked.bool(%s)" % self.step.data.application.configuration.skipBadFiles)

    # Apply events per lumi section if available
    if hasattr(self.step.data.application.configuration, "eventsPerLumi"):
        self.tweak.addParameter(
            "process.source.numberEventsInLuminosityBlock",
            "customTypeCms.untracked.uint32(%s)" % self.step.data.application.configuration.eventsPerLumi)

    # limit run time if desired
    if hasattr(self.step.data.application.configuration, "maxSecondsUntilRampdown"):
        self.tweak.addParameter(
            "process.maxSecondsUntilRampdown.input",
            "customTypeCms.untracked.PSet(input=cms.untracked.int32(%s))" %
            self.step.data.application.configuration.maxSecondsUntilRampdown)

    # accept an overridden TFC from the step
    if hasattr(self.step.data.application, 'overrideCatalog'):
        self.logger.info("Found a TFC override: %s", self.step.data.application.overrideCatalog)
        self.tweak.addParameter(
            "process.source.overrideCatalog",
            "customTypeCms.untracked.string('%s')" % self.step.data.application.overrideCatalog)

    configFile = self.step.data.application.command.configuration
    workingDir = self.stepSpace.location
    try:
        # Apply the tweaks accumulated above, then write the python
        # configuration file, which simply unpickles the process
        self.applyPsetTweak(self.tweak)

        with open("%s/%s" % (workingDir, configFile), 'w') as handle:
            handle.write("import FWCore.ParameterSet.Config as cms\n")
            handle.write("import pickle\n")
            handle.write("with open('%s', 'rb') as handle:\n" % self.configPickle)
            handle.write(" process = pickle.load(handle)\n")
    except Exception:
        self.logger.exception("Error writing out PSet:")
        raise

    # check for event numbers in the producers
    self.handleProducersNumberOfEvents()

    self.logger.info("CMSSW PSet setup completed!")

    return 0