class ProductionJob(Job): #pylint: disable=too-many-public-methods, too-many-instance-attributes
    """Production job class. Suitable for CLIC studies.

    Need to sub class and overload for other clients.

    Typical life cycle, as implemented by the methods below: configure the job
    (``setProdType``, ``setOutputSE``, ``setInputDataQuery``, ...), let the
    applications be checked through ``_jobSpecificParams``, then call
    ``createProduction`` followed by ``finalizeProd``.
    """

    def __init__(self, script=None):
        """Initialise the production job state and the service clients.

        :param script: forwarded to the base class; when it is empty the default
                       workflow parameters are set via ``__setDefaults``
        """
        super(ProductionJob, self).__init__(script)
        self.prodVersion = __RCSID__
        self.dryrun = False
        self.created = False
        self.checked = False
        self.call_finalization = False
        self.finalsdict = {}
        self.transfid = 0
        self.type = 'Production'
        self.csSection = '/Production/Defaults'
        self.ops = Operations()
        self.fc = FileCatalogClient()
        self.trc = TransformationClient()
        self.defaultProdID = '12345'
        self.defaultProdJobID = '12345'
        self.jobFileGroupSize = 1
        self.nbtasks = 1
        self.slicesize = 0
        self.basename = ''
        self.basepath = self.ops.getValue('/Production/CLIC/BasePath', '/ilc/prod/clic/')
        self.evttype = ''
        self.datatype = ''
        self.energycat = ''
        self.detector = ''
        self.currtrans = None
        self.description = ''
        self.finalpaths = []
        # path -> searchable metadata to register for that path
        self.finalMetaDict = defaultdict(dict)
        self.prodMetaDict = {}
        # path -> non searchable metadata to register for that path
        self.finalMetaDictNonSearch = {}
        self.metadict_external = {}
        self.outputStorage = ''
        self.proxyinfo = getProxyInfo()
        self.inputdataquery = False
        self.inputBKSelection = {}
        self.plugin = 'Standard'
        self.prodGroup = ''
        self.prodTypes = ['MCGeneration', 'MCSimulation', 'Test', 'MCReconstruction',
                          'MCReconstruction_Overlay', 'Merge', 'Split',
                          'MCGeneration_ILD',
                          'MCSimulation_ILD',
                          'MCReconstruction_ILD',
                          'MCReconstruction_Overlay_ILD',
                          'Split_ILD'
                         ]
        self.prodparameters = {}
        self.prodparameters['NbInputFiles'] = 1
        self.prodparameters['nbevts'] = 0
        #self.prodparameters["SWPackages"] = ''
        self._addParameter(self.workflow, "IS_PROD", 'JDL', True, "This job is a production job")
        if not script:
            self.__setDefaults()

        self._recBasePaths = {}
        # upper bound on the number of catalog directories inspected by setInputDataQuery
        self.maxFCFoldersToCheck = 100000

    def __setDefaults(self):
        """Set some default parameters (platform, CPU time, log level, job group, IDs)."""
        self.setPlatform(self.ops.getValue('%s/Platform' % (self.csSection), 'x86_64-slc5-gcc43-opt'))
        self.setCPUTime('300000')
        self.setLogLevel('verbose')
        self.setJobGroup('@{PRODUCTION_ID}')

        # version control
        self._setParameter('productionVersion', 'string', self.prodVersion, 'ProdAPIVersion')

        # General workflow parameters
        self._setParameter('PRODUCTION_ID', 'string', self.defaultProdID.zfill(8), 'ProductionID')
        self._setParameter('JOB_ID', 'string', self.defaultProdJobID.zfill(8), 'ProductionJobID')
        self._setParameter('Priority', 'JDL', '1', 'Priority')
        self._setParameter('emailAddress', 'string', '*****@*****.**', 'CrashEmailAddress')

    def _setParameter(self, name, parameterType, parameterValue, description):
        """Add a workflow parameter, letting the CS override the given value.

        If ``<csSection>/<name>`` holds a non-empty value, that value is used,
        otherwise ``parameterValue`` is used.

        :param str name: parameter name, also the option name looked up in the CS
        :param str parameterType: workflow parameter type
        :param parameterValue: value used when the CS does not define one
        :param str description: parameter description
        """
        # Look the option up once instead of once per use.
        csValue = self.ops.getValue('%s/%s' % (self.csSection, name), '')
        if csValue:
            LOG.debug('Setting %s from CS defaults = %s' % (name, csValue))
            self._addParameter(self.workflow, name, parameterType, csValue, description)
        else:
            LOG.debug('Setting parameter %s = %s' % (name, parameterValue))
            self._addParameter(self.workflow, name, parameterType, parameterValue, description)

    def setConfig(self, version):
        """Define the ILDConfig package version to obtain.

        :param str version: version of the ILDConfig package
        :returns: S_OK
        """
        appName = 'ILDConfig'
        self._addSoftware(appName.lower(), version)
        self.prodparameters['ILDConfigVersion'] = version
        self._addParameter(self.workflow, 'ILDConfigPackage', 'JDL', appName + version, 'ILDConfig package')
        return S_OK()

    def setClicConfig(self, version):
        """Define the ClicConfig package to obtain.

        :param str version: version of the ClicConfig package
        :returns: S_OK
        """
        return self.setConfigPackage('ClicConfig', version)

    def setConfigPackage(self, appName, version):
        """Define the config package to obtain.

        :param str appName: name of the configuration package
        :param str version: version of the configuration package
        :returns: S_OK
        """
        self._addSoftware(appName.lower(), version)
        self._addParameter(self.workflow, appName + 'Package', 'JDL', appName + version, appName + 'package')
        self.prodparameters[appName + 'Version'] = version
        return S_OK()

    def setDryRun(self, run):
        """In case one wants to get all the info as if the prod was being submitted.

        :param bool run: when True no transformation or metadata is actually registered
        """
        self.dryrun = run

    def setProdGroup(self, group):
        """Set a user defined tag for the production as it appears on the monitoring page.

        :param str group: production group
        """
        self.prodGroup = group

    def setProdPlugin(self, plugin):
        """Set the plugin to be used for creating the production jobs.

        :param str plugin: plugin name
        """
        self.plugin = plugin

    def setJobFileGroupSize(self, files):
        """Set the number of files to be input to each job created.

        Must be called before any application has been checked, because the
        value enters the computation of the total number of events.

        :param int files: number of input files per job
        :returns: the result of ``_reportError`` if applications were already checked,
                  otherwise None
        """
        if self.checked:
            return self._reportError("This input is needed at the beginning of the production definition: it is "
                                     "needed for total number of evts.")
        self.jobFileGroupSize = files
        self.prodparameters['NbInputFiles'] = files

    def setNbEvtsPerSlice(self, nbevts):
        """Define the number of events in a slice.

        :param int nbevts: number of events per slice
        """
        self.slicesize = nbevts

    def setProdType(self, prodType):
        """Set the production type.

        :param str prodType: one of ``self.prodTypes``
        :raises TypeError: if ``prodType`` is not an allowed production type
        """
        if prodType not in self.prodTypes:
            raise TypeError('Prod must be one of %s' % (', '.join(self.prodTypes)))
        self.setType(prodType)

    def setWorkflowName(self, name):
        """Set the workflow name.

        :param str name: workflow name, also used for the XML file written by ``createWorkflow``
        """
        self.workflow.setName(name)
        self.name = name

    def setWorkflowDescription(self, desc):
        """Set the workflow description.

        :param str desc: workflow description
        """
        self.workflow.setDescription(desc)

    def createWorkflow(self):
        """Create the workflow XML for local testing.

        Writes ``<name>.xml``; an already existing file of that name is first
        moved to ``<name>.xml.backup``.
        """
        name = '%s.xml' % self.name
        if os.path.exists(name):
            shutil.move(name, '%s.backup' % name)
        self.workflow.toXMLFile(name)

    def setOutputSE(self, outputse):
        """Define where the output file(s) will go.

        :param str outputse: output storage element
        :returns: S_OK
        """
        self.outputStorage = outputse
        return S_OK()

    def setInputDataQuery(self, metadata):
        """Define the input data query needed.

        The metadata of the directories matching the query is looked up in the
        file catalog and used to set event type, number of events, energy,
        data type and detector type of this production.

        :param dict metadata: metadata query; must contain the key 'ProdID'
        :returns: S_OK, or an error structure if the keys are invalid, no directory
                  matches, the catalog cannot be read or 'EvtType' is missing
        """
        retMetaKey = self._checkMetaKeys(metadata.keys())
        if not retMetaKey['OK']:
            return retMetaKey

        if "ProdID" not in metadata:
            return self._reportError("Input metadata dictionary must contain at least a key 'ProdID' as reference")

        retDirs = self._checkFindDirectories(metadata)
        if not retDirs['OK']:
            return retDirs

        # list() so that slicing works whatever type values() returns
        dirs = list(retDirs['Value'].values())
        # Start from the query itself so compatmeta is defined even if no directory is inspected.
        compatmeta = dict(metadata)
        for mdir in dirs[:self.maxFCFoldersToCheck]:
            LOG.notice("Directory: %s" % mdir)
            res = self.fc.getDirectoryUserMetadata(mdir)
            if not res['OK']:
                return self._reportError("Error looking up the catalog for directory metadata")
            # the metadata of the last inspected directory wins, overlaid with the query
            compatmeta = res['Value']
            compatmeta.update(metadata)

        if 'EvtType' in compatmeta:
            self.evttype = JobHelpers.getValue(compatmeta['EvtType'], str, basestring)
        else:
            return self._reportError("EvtType is not in the metadata, it has to be!")

        if 'NumberOfEvents' in compatmeta:
            self.nbevts = JobHelpers.getValue(compatmeta['NumberOfEvents'], int, None)

        self.basename = self.evttype
        LOG.notice("MetaData: %s" % compatmeta)
        LOG.notice("MetaData: %s" % metadata)
        if "Energy" in compatmeta:
            self.energycat = JobHelpers.getValue(compatmeta["Energy"], str, (int, long, basestring))

        # Only parse when an energy category is known: Decimal('') would raise.
        if self.energycat:
            if self.energycat.count("tev"):
                self.energy = Decimal("1000.") * Decimal(self.energycat.split("tev")[0])
            elif self.energycat.count("gev"):
                self.energy = Decimal("1.") * Decimal(self.energycat.split("gev")[0])
            else:
                self.energy = Decimal("1.") * Decimal(self.energycat)

        gendata = False
        if 'Datatype' in compatmeta:
            self.datatype = JobHelpers.getValue(compatmeta['Datatype'], str, basestring)
            if self.datatype == 'gen':
                gendata = True
        # generator level data has no detector attached
        if "DetectorType" in compatmeta and not gendata:
            self.detector = JobHelpers.getValue(compatmeta["DetectorType"], str, basestring)
        self.inputBKSelection = metadata
        self.inputdataquery = True

        self.prodparameters['nbevts'] = self.nbevts
        self.prodparameters["FCInputQuery"] = self.inputBKSelection

        return S_OK()

    def setDescription(self, desc):
        """Set the production's description.

        :param str desc: Description
        :returns: S_OK
        """
        self.description = desc
        return S_OK()

    def getBasePath(self):
        """Return the base path.

        The base path is updated by ``_jobSpecificParams`` whenever output paths are resolved.
        """
        return self.basepath

    def addFinalization(self, uploadData=False, registerData=False, uploadLog=False, sendFailover=False):
        """Request the finalization step; it is added to the workflow in ``createProduction``.

        :param bool uploadData: Upload or not the data to the storage
        :param bool registerData: Register data in the file catalog
        :param bool uploadLog: Upload log file to storage (currently only available for admins,
                               thus add them to OutputSandbox)
        :param bool sendFailover: Send Failover requests, and declare files as processed or
                                  unused in transfDB
        """
        #TODO: Do the registration only once, instead of once for each job
        self.call_finalization = True
        self.finalsdict['uploadData'] = uploadData
        self.finalsdict['registerData'] = registerData
        self.finalsdict['uploadLog'] = uploadLog
        self.finalsdict['sendFailover'] = sendFailover

    def _addRealFinalization(self):
        """Add the finalization step to the workflow.

        This is called at creation: now that the workflow is created at the last
        minute, we need to add this also at the last minute. The enable flags come
        from the values stored by ``addFinalization``.

        :returns: S_OK
        """
        importLine = 'from ILCDIRAC.Workflow.Modules.<MODULE> import <MODULE>'

        dataUpload = ModuleDefinition('UploadOutputData')
        dataUpload.setDescription('Uploads the output data')
        self._addParameter(dataUpload, 'enable', 'bool', False, 'EnableFlag')
        dataUpload.setBody(importLine.replace('<MODULE>', 'UploadOutputData'))

        failoverRequest = ModuleDefinition('FailoverRequest')
        failoverRequest.setDescription('Sends any failover requests')
        self._addParameter(failoverRequest, 'enable', 'bool', False, 'EnableFlag')
        failoverRequest.setBody(importLine.replace('<MODULE>', 'FailoverRequest'))

        registerdata = ModuleDefinition('RegisterOutputData')
        registerdata.setDescription('Module to add in the metadata catalog the relevant info about the files')
        self._addParameter(registerdata, 'enable', 'bool', False, 'EnableFlag')
        registerdata.setBody(importLine.replace('<MODULE>', 'RegisterOutputData'))

        logUpload = ModuleDefinition('UploadLogFile')
        logUpload.setDescription('Uploads the output log files')
        self._addParameter(logUpload, 'enable', 'bool', False, 'EnableFlag')
        logUpload.setBody(importLine.replace('<MODULE>', 'UploadLogFile'))

        errorReport = ModuleDefinition('ReportErrors')
        errorReport.setDescription('Reports errors at the end')
        errorReport.setBody(importLine.replace('<MODULE>', 'ReportErrors'))

        finalization = StepDefinition('Job_Finalization')

        finalization.addModule(dataUpload)
        up = finalization.createModuleInstance('UploadOutputData', 'dataUpload')
        up.setValue("enable", self.finalsdict['uploadData'])

        finalization.addModule(registerdata)
        ro = finalization.createModuleInstance('RegisterOutputData', 'RegisterOutputData')
        ro.setValue("enable", self.finalsdict['registerData'])

        finalization.addModule(logUpload)
        ul = finalization.createModuleInstance('UploadLogFile', 'logUpload')
        ul.setValue("enable", self.finalsdict['uploadLog'])

        finalization.addModule(failoverRequest)
        fr = finalization.createModuleInstance('FailoverRequest', 'failoverRequest')
        fr.setValue("enable", self.finalsdict['sendFailover'])

        # ReportErrors has no enable flag, so the instance itself is not needed
        finalization.addModule(errorReport)
        finalization.createModuleInstance('ReportErrors', 'reportErrors')

        self.workflow.addStep(finalization)
        self.workflow.createStepInstance('Job_Finalization', 'finalization')

        return S_OK()

    def createProduction(self, name=None):
        """Create the production (transformation) from the current workflow.

        In dry-run mode nothing is registered and the transformation ID is set to 12345.

        :param str name: transformation name; defaults to the workflow name
        :returns: S_OK, or an error structure if the proxy is not a production proxy,
                  the production was already created, the workflow cannot be built,
                  a transformation of that name exists, or a service call fails
        """
        if not self.proxyinfo['OK']:
            return S_ERROR("Not allowed to create production, you need a production proxy.")
        if 'groupProperties' not in self.proxyinfo['Value']:
            return S_ERROR("Could not determine groupProperties, you do not have the right proxy.")
        groupProperties = self.proxyinfo['Value']['groupProperties']
        if 'ProductionManagement' not in groupProperties:
            return S_ERROR("Not allowed to create production, you need a production proxy.")
        if self.created:
            return S_ERROR("Production already created.")

        # We need to add the applications to the workflow
        res = self._addToWorkflow()
        if not res['OK']:
            return res
        if self.call_finalization:
            self._addRealFinalization()
        workflowName = self.workflow.getName()
        fileName = '%s.xml' % workflowName
        LOG.verbose('Workflow XML file name is:', '%s' % fileName)
        try:
            self.createWorkflow()
        except Exception as x:
            LOG.error("Exception creating workflow", repr(x))
            return S_ERROR('Could not create workflow')
        with open(fileName, 'r') as oFile:
            workflowXML = oFile.read()
        if not name:
            name = workflowName

        # A successful lookup means a transformation with that name is already registered.
        res = self.trc.getTransformationStats(name)
        if res['OK']:
            return self._reportError("Transformation with name %s already exists! Cannot proceed." % name)

        # Create Transformation
        trans = Transformation()
        trans.setTransformationName(name)
        trans.setDescription(self.description)
        trans.setLongDescription(self.description)
        trans.setType(self.type)
        self.prodparameters['JobType'] = self.type
        trans.setPlugin(self.plugin)
        if self.inputdataquery:
            trans.setGroupSize(self.jobFileGroupSize)
        trans.setTransformationGroup(self.prodGroup)
        trans.setBody(workflowXML)
        if not self.slicesize:
            trans.setEventsPerTask(self.jobFileGroupSize * self.nbevts)
        else:
            trans.setEventsPerTask(self.slicesize)
        self.currtrans = trans
        if self.dryrun:
            LOG.notice('Would create prod called', name)
            self.transfid = 12345
        else:
            res = trans.addTransformation()
            if not res['OK']:
                LOG.error(res['Message'])
                return res
            self.transfid = trans.getTransformationID()['Value']

        if self.inputBKSelection:
            res = self.applyInputDataQuery()
            # Do not activate a production whose input data query could not be registered.
            if not res['OK']:
                LOG.error('Failed to apply the input data query:', res['Message'])
                return res
        if not self.dryrun:
            trans.setAgentType("Automatic")
            trans.setStatus("Active")

        # Append the zero padded transformation ID to every final path and move
        # the production level metadata onto those paths.
        finals = []
        for finalpath in self.finalpaths:
            finalpath = finalpath.rstrip("/")
            finalpath += "/" + str(self.transfid).zfill(8)
            finals.append(finalpath)
            self.finalMetaDict[finalpath].update({"ProdID": self.transfid})
            self.finalMetaDict[finalpath].update(self.prodMetaDict)
            # if 'ILDConfigVersion' in self.prodparameters:
            #   self.finalMetaDict[finalpath].update({"ILDConfig":self.prodparameters['ILDConfigVersion']})

            if self.nbevts:
                self.finalMetaDict[finalpath].update({'NumberOfEvents': self.jobFileGroupSize * self.nbevts})
        self.finalpaths = finals
        self.created = True

        return S_OK()

    def setNbOfTasks(self, nbtasks):
        """Define the number of tasks you want. Useful for generation jobs.

        Only possible once a transformation exists, and not together with a
        metadata selection unless the plugin is 'Limited' or 'SlicedLimited'.

        :param int nbtasks: maximum number of tasks
        :returns: S_OK, S_ERROR
        """
        if not self.currtrans:
            LOG.error("Not transformation defined earlier")
            return S_ERROR("No transformation defined")
        if self.inputBKSelection and self.plugin not in ['Limited', 'SlicedLimited']:
            LOG.error('Metadata selection activated, should not specify the number of jobs')
            return S_ERROR('Metadata selection activated, should not specify the number of jobs')
        self.nbtasks = nbtasks
        self.currtrans.setMaxNumberOfTasks(self.nbtasks) #pylint: disable=E1101
        return S_OK()

    def applyInputDataQuery(self, metadata=None, prodid=None):
        """Tell the production to update itself using the metadata query specified.

        I.e. submit new jobs if new files are added corresponding to same query.

        :param dict metadata: query to use instead of the stored input selection
        :param int prodid: transformation ID, used when none is known from the current transformation
        :returns: S_OK, S_ERROR
        """
        if not self.transfid and self.currtrans:
            self.transfid = self.currtrans.getTransformationID()['Value'] #pylint: disable=E1101
        elif prodid:
            self.transfid = prodid
        if not self.transfid:
            LOG.error("Not transformation defined earlier")
            return S_ERROR("No transformation defined")
        if metadata:
            self.inputBKSelection = metadata

        if not self.dryrun:
            res = self.trc.createTransformationInputDataQuery(self.transfid, self.inputBKSelection)
            if not res['OK']:
                return res
        else:
            LOG.notice("Would use %s as metadata query for production" % str(self.inputBKSelection))
        return S_OK()

    def addMetadataToFinalFiles(self, metadict):
        """Add additional non-query metadata.

        :param dict metadict: metadata added as non searchable metadata to every final path
        :returns: S_OK
        """
        self.metadict_external = metadict
        return S_OK()

    def finalizeProd(self, prodid=None, prodinfo=None):
        """Finalize definition: submit to Transformation service and register metadata.

        :param int prodid: transformation ID overriding the one of the current transformation
        :param dict prodinfo: production parameters replacing the ones collected so far
        :returns: S_OK, or S_ERROR if no transformation is known or the metadata
                  registration fails
        """
        currtrans = 0
        if self.currtrans:
            if not self.dryrun:
                currtrans = self.currtrans.getTransformationID()['Value'] #pylint: disable=E1101
            else:
                currtrans = 12345
        if prodid:
            currtrans = prodid
        if not currtrans:
            LOG.error("Not transformation defined earlier")
            return S_ERROR("No transformation defined")
        if prodinfo:
            self.prodparameters = prodinfo

        info = []
        # 'JobType' is only filled by createProduction; fall back to the job type otherwise.
        info.append('%s Production %s has following parameters:\n'
                    % (self.prodparameters.get('JobType', self.type), currtrans))
        if "Process" in self.prodparameters:
            info.append('- Process %s' % self.prodparameters['Process'])
        if "Energy" in self.prodparameters:
            info.append('- Energy %s GeV' % self.prodparameters["Energy"])

        if not self.slicesize:
            self.prodparameters['nbevts'] = self.jobFileGroupSize * self.nbevts
        else:
            self.prodparameters['nbevts'] = self.slicesize
        if self.prodparameters['nbevts']:
            info.append("- %s events per job" % (self.prodparameters['nbevts']))
        if self.prodparameters.get('lumi', False):
            info.append(' corresponding to a luminosity %s fb' % (self.prodparameters['lumi'] *
                                                                  self.prodparameters['NbInputFiles']))
        if 'FCInputQuery' in self.prodparameters:
            info.append('Using InputDataQuery :')
            for key, val in self.prodparameters['FCInputQuery'].iteritems():
                info.append(' %s = %s' % (key, val))
        if "SWPackages" in self.prodparameters:
            info.append('- SW packages %s' % self.prodparameters["SWPackages"])
        if "SoftwareTag" in self.prodparameters:
            info.append('- SW tags %s' % self.prodparameters["SoftwareTag"])
        if "ILDConfigVersion" in self.prodparameters:
            info.append('- ILDConfig %s' % self.prodparameters['ILDConfigVersion'])
        if 'ClicConfigVersion' in self.prodparameters:
            info.append('- ClicConfig %s' % self.prodparameters['ClicConfigVersion'])
        if 'extraCLIArguments' in self.prodparameters:
            info.append('- ExtraCLIArguments %s' % self.prodparameters['extraCLIArguments'])

        # as this is the very last call all applications are registered, so all software
        # packages are known: add them to the metadata registration
        for finalpath in self.finalpaths:
            if finalpath not in self.finalMetaDictNonSearch:
                self.finalMetaDictNonSearch[finalpath] = {}
            if "SWPackages" in self.prodparameters:
                self.finalMetaDictNonSearch[finalpath]["SWPackages"] = self.prodparameters["SWPackages"]

            if self.metadict_external:
                self.finalMetaDictNonSearch[finalpath].update(self.metadict_external)

        info.append('- Registered metadata: ')
        for path, metadata in sorted(self.finalMetaDict.iteritems()):
            info.append(' %s = %s' % (path, metadata))
        info.append('- Registered non searchable metadata: ')
        for path, metadata in sorted(self.finalMetaDictNonSearch.iteritems()):
            info.append(' %s = %s' % (path, metadata))

        infoString = '\n'.join(info)
        self.prodparameters['DetailedInfo'] = infoString

        # A parameter that cannot be set is logged but does not abort the finalization.
        for name, val in self.prodparameters.iteritems():
            result = self._setProdParameter(currtrans, name, val)
            if not result['OK']:
                LOG.error(result['Message'])

        res = self._registerMetadata()
        if not res['OK']:
            LOG.error('Could not register the following directories:', res['Message'])
            return res
        return S_OK()

    def _createDirectory(self, path, failed, mode=0o775):
        """Create the directory at path if it does not exist.

        :param str path: path to check
        :param failed: mapping of path to list of error messages, extended on failure
        :param int mode: mode to set for directory
        :returns: S_OK, S_ERROR
        """
        exists = returnSingleResult(self.fc.isDirectory(path))
        if exists['OK'] and exists['Value']:
            LOG.verbose('Directory already exists:', path)
            return S_OK()
        result = returnSingleResult(self.fc.createDirectory(path))
        if not result['OK']:
            LOG.error('Failed to create directory:', '%s: %s' % (path, result['Message']))
            failed[path].append(result['Message'])
            return S_ERROR('Failed to create directory %s' % path)
        LOG.verbose('Successfully created directory:', path)
        res = self.fc.changePathMode({path: mode}, False)
        if not res['OK']:
            LOG.error(res['Message'])
            failed[path].append(res['Message'])
            return S_ERROR('Failed to change mode of directory %s' % path)
        LOG.verbose('Successfully changed mode:', path)
        return S_OK()

    def _checkMetadata(self, path, metaCopy):
        """Compare the metadata to set with the metadata already registered for path.

        Entries that are already registered with the same value are removed from
        ``metaCopy`` so they are not set again. If the existing metadata cannot be
        retrieved, nothing is checked.

        :param str path: directory to check
        :param dict metaCopy: metadata to be set; modified in place
        :returns: S_OK, or S_ERROR if an existing value disagrees with the new one
        """
        existingMetadata = self.fc.getDirectoryUserMetadata(path.rstrip('/'))
        if not existingMetadata['OK']:
            return S_OK()
        failure = False
        for key, value in existingMetadata['Value'].iteritems():
            if key in metaCopy and metaCopy[key] != value:
                LOG.error('Metadata values for folder %s disagree for key %s: Existing(%r), new(%r)' %
                          (path, key, value, metaCopy[key]))
                failure = True
            elif key in metaCopy and metaCopy[key] == value:
                LOG.verbose('Meta entry is unchanged', '%s = %s' % (key, value))
                metaCopy.pop(key, None)
        if failure:
            return S_ERROR('Error when setting new metadata, already existing metadata disagrees!')
        return S_OK()

    def _registerMetadata(self):
        """Set metadata for given folders.

        Register path and metadata before the production actually runs. This allows for the
        definition of the full chain in 1 go. Nothing is registered in dry-run mode or when
        the CS option 'Production/PreventMetadataRegistration' is set.

        :returns: S_OK, or S_ERROR listing the paths that could not be handled
        """
        prevent_registration = self.ops.getValue('Production/PreventMetadataRegistration', False)

        if self.dryrun or prevent_registration:
            LOG.notice('Would have created and registered the following\n',
                       '\n '.join([' * %s: %s' % (fPath, val) for fPath, val in self.finalMetaDict.iteritems()]))
            LOG.notice('Would have set this as non searchable metadata', str(self.finalMetaDictNonSearch))
            return S_OK()

        failed = defaultdict(list)
        for path, meta in sorted(self.finalMetaDict.items()):
            res = self._createDirectory(path, failed)
            if not res['OK']:
                continue
            LOG.verbose('Checking to set metadata:', meta)
            # work on a copy: _checkMetadata removes the entries that are already registered
            metaCopy = dict(meta)
            res = self._checkMetadata(path, metaCopy)
            if not res['OK']:
                return res
            if not metaCopy:
                LOG.verbose('No new metadata to set')
                continue

            LOG.verbose('Setting metadata information: ', '%s: %s' % (path, metaCopy))
            result = self.fc.setMetadata(path.rstrip('/'), metaCopy)
            if not result['OK']:
                LOG.error('Could not preset metadata', str(metaCopy))
                LOG.error('Could not preset metadata', result['Message'])
                failed[path].append(result['Message'])

        for path, meta in sorted(self.finalMetaDictNonSearch.items()):
            res = self._createDirectory(path, failed)
            if not res['OK']:
                continue
            LOG.verbose('Setting non searchable metadata information: ', '%s: %s' % (path, meta))
            result = self.fc.setMetadata(path.rstrip('/'), meta)
            if not result['OK']:
                LOG.error('Could not preset non searchable metadata', str(meta))
                LOG.error('Could not preset non searchable metadata', result['Message'])
                failed[path].append(result['Message'])

        if failed:
            return S_ERROR('Failed to register some metadata: %s' % dict(failed))
        return S_OK()

    def getMetadata(self):
        """Return the merged searchable metadata of all final paths, without 'NumberOfEvents'.

        :returns: dict of metadata
        """
        metadict = {}
        for meta in self.finalMetaDict.values():
            metadict.update(meta)
        # As this is not supposed to be a searchable thing
        metadict.pop('NumberOfEvents', None)
        return metadict

    def _setProdParameter(self, prodID, pname, pvalue):
        """Set a production parameter.

        In dry-run mode the parameter is only logged.

        :param prodID: transformation ID
        :param str pname: parameter name
        :param pvalue: parameter value; lists are joined with newlines, numbers are stringified
        :returns: result of the transformation client call, or S_OK in dry-run mode
        """
        if isinstance(pvalue, list):
            pvalue = '\n'.join(pvalue)

        if isinstance(pvalue, (int, long)):
            pvalue = str(pvalue)
        if not self.dryrun:
            result = self.trc.setTransformationParameter(int(prodID), str(pname), str(pvalue))
            if not result['OK']:
                # arguments in the order the message names them: parameter, production, value
                LOG.error('Problem setting parameter %s for production %s and value:\n%s' % (pname, prodID, pvalue))
        else:
            LOG.notice("Adding %s=%s to transformation" % (str(pname), str(pvalue)))
            result = S_OK()
        return result

    def _jobSpecificParams(self, application):
        """Check an application and derive the production information from it.

        For production additional checks are needed: number of events, energy, event type
        and output storage element have to be known, either from the job or from the
        application. For applications accounted in the production, the output file names,
        output paths and the metadata to register for these paths are resolved as well.

        :param application: application being added to the job
        :returns: S_OK, S_ERROR
        """
        if self.created:
            return S_ERROR("The production was created, you cannot add new applications to the job.")

        if not application.logFile:
            logf = application.appname + "_" + application.version + "_@{STEP_ID}.log"
            res = application.setLogFile(logf)
            if not res['OK']:
                return res
            # in fact a bit more tricky as the log files have the prodID and jobID in them

        # Retrieve from the application the essential info to build the prod info.
        if not self.nbevts and not self.slicesize:
            self.nbevts = application.numberOfEvents
            if not self.nbevts:
                return S_ERROR("Number of events to process is not defined.")
        elif not application.numberOfEvents:
            if not self.slicesize:
                res = application.setNumberOfEvents(self.jobFileGroupSize * self.nbevts)
            else:
                res = application.setNumberOfEvents(self.slicesize)
            if not res['OK']:
                return res

        # The application processes fewer events than are available: use its number.
        if application.numberOfEvents > 0 and (self.jobFileGroupSize * self.nbevts > application.numberOfEvents or
                                               self.slicesize > application.numberOfEvents):
            self.nbevts = application.numberOfEvents

        if not self.energy:
            if application.energy:
                self.energy = Decimal((("%1.2f" % float(application.energy)).rstrip('0').rstrip('.')))
            else:
                return S_ERROR("Could not find the energy defined, it is needed for the production definition.")
        elif not application.energy:
            res = application.setEnergy(float(self.energy))
            if not res['OK']:
                return res
        if self.energy:
            self._setParameter("Energy", "float", float(self.energy), "Energy used")
            self.prodparameters["Energy"] = float(self.energy)

        if not self.evttype:
            if hasattr(application, 'eventType'):
                self.evttype = application.eventType
            else:
                return S_ERROR("Event type not found nor specified, it's mandatory for the production paths.")
            self.prodparameters['Process'] = self.evttype

        if not self.outputStorage:
            return S_ERROR("You need to specify the Output storage element")

        curpackage = "%s.%s" % (application.appname, application.version)
        if "SWPackages" in self.prodparameters:
            if not self.prodparameters["SWPackages"].count(curpackage):
                self.prodparameters["SWPackages"] += ";%s" % (curpackage)
        else:
            self.prodparameters["SWPackages"] = "%s" % (curpackage)

        # Applications not accounted in the production define no output paths.
        if not application.accountInProduction:
            res = self._updateProdParameters(application)
            if not res['OK']:
                return res
            self.checked = True

            return S_OK()

        res = application.setOutputSE(self.outputStorage)
        if not res['OK']:
            return res

        energypath = self.getEnergyPath()

        if not self.basename:
            self.basename = self.evttype

        # Exactly one trailing slash: keep an event type that already ends with one
        # instead of dropping it from the path.
        if self.evttype.endswith('/'):
            evttypepath = self.evttype
        else:
            evttypepath = self.evttype + '/'

        path = self.basepath
        # Need to resolve file names and paths
        if self.energy:
            self.finalMetaDict[self.basepath + energypath] = {"Energy": str(self.energy)}

        if hasattr(application, "setOutputRecFile") and not application.willBeCut:
            # application with separate REC and DST outputs
            evtPath = self.basepath + energypath + evttypepath
            self.finalMetaDict[evtPath] = {'EvtType': self.evttype}
            detPath = evtPath + application.detectortype
            self.finalMetaDict[detPath] = {'DetectorType': application.detectortype}
            if application.keepRecFile:
                path = self.basepath + energypath + evttypepath + application.detectortype + '/REC'
                self.finalMetaDict[path] = {'Datatype': 'REC'}
                fname = self.basename + '_rec.slcio'
                application.setOutputRecFile(fname, path)
                LOG.info('Will store the files under', path)
                self.finalpaths.append(path)
            path = self.basepath + energypath + evttypepath + application.detectortype + '/DST'
            self.finalMetaDict[path] = {'Datatype': 'DST'}
            fname = self.basename + '_dst.slcio'
            application.setOutputDstFile(fname, path)
            LOG.info('Will store the files under', path)
            self.finalpaths.append(path)
        elif hasattr(application, "outputFile") and hasattr(application, 'datatype') and \
                not application.outputFile and not application.willBeCut:
            # application with a single output file whose name was not set by the user
            path = self.basepath + energypath + evttypepath
            self.finalMetaDict[path] = {"EvtType": self.evttype}
            if hasattr(application, "detectortype"):
                if application.detectortype:
                    path += application.detectortype
                    self.finalMetaDict[path] = {"DetectorType": application.detectortype}
                    path += '/'
                elif self.detector:
                    path += self.detector
                    self.finalMetaDict[path] = {"DetectorType": self.detector}
                    path += '/'
            if not application.datatype and self.datatype:
                application.datatype = self.datatype
            path += application.datatype
            self.finalMetaDict[path] = {'Datatype': application.datatype}
            LOG.info("Will store the files under", "%s" % path)
            self.finalpaths.append(path)
            extension = 'stdhep'
            if application.datatype in ['SIM', 'REC']:
                extension = 'slcio'
            fname = self.basename + "_%s" % (application.datatype.lower()) + "." + extension
            application.setOutputFile(fname, path)

        self.basepath = path

        res = self._updateProdParameters(application)
        if not res['OK']:
            return res

        self.checked = True

        return S_OK()

    def _updateProdParameters(self, application):
        """Update the prod parameters stored in the production parameters visible from the web.

        :param application: application whose ``prodparameters`` are merged into this job's
        :returns: S_OK, or S_ERROR if the application parameters cannot be merged
        """
        try:
            self.prodparameters.update(application.prodparameters)
        except Exception as x:
            return S_ERROR("Exception: %r" % x)

        if hasattr(application, 'extraCLIArguments') and application.extraCLIArguments:
            self.prodparameters['extraCLIArguments'] = repr(application.extraCLIArguments)

        return S_OK()

    def _jobSpecificModules(self, application, step):
        """Return the result of the application's production module setup for the given step."""
        return application._prodjobmodules(step)

    def getEnergyPath(self):
        """Return the energy path, e.g. 250gev or 3tev or 1.4tev etc., with a trailing slash.

        ``self.energy`` is taken as GeV; values of 1000 and above are expressed in TeV.
        """
        energy = Decimal(str(self.energy))
        tD = Decimal('1000.0')
        unit = 'gev' if energy < tD else 'tev'
        energy = energy if energy < tD else energy / tD
        # at most two decimals, without trailing zeros or a dangling dot
        energyPath = ("%1.2f" % energy).rstrip('0').rstrip('.')
        energyPath = energyPath + unit + '/'
        LOG.info("Energy path is: ", energyPath)
        return energyPath

    def _checkMetaKeys(self, metakeys, extendFileMeta=False):
        """Check if metadata keys are allowed to be metadata.

        :param list metakeys: metadata keys for production metadata
        :param bool extendFileMeta: also use FileMetaFields for checking meta keys
        :returns: S_OK, S_ERROR
        """
        res = self.fc.getMetadataFields()
        if not res['OK']:
            LOG.error("Could not contact File Catalog")
            return S_ERROR("Could not contact File Catalog")
        # list() so that the keys can be extended whatever type keys() returns
        metaFCkeys = list(res['Value']['DirectoryMetaFields'].keys())
        if extendFileMeta:
            metaFCkeys.extend(res['Value']['FileMetaFields'].keys())

        for key in metakeys:
            # a key differing only by case from a known field is reported as a syntax error
            for meta in metaFCkeys:
                if meta != key and meta.lower() == key.lower():
                    return self._reportError("Key syntax error %r, should be %r" % (key, meta),
                                             name=self.__class__.__name__)
            if key not in metaFCkeys:
                return self._reportError("Key %r not found in metadata keys, allowed are %r" % (key, metaFCkeys))

        return S_OK()

    def _checkFindDirectories(self, metadata):
        """Find directories by metadata and check that there are directories found.

        :param dict metadata: metadata dictionary
        :returns: S_OK with the result of the catalog query, S_ERROR
        """
        res = self.fc.findDirectoriesByMetadata(metadata)
        if not res['OK']:
            return self._reportError("Error looking up the catalog for available directories")
        elif len(res['Value']) < 1:
            return self._reportError('Could not find any directories corresponding to the query issued')
        return res

    def setReconstructionBasePaths(self, recPath, dstPath):
        """Set the output base paths for the reconstruction REC and DST files.

        :param str recPath: base path for REC files
        :param str dstPath: base path for DST files
        """
        self._recBasePaths['REC'] = recPath
        self._recBasePaths['DST'] = dstPath
class ProductionJob(Job):  #pylint: disable=too-many-public-methods, too-many-instance-attributes
  """Production job class. Suitable for CLIC studies.

  Collects the applications, the input data query and the output metadata of a
  production, turns them into a workflow and registers that workflow as a
  transformation. Need to sub class and overload for other clients.
  """

  def __init__(self, script=None):
    """Initialise the bookkeeping attributes and the service clients.

    :param script: forwarded to the ``Job`` constructor; when it is not given the
                   default workflow parameters are set up
    """
    super(ProductionJob, self).__init__(script)
    self.prodVersion = __RCSID__
    self.dryrun = False
    self.created = False
    self.checked = False
    self.call_finalization = False
    self.finalsdict = {}
    self.transfid = 0
    self.type = 'Production'
    self.csSection = '/Production/Defaults'
    self.ops = Operations()
    self.fc = FileCatalogClient()
    self.trc = TransformationClient()
    self.defaultProdID = '12345'
    self.defaultProdJobID = '12345'
    self.jobFileGroupSize = 1
    self.nbtasks = 1
    self.slicesize = 0
    self.basename = ''
    self.basepath = self.ops.getValue('/Production/CLIC/BasePath', '/ilc/prod/clic/')
    self.evttype = ''
    self.datatype = ''
    self.energycat = ''
    self.detector = ''
    self.currtrans = None
    self.description = ''
    self.finalpaths = []
    # path -> searchable metadata to set on that path
    self.finalMetaDict = defaultdict(dict)
    self.prodMetaDict = {}
    # path -> non searchable metadata to set on that path
    self.finalMetaDictNonSearch = {}
    self.metadict_external = {}
    self.outputStorage = ''
    self.proxyinfo = getProxyInfo()
    self.inputdataquery = False
    self.inputBKSelection = {}
    self.plugin = 'Standard'
    self.prodGroup = ''
    self.prodTypes = ['MCGeneration', 'MCSimulation', 'Test', 'MCReconstruction',
                      'MCReconstruction_Overlay', 'Merge', 'Split',
                      'MCGeneration_ILD', 'MCSimulation_ILD', 'MCReconstruction_ILD',
                      'MCReconstruction_Overlay_ILD', 'Split_ILD']
    self.prodparameters = {}
    self.prodparameters['NbInputFiles'] = 1
    self.prodparameters['nbevts'] = 0
    self._addParameter(self.workflow, "IS_PROD", 'JDL', True, "This job is a production job")
    if not script:
      self.__setDefaults()
    self._recBasePaths = {}

  #############################################################################
  def __setDefaults(self):
    """Set the default platform, CPU time, log level, job group and workflow parameters."""
    self.setPlatform(self.ops.getValue('%s/Platform' % (self.csSection), 'x86_64-slc5-gcc43-opt'))
    self.setCPUTime('300000')
    self.setLogLevel('verbose')
    self.setJobGroup('@{PRODUCTION_ID}')

    #version control
    self._setParameter('productionVersion', 'string', self.prodVersion, 'ProdAPIVersion')

    #General workflow parameters
    self._setParameter('PRODUCTION_ID', 'string', self.defaultProdID.zfill(8), 'ProductionID')
    self._setParameter('JOB_ID', 'string', self.defaultProdJobID.zfill(8), 'ProductionJobID')
    self._setParameter('Priority', 'JDL', '1', 'Priority')
    self._setParameter('emailAddress', 'string', '*****@*****.**', 'CrashEmailAddress')

  def _setParameter(self, name, parameterType, parameterValue, description):
    """Add a workflow parameter, letting a value from the CS override the given one.

    :param str name: parameter name, also the option name looked up below ``self.csSection``
    :param str parameterType: workflow parameter type
    :param parameterValue: value used when the CS option is missing or empty
    :param str description: parameter description
    """
    # a single lookup is enough: the value is used for the test, the log and the parameter
    csValue = self.ops.getValue('%s/%s' % (self.csSection, name), '')
    if csValue:
      self.log.debug('Setting %s from CS defaults = %s' % (name, csValue))
      self._addParameter(self.workflow, name, parameterType, csValue, description)
    else:
      self.log.debug('Setting parameter %s = %s' % (name, parameterValue))
      self._addParameter(self.workflow, name, parameterType, parameterValue, description)

  def setConfig(self, version):
    """Define the Configuration (ILDConfig) package to obtain.

    :param str version: version of the ILDConfig package
    :returns: S_OK
    """
    appName = 'ILDConfig'
    self._addSoftware(appName.lower(), version)
    self.prodparameters['ILDConfigVersion'] = version
    self._addParameter(self.workflow, 'ILDConfigPackage', 'JDL', appName + version, 'ILDConfig package')
    return S_OK()

  def setClicConfig(self, version):
    """Define the ClicConfig package to obtain.

    :param str version: version of the ClicConfig package
    :returns: S_OK
    """
    appName = 'ClicConfig'
    self._addSoftware(appName.lower(), version)
    self._addParameter(self.workflow, 'ClicConfigPackage', 'JDL', appName + version, 'ClicConfig package')
    self.prodparameters['ClicConfigVersion'] = version
    return S_OK()

  def setDryRun(self, run):
    """In case one wants to get all the info as if the prod was being submitted.

    :param bool run: when True nothing is sent to the transformation system or the catalog
    """
    self.dryrun = run

  #############################################################################
  def setProdGroup(self, group):
    """Set a user defined tag for the production as appears on the monitoring page."""
    self.prodGroup = group

  #############################################################################
  def setProdPlugin(self, plugin):
    """Set the plugin to be used for creating the production jobs."""
    self.plugin = plugin

  #############################################################################
  def setJobFileGroupSize(self, files):
    """Set the number of files to be input to each job created.

    Has to be called before any application is added, an error is reported otherwise.

    :param int files: number of input files per job
    """
    if self.checked:
      return self._reportError("This input is needed at the beginning of the production definition: it is "
                               "needed for total number of evts.")
    self.jobFileGroupSize = files
    self.prodparameters['NbInputFiles'] = files

  def setNbEvtsPerSlice(self, nbevts):
    """Define the number of events in a slice."""
    self.slicesize = nbevts

  #############################################################################
  def setProdType(self, prodType):
    """Set prod type.

    :param str prodType: one of ``self.prodTypes``
    :raises TypeError: if the type is not known
    """
    if prodType not in self.prodTypes:
      raise TypeError('Prod must be one of %s' % (', '.join(self.prodTypes)))
    self.setType(prodType)

  #############################################################################
  def setWorkflowName(self, name):
    """Set workflow name."""
    self.workflow.setName(name)
    self.name = name

  #############################################################################
  def setWorkflowDescription(self, desc):
    """Set workflow description."""
    self.workflow.setDescription(desc)

  #############################################################################
  def createWorkflow(self):
    """Create XML for local testing.

    Writes ``<name>.xml`` in the current directory; an existing file of that name is
    moved to ``<name>.xml.backup`` first.
    """
    name = '%s.xml' % self.name
    if os.path.exists(name):
      shutil.move(name, '%s.backup' % name)
    self.workflow.toXMLFile(name)

  #############################################################################
  def setOutputSE(self, outputse):
    """Define where the output file(s) will go.

    :param str outputse: output storage element
    :returns: S_OK
    """
    self.outputStorage = outputse
    return S_OK()

  #############################################################################
  def setInputDataQuery(self, metadata):
    """Define the input data query needed.

    Checks the keys, looks up the matching directories and derives event type, number
    of events, energy, data type and detector from the directory metadata.

    :param dict metadata: metadata query, must contain the key ``ProdID``
    :returns: S_OK, S_ERROR
    """
    retMetaKey = self._checkMetaKeys(metadata.keys())
    if not retMetaKey['OK']:
      return retMetaKey

    if "ProdID" not in metadata:
      return self._reportError("Input metadata dictionary must contain at least a key 'ProdID' as reference")

    retDirs = self._checkFindDirectories(metadata)
    if not retDirs['OK']:
      return retDirs

    # _checkFindDirectories guarantees at least one directory, so compatmeta is always bound below
    dirs = retDirs['Value'].values()
    for mdir in dirs:
      gLogger.notice("Directory: %s" % mdir)
      res = self.fc.getDirectoryUserMetadata(mdir)
      if not res['OK']:
        return self._reportError("Error looking up the catalog for directory metadata")
      compatmeta = res['Value']
      compatmeta.update(metadata)

    if 'EvtType' in compatmeta:
      self.evttype = JobHelpers.getValue(compatmeta['EvtType'], str, basestring)
    else:
      return self._reportError("EvtType is not in the metadata, it has to be!")

    if 'NumberOfEvents' in compatmeta:
      self.nbevts = JobHelpers.getValue(compatmeta['NumberOfEvents'], int, None)

    self.basename = self.evttype
    gLogger.notice("MetaData: %s" % compatmeta)
    gLogger.notice("MetaData: %s" % metadata)

    if "Energy" in compatmeta:
      self.energycat = JobHelpers.getValue(compatmeta["Energy"], str, (int, long, basestring))
      # the energy is only parsed when the catalog provides one, an empty string cannot be converted
      if self.energycat.count("tev"):
        self.energy = Decimal("1000.") * Decimal(self.energycat.split("tev")[0])
      elif self.energycat.count("gev"):
        self.energy = Decimal("1.") * Decimal(self.energycat.split("gev")[0])
      else:
        self.energy = Decimal("1.") * Decimal(self.energycat)

    gendata = False
    if 'Datatype' in compatmeta:
      self.datatype = JobHelpers.getValue(compatmeta['Datatype'], str, basestring)
      if self.datatype == 'gen':
        gendata = True

    # the detector type is ignored for generator level data
    if "DetectorType" in compatmeta and not gendata:
      self.detector = JobHelpers.getValue(compatmeta["DetectorType"], str, basestring)

    self.inputBKSelection = metadata
    self.inputdataquery = True

    self.prodparameters['nbevts'] = self.nbevts
    self.prodparameters["FCInputQuery"] = self.inputBKSelection

    return S_OK()

  def setDescription(self, desc):
    """Set the production's description.

    :param string desc: Description
    :returns: S_OK
    """
    self.description = desc
    return S_OK()

  def getBasePath(self):
    """Return the base path. It is updated each time an application is added to the job."""
    return self.basepath

  def addFinalization(self, uploadData=False, registerData=False, uploadLog=False, sendFailover=False):
    """Add finalization step.

    Only the flags are stored here, the step itself is added when the production is created.

    :param bool uploadData: Upload or not the data to the storage
    :param bool uploadLog: Upload log file to storage (currently only available for admins, thus add them to OutputSandbox)
    :param bool sendFailover: Send Failover requests, and declare files as processed or unused in transfDB
    :param bool registerData: Register data in the file catalog
    """
    #TODO: Do the registration only once, instead of once for each job
    self.call_finalization = True
    self.finalsdict['uploadData'] = uploadData
    self.finalsdict['registerData'] = registerData
    self.finalsdict['uploadLog'] = uploadLog
    self.finalsdict['sendFailover'] = sendFailover

  def _addRealFinalization(self):
    """Add the finalization step to the workflow.

    This is called at creation: now that the workflow is created at the last minute,
    we need to add this also at the last minute.

    :returns: S_OK
    """
    importLine = 'from ILCDIRAC.Workflow.Modules.<MODULE> import <MODULE>'
    # (module name, description, instance name, key in self.finalsdict), in the order the
    # modules are added to the step
    moduleSpecs = [
      ('UploadOutputData', 'Uploads the output data', 'dataUpload', 'uploadData'),
      ('RegisterOutputData', 'Module to add in the metadata catalog the relevant info about the files',
       'RegisterOutputData', 'registerData'),
      ('UploadLogFile', 'Uploads the output log files', 'logUpload', 'uploadLog'),
      ('FailoverRequest', 'Sends any failover requests', 'failoverRequest', 'sendFailover'),
    ]

    finalization = StepDefinition('Job_Finalization')
    for moduleName, moduleDescription, instanceName, flagName in moduleSpecs:
      module = ModuleDefinition(moduleName)
      module.setDescription(moduleDescription)
      self._addParameter(module, 'enable', 'bool', False, 'EnableFlag')
      module.setBody(importLine.replace('<MODULE>', moduleName))
      finalization.addModule(module)
      instance = finalization.createModuleInstance(moduleName, instanceName)
      instance.setValue("enable", self.finalsdict[flagName])

    self.workflow.addStep(finalization)
    self.workflow.createStepInstance('Job_Finalization', 'finalization')

    return S_OK()

  def createProduction(self, name=None):
    """Create production.

    Needs a proxy of the group ``ilc_prod``. Builds the workflow, registers it as a
    transformation (unless in dry run) and prepares the output paths and their metadata.

    :param str name: transformation name, defaults to the workflow name
    :returns: S_OK, S_ERROR
    """
    if not self.proxyinfo['OK']:
      return S_ERROR("Not allowed to create production, you need a ilc_prod proxy.")
    if 'group' not in self.proxyinfo['Value']:
      return S_ERROR("Could not determine group, you do not have the right proxy.")
    if self.proxyinfo['Value']['group'] != "ilc_prod":
      return S_ERROR("Not allowed to create production, you need a ilc_prod proxy.")

    if self.created:
      return S_ERROR("Production already created.")

    ###We need to add the applications to the workflow
    res = self._addToWorkflow()
    if not res['OK']:
      return res
    if self.call_finalization:
      self._addRealFinalization()

    workflowName = self.workflow.getName()
    fileName = '%s.xml' % workflowName
    self.log.verbose('Workflow XML file name is:', '%s' % fileName)
    try:
      self.createWorkflow()
    except Exception as x:  # anything going wrong while writing the XML is reported as a failure
      self.log.error("Exception creating workflow", repr(x))
      return S_ERROR('Could not create workflow')
    # NOTE(review): createWorkflow writes '<self.name>.xml' while '<workflow name>.xml' is read here;
    # the two agree when the name was set with setWorkflowName -- confirm for other cases
    with open(fileName, 'r') as oFile:
      workflowXML = oFile.read()

    if not name:
      name = workflowName

    # a successful lookup means that the name is already taken
    res = self.trc.getTransformationStats(name)
    if res['OK']:
      return self._reportError("Transformation with name %s already exists! Cannot proceed." % name)

    ###Create Tranformation
    Trans = Transformation()
    Trans.setTransformationName(name)
    Trans.setDescription(self.description)
    Trans.setLongDescription(self.description)
    Trans.setType(self.type)
    self.prodparameters['JobType'] = self.type
    Trans.setPlugin(self.plugin)
    if self.inputdataquery:
      Trans.setGroupSize(self.jobFileGroupSize)
    Trans.setTransformationGroup(self.prodGroup)
    Trans.setBody(workflowXML)
    if not self.slicesize:
      Trans.setEventsPerTask(self.jobFileGroupSize * self.nbevts)
    else:
      Trans.setEventsPerTask(self.slicesize)
    self.currtrans = Trans

    if self.dryrun:
      self.log.notice('Would create prod called', name)
      self.transfid = 12345
    else:
      res = Trans.addTransformation()
      if not res['OK']:
        self.log.error(res['Message'])
        return res
      self.transfid = Trans.getTransformationID()['Value']

    if self.inputBKSelection:
      res = self.applyInputDataQuery()
      if not res['OK']:
        # kept best effort: the production is still created, but the failure is no longer silent
        self.log.error("Could not apply the input data query:", res['Message'])
    if not self.dryrun:
      Trans.setAgentType("Automatic")
      Trans.setStatus("Active")

    # the output of each production goes to a sub folder named after the zero padded production ID
    finals = []
    for finalpath in self.finalpaths:
      finalpath = finalpath.rstrip("/") + "/" + str(self.transfid).zfill(8)
      finals.append(finalpath)
      self.finalMetaDict[finalpath].update({"ProdID": self.transfid})
      self.finalMetaDict[finalpath].update(self.prodMetaDict)
      if self.nbevts:
        self.finalMetaDict[finalpath].update({'NumberOfEvents': self.jobFileGroupSize * self.nbevts})
    self.finalpaths = finals
    self.created = True

    return S_OK()

  def setNbOfTasks(self, nbtasks):
    """Define the number of tasks you want. Useful for generation jobs.

    Can only be called once the production was created, and not together with an input
    data query unless the plugin is ``Limited`` or ``SlicedLimited``.

    :param int nbtasks: maximum number of tasks
    :returns: S_OK, S_ERROR
    """
    if not self.currtrans:
      self.log.error("No transformation defined earlier")
      return S_ERROR("No transformation defined")

    if self.inputBKSelection and self.plugin not in ['Limited', 'SlicedLimited']:
      message = "Meta data selection activated, should not specify the number of jobs"
      self.log.error(message)
      return S_ERROR(message)

    self.nbtasks = nbtasks
    self.currtrans.setMaxNumberOfTasks(self.nbtasks)  #pylint: disable=E1101
    return S_OK()

  def applyInputDataQuery(self, metadata=None, prodid=None):
    """Tell the production to update itself using the metadata query specified.

    I.e. submit new jobs if new files are added corresponding to same query.

    :param dict metadata: metadata query replacing the one set earlier
    :param int prodid: transformation ID, used when none is known to this object
    :returns: S_OK, S_ERROR
    """
    if not self.transfid and self.currtrans:
      self.transfid = self.currtrans.getTransformationID()['Value']  #pylint: disable=E1101
    elif prodid:
      self.transfid = prodid
    if not self.transfid:
      self.log.error("No transformation defined earlier")
      return S_ERROR("No transformation defined")

    if metadata:
      self.inputBKSelection = metadata

    client = TransformationClient()
    if not self.dryrun:
      res = client.createTransformationInputDataQuery(self.transfid, self.inputBKSelection)
      if not res['OK']:
        return res
    else:
      self.log.notice("Would use %s as metadata query for production" % str(self.inputBKSelection))
    return S_OK()

  def addMetadataToFinalFiles(self, metadict):
    """Add additional non-query metadata.

    :param dict metadict: metadata set as non searchable metadata on every output path
    :returns: S_OK
    """
    self.metadict_external = metadict
    return S_OK()

  def finalizeProd(self, prodid=None, prodinfo=None):
    """Finalize definition: submit to Transformation service and register metadata.

    :param int prodid: transformation ID overriding the one of the current transformation
    :param dict prodinfo: production parameters replacing the collected ones
    :returns: S_OK, S_ERROR if no transformation is known
    """
    currtrans = 0
    if self.currtrans:
      if not self.dryrun:
        currtrans = self.currtrans.getTransformationID()['Value']  #pylint: disable=E1101
      else:
        currtrans = 12345
    if prodid:
      currtrans = prodid
    if not currtrans:
      self.log.error("No transformation defined earlier")
      return S_ERROR("No transformation defined")
    if prodinfo:
      self.prodparameters = prodinfo

    info = []
    info.append('%s Production %s has following parameters:\n' % (self.prodparameters['JobType'], currtrans))
    if "Process" in self.prodparameters:
      info.append('- Process %s' % self.prodparameters['Process'])
    if "Energy" in self.prodparameters:
      info.append('- Energy %s GeV' % self.prodparameters["Energy"])

    if not self.slicesize:
      self.prodparameters['nbevts'] = self.jobFileGroupSize * self.nbevts
    else:
      self.prodparameters['nbevts'] = self.slicesize
    if self.prodparameters['nbevts']:
      info.append("- %s events per job" % (self.prodparameters['nbevts']))
    if self.prodparameters.get('lumi', False):
      info.append(' corresponding to a luminosity %s fb' % (self.prodparameters['lumi'] *
                                                            self.prodparameters['NbInputFiles']))
    if 'FCInputQuery' in self.prodparameters:
      info.append('Using InputDataQuery :')
      for key, val in self.prodparameters['FCInputQuery'].items():
        info.append(' %s = %s' % (key, val))
    if "SWPackages" in self.prodparameters:
      info.append('- SW packages %s' % self.prodparameters["SWPackages"])
    if "SoftwareTag" in self.prodparameters:
      info.append('- SW tags %s' % self.prodparameters["SoftwareTag"])
    if "ILDConfigVersion" in self.prodparameters:
      info.append('- ILDConfig %s' % self.prodparameters['ILDConfigVersion'])
    if 'ClicConfigVersion' in self.prodparameters:
      info.append('- ClicConfig %s' % self.prodparameters['ClicConfigVersion'])
    if 'extraCLIArguments' in self.prodparameters:
      info.append('- ExtraCLIArguments %s' % self.prodparameters['extraCLIArguments'])

    # as this is the very last call all applications are registered, so all software packages are known
    #add them the the metadata registration
    for finalpath in self.finalpaths:
      nonSearchMeta = self.finalMetaDictNonSearch.setdefault(finalpath, {})
      if "SWPackages" in self.prodparameters:
        nonSearchMeta["SWPackages"] = self.prodparameters["SWPackages"]
      if self.metadict_external:
        nonSearchMeta.update(self.metadict_external)

    info.append('- Registered metadata: ')
    for path, metadata in sorted(self.finalMetaDict.items()):
      info.append(' %s = %s' % (path, metadata))
    info.append('- Registered non searchable metadata: ')
    for path, metadata in sorted(self.finalMetaDictNonSearch.items()):
      info.append(' %s = %s' % (path, metadata))

    pprint.pprint(info)
    self.prodparameters['DetailedInfo'] = '\n'.join(info)

    # failures to set a parameter or to register metadata are logged, they do not fail the call
    for name, val in self.prodparameters.items():
      result = self._setProdParameter(currtrans, name, val)
      if not result['OK']:
        self.log.error(result['Message'])

    res = self._registerMetadata()
    if not res['OK']:
      self.log.error("Could not register the following directories :", "%s" % str(res))
    return S_OK()

  #############################################################################
  def _createCatalogDirectory(self, path):
    """Create ``path`` in the file catalog and open its permissions to 0o775.

    :param str path: directory to create
    :returns: True unless the creation or the change of permissions was reported as failed
    """
    result = self.fc.createDirectory(path)
    if not result['OK']:
      self.log.error('Failed to create directory:', result['Message'])
      return False
    if result['Value']['Successful']:
      if path in result['Value']['Successful']:
        self.log.verbose("Successfully created directory:", "%s" % path)
        res = self.fc.changePathMode({path: 0o775}, False)
        if not res['OK']:
          self.log.error(res['Message'])
          return False
    elif result['Value']['Failed']:
      if path in result['Value']['Failed']:
        self.log.error('Failed to create directory:', "%s" % str(result['Value']['Failed'][path]))
        return False
    return True

  def _registerMetadata(self):
    """Private method.

    Register path and metadata before the production actually runs. This allows for the
    definition of the full chain in 1 go.

    :returns: S_OK; S_ERROR if existing metadata of a path disagrees with the new one;
              ``{'OK': False, 'Failed': [paths]}`` if directories could not be created
    """
    prevent_registration = self.ops.getValue("Production/PreventMetadataRegistration", False)

    if self.dryrun or prevent_registration:
      self.log.notice("Would have created and registered the following\n",
                      "\n ".join([" * %s: %s" % (par, val) for par, val in self.finalMetaDict.items()]))
      self.log.notice("Would have set this as non searchable metadata", str(self.finalMetaDictNonSearch))
      return S_OK()

    failed = []
    for path, meta in self.finalMetaDict.items():
      if not self._createCatalogDirectory(path):
        failed.append(path)

      ## Get existing metadata, if it is the same don't set it again, otherwise throw error
      existingMetadata = self.fc.getDirectoryUserMetadata(path.rstrip("/"))
      metaCopy = dict(meta)
      if existingMetadata['OK']:
        failure = False
        for key, value in existingMetadata['Value'].items():
          if key not in meta:
            continue
          if meta[key] != value:
            self.log.error("Metadata values for folder %s disagree for key %s: Existing(%r), new(%r)" %
                           (path, key, value, meta[key]))
            failure = True
          else:
            metaCopy.pop(key, None)
        if failure:
          return S_ERROR("Error when setting new metadata, already existing metadata disagrees!")

      result = self.fc.setMetadata(path.rstrip("/"), metaCopy)
      if not result['OK']:
        self.log.error("Could not preset metadata", "%s" % str(metaCopy))
        self.log.error("Could not preset metadata", "%s" % result['Message'])

    for path, meta in self.finalMetaDictNonSearch.items():
      if not self._createCatalogDirectory(path):
        failed.append(path)
      result = self.fc.setMetadata(path.rstrip("/"), meta)
      if not result['OK']:
        self.log.error("Could not preset metadata", "%s" % str(meta))

    if failed:
      return {'OK': False, 'Failed': failed}
    return S_OK()

  def getMetadata(self):
    """Return the corresponding metadata of the last step.

    :returns: dict merging the metadata of all output paths, without ``NumberOfEvents``
    """
    metadict = {}
    for meta in self.finalMetaDict.values():
      metadict.update(meta)
    metadict.pop('NumberOfEvents', None)  #As this is not supposed to be a searchable thing
    return metadict

  def _setProdParameter(self, prodID, pname, pvalue):
    """Set a production parameter.

    :param prodID: transformation ID
    :param str pname: parameter name
    :param pvalue: parameter value; the items of a list are joined with newlines
    :returns: result of the service call, S_OK in dry run
    """
    if isinstance(pvalue, list):
      pvalue = '\n'.join(pvalue)

    prodClient = RPCClient('Transformation/TransformationManager', timeout=120)
    if not self.dryrun:
      result = prodClient.setTransformationParameter(int(prodID), str(pname), str(pvalue))
      if not result['OK']:
        self.log.error('Problem setting parameter %s for production %s and value:\n%s' % (pname, prodID, pvalue))
    else:
      self.log.notice("Adding %s=%s to transformation" % (str(pname), str(pvalue)))
      result = S_OK()
    return result

  def _jobSpecificParams(self, application):
    """For production additional checks are needed: ask the user.

    Synchronises number of events, energy and event type between the production and the
    application, records the software package and defines output paths, file names and
    the metadata to register for these paths.

    :param application: application being added to the job
    :returns: S_OK, S_ERROR
    """
    if self.created:
      return S_ERROR("The production was created, you cannot add new applications to the job.")

    if not application.logFile:
      logf = application.appname + "_" + application.version + "_@{STEP_ID}.log"
      res = application.setLogFile(logf)
      if not res['OK']:
        return res
      #in fact a bit more tricky as the log files have the prodID and jobID in them

    ### Retrieve from the application the essential info to build the prod info.
    if not self.nbevts and not self.slicesize:
      self.nbevts = application.numberOfEvents
      if not self.nbevts:
        return S_ERROR("Number of events to process is not defined.")
    elif not application.numberOfEvents:
      if not self.slicesize:
        res = application.setNumberOfEvents(self.jobFileGroupSize * self.nbevts)
      else:
        res = application.setNumberOfEvents(self.slicesize)
      if not res['OK']:
        return res

    # an application processing fewer events than available limits the events of the production
    if application.numberOfEvents > 0 and (self.jobFileGroupSize * self.nbevts > application.numberOfEvents or
                                           self.slicesize > application.numberOfEvents):
      self.nbevts = application.numberOfEvents

    if not self.energy:
      if application.energy:
        self.energy = Decimal(str(application.energy))
      else:
        return S_ERROR("Could not find the energy defined, it is needed for the production definition.")
    elif not application.energy:
      res = application.setEnergy(float(self.energy))
      if not res['OK']:
        return res
    if self.energy:
      self._setParameter("Energy", "float", float(self.energy), "Energy used")
      self.prodparameters["Energy"] = float(self.energy)

    if not self.evttype:
      if hasattr(application, 'eventType'):
        self.evttype = application.eventType
      else:
        return S_ERROR("Event type not found nor specified, it's mandatory for the production paths.")
      # NOTE(review): 'Process' is only recorded when the event type comes from the application -- confirm
      self.prodparameters['Process'] = self.evttype

    if not self.outputStorage:
      return S_ERROR("You need to specify the Output storage element")

    curpackage = "%s.%s" % (application.appname, application.version)
    if "SWPackages" in self.prodparameters:
      if not self.prodparameters["SWPackages"].count(curpackage):
        self.prodparameters["SWPackages"] += ";%s" % (curpackage)
    else:
      self.prodparameters["SWPackages"] = "%s" % (curpackage)

    if not application.accountInProduction:
      # the application does not define any output of the production
      res = self._updateProdParameters(application)
      if not res['OK']:
        return res
      self.checked = True
      return S_OK()

    res = application.setOutputSE(self.outputStorage)
    if not res['OK']:
      return res

    energypath = self.getEnergyPath()

    if not self.basename:
      self.basename = self.evttype

    # NOTE(review): when the event type already ends with '/' evttypepath stays empty, so the
    # event type is left out of the paths -- confirm that this is intended
    evttypepath = ''
    if not self.evttype[-1] == '/':
      evttypepath = self.evttype + '/'

    path = self.basepath
    ###Need to resolve file names and paths
    energyFolder = self.basepath + energypath
    evtTypeFolder = energyFolder + evttypepath
    if self.energy:
      self.finalMetaDict[energyFolder] = {"Energy": str(self.energy)}

    if hasattr(application, "setOutputRecFile") and not application.willBeCut:
      # reconstruction like application: one REC and one DST output below the detector folder
      detectorFolder = evtTypeFolder + application.detectortype
      self.finalMetaDict[evtTypeFolder] = {"EvtType": self.evttype}
      self.finalMetaDict[detectorFolder] = {"DetectorType": application.detectortype}

      path = detectorFolder + "/REC"
      self.finalMetaDict[path] = {'Datatype': "REC"}
      fname = self.basename + "_rec.slcio"
      application.setOutputRecFile(fname, path)
      self.log.info("Will store the files under", "%s" % path)
      self.finalpaths.append(path)

      path = detectorFolder + "/DST"
      self.finalMetaDict[path] = {'Datatype': "DST"}
      fname = self.basename + "_dst.slcio"
      application.setOutputDstFile(fname, path)
      self.log.info("Will store the files under", "%s" % path)
      self.finalpaths.append(path)
    elif hasattr(application, "outputFile") and hasattr(application, 'datatype') and \
            not application.outputFile and not application.willBeCut:
      # single output file: <base>/<energy>/<evttype>/[<detector>/]<datatype>
      path = evtTypeFolder
      self.finalMetaDict[path] = {"EvtType": self.evttype}
      if hasattr(application, "detectortype"):
        detectorType = application.detectortype or self.detector
        if detectorType:
          path += detectorType
          self.finalMetaDict[path] = {"DetectorType": detectorType}
          path += '/'
      if not application.datatype and self.datatype:
        application.datatype = self.datatype
      path += application.datatype
      self.finalMetaDict[path] = {'Datatype': application.datatype}
      self.log.info("Will store the files under", "%s" % path)
      self.finalpaths.append(path)
      extension = 'stdhep'
      if application.datatype in ['SIM', 'REC']:
        extension = 'slcio'
      fname = self.basename + "_%s" % (application.datatype.lower()) + "." + extension
      application.setOutputFile(fname, path)

    # the next application added to the job starts from the last path used here
    self.basepath = path

    res = self._updateProdParameters(application)
    if not res['OK']:
      return res

    self.checked = True

    return S_OK()

  def _updateProdParameters(self, application):
    """Update the prod parameters stored in the production parameters visible from the web.

    :param application: application whose ``prodparameters`` are merged
    :returns: S_OK, S_ERROR if the parameters could not be merged
    """
    try:
      self.prodparameters.update(application.prodparameters)
    except Exception as x:
      return S_ERROR("Exception: %r" % x)

    if hasattr(application, 'extraCLIArguments') and application.extraCLIArguments:
      self.prodparameters['extraCLIArguments'] = repr(application.extraCLIArguments)

    return S_OK()

  def _jobSpecificModules(self, application, step):
    """Return the modules the application uses when running in a production job."""
    return application._prodjobmodules(step)

  def getEnergyPath(self):
    """Return the energy path 250gev or 3tev or 1.4tev etc.

    Energies below 1000 are given in ``gev``, others are divided by 1000 and given in
    ``tev``. Integer values are written without decimals, others with one decimal.

    :returns: energy folder name with a trailing slash
    """
    energy = Decimal(str(self.energy))
    tD = Decimal('1000.0')
    if energy < tD:
      unit = 'gev'
    else:
      unit = 'tev'
      energy = energy / tD

    if float(energy).is_integer():
      energyPath = str(int(energy))
    else:
      energyPath = "%1.1f" % energy

    energyPath = energyPath + unit + '/'

    self.log.info("Energy path is: ", energyPath)
    return energyPath

  def _checkMetaKeys(self, metakeys, extendFileMeta=False):
    """Check if metadata keys are allowed to be metadata.

    :param list metakeys: metadata keys for production metadata
    :param bool extendFileMeta: also use FileMetaFields for checking meta keys
    :returns: S_OK, S_ERROR
    """
    res = self.fc.getMetadataFields()
    if not res['OK']:
      self.log.error("Could not contact File Catalog")
      return S_ERROR("Could not contact File Catalog")

    # list() so that the keys can be extended whatever type keys() returns
    metaFCkeys = list(res['Value']['DirectoryMetaFields'].keys())
    if extendFileMeta:
      metaFCkeys.extend(res['Value']['FileMetaFields'].keys())

    for key in metakeys:
      # a key differing only by its case gets the more helpful error message
      for meta in metaFCkeys:
        if meta != key and meta.lower() == key.lower():
          return self._reportError("Key syntax error %r, should be %r" % (key, meta),
                                   name=self.__class__.__name__)
      if key not in metaFCkeys:
        return self._reportError("Key %r not found in metadata keys, allowed are %r" % (key, metaFCkeys))

    return S_OK()

  def _checkFindDirectories(self, metadata):
    """Find directories by metadata and check that there are directories found.

    :param dict metadata: metadata dictionary
    :returns: S_OK with the result of the catalog, S_ERROR
    """
    res = self.fc.findDirectoriesByMetadata(metadata)
    if not res['OK']:
      return self._reportError("Error looking up the catalog for available directories")
    if len(res['Value']) < 1:
      return self._reportError('Could not find any directories corresponding to the query issued')
    return res

  def setReconstructionBasePaths(self, recPath, dstPath):
    """Set the output Base paths for the reconstruction REC and DST files.

    :param str recPath: base path for REC files
    :param str dstPath: base path for DST files
    """
    self._recBasePaths['REC'] = recPath
    self._recBasePaths['DST'] = dstPath
def chown(directories, user=None, group=None, mode=None, recursive=False, ndirs=None, fcClient=None):
  """Change the user, group or mode of directories and apply it recursively if required.

  Each change is first tried with the plain value and, if the catalog refuses it, once
  more with the value wrapped in a dictionary (``{'Owner': user}``, ``{'Group': group}``
  or ``{'Mode': mode}``).

  :param directories: directories to change, a comma separated string or a collection of paths
  :param user: new owner, not changed if None
  :param group: new group, not changed if None
  :param mode: new mode, not changed if None
  :param bool recursive: also treat the sub directories; a dot is written to stdout every
                         ten directories as progress indication
  :param int ndirs: number of directories treated so far, used by the recursion
  :param fcClient: file catalog client, a ``FileCatalogClient`` is created if not given
  :returns: S_OK with the number of directories treated, or the failed catalog result
  """
  if ndirs is None:
    ndirs = 0
  if not directories:
    return S_OK(ndirs)
  if isinstance(directories, basestring):
    directories = directories.split(',')
  if fcClient is None:
    fcClient = FileCatalogClient()

  # (new value, catalog method, key used by the dictionary form of the argument)
  changes = ((user, 'changePathOwner', 'Owner'),
             (group, 'changePathGroup', 'Group'),
             (mode, 'changePathMode', 'Mode'))
  for value, methodName, key in changes:
    if value is None:
      continue
    change = getattr(fcClient, methodName)
    res = change(dict.fromkeys(directories, value))
    if not res['OK']:
      # second attempt with the dictionary form of the argument
      res = change(dict.fromkeys(directories, {key: value}))
      if not res['OK']:
        return res

  if not recursive:
    return S_OK(ndirs + len(directories))

  for subDir in directories:
    if ndirs % 10 == 0:
      sys.stdout.write('.')
      sys.stdout.flush()
    ndirs += 1
    res = fcClient.listDirectory(subDir)
    if not res['OK']:
      # best effort: a directory which cannot be listed is not descended into
      continue
    # the directory is missing from 'Successful' when its listing failed, treat it the same way
    listing = res['Value']['Successful'].get(subDir)
    if not listing:
      continue
    subDirectories = listing['SubDirs']
    if subDirectories:
      res = chown(subDirectories, user, group=group, mode=mode, recursive=True, ndirs=ndirs,
                  fcClient=fcClient)
      if not res['OK']:
        return res
      ndirs = res['Value']
  return S_OK(ndirs)