class BenderScript(GaudiBase):
    """The application handler for BenderScript.

    The user specifies:
    - script file(s) containing BenderScript scripts,
    - configuration/Configurables files to be used for 'importOptions',
    - a set of command-line arguments.

    At least one 'script' or 'import' file is required.

    The application executes the following line:

        bender {scripts} {arguments} --import {imports} --no-color --no-castor --import=data.py --batch

    e.g.

        bender script1.py --import import1.py -w -p5 --no-color --no-castor --import=data.py --batch

    where data.py is a file with the input data and the XML catalog,
    generated automatically by Ganga/DIRAC.
    """
    _name = 'BenderScript'
    _category = 'applications'
    _exportmethods = GaudiBase._exportmethods[:]
    _exportmethods += ['prepare', 'unprepare']

    _schema = GaudiBase._schema.inherit_copy()
    _schema.datadict['package'] = SimpleItem(
        defvalue=None,
        typelist=['str', 'type(None)'],
        doc="""The package the application belongs to (e.g. 'Sim', 'Phys')""")
    _schema.datadict['masterpackage'] = SimpleItem(
        defvalue=None,
        typelist=['str', 'type(None)'],
        doc="""The package where your top-level requirements file is read from.
        Can be written either as a path 'Tutorial/Analysis/v6r0' or in the
        traditional notation 'Analysis v6r0 Tutorial'""")
    _schema.datadict['setupProjectOptions'] = SimpleItem(
        defvalue='',
        typelist=['str', 'type(None)'],
        doc="""Extra options to be passed onto the SetupProject command used
        for configuring the environment. As an example, setting it to '--dev'
        will give access to the DEV area. For full documentation of the
        available options see
        https://twiki.cern.ch/twiki/bin/view/LHCb/SetupProject""")
    _schema.datadict['scripts'] = FileItem(
        preparable=1,
        sequence=1,
        strict_sequence=0,
        defvalue=[],
        doc="""The names of the script files to execute.
        A copy will be made at submission time""")
    _schema.datadict['imports'] = FileItem(
        preparable=1,
        sequence=1,
        strict_sequence=0,
        defvalue=[],
        doc="""The names of the files to be used for 'importOptions'.
        A copy will be made at submission time""")
    _schema.datadict['commands'] = SimpleItem(
        defvalue=[],
        typelist=['str'],
        sequence=1,
        doc="""The commands to be executed,
        e.g. ['run(10)', 'print ls()', 'print dir()']""")
    _schema.datadict['arguments'] = SimpleItem(
        defvalue=[],
        typelist=['str'],
        sequence=1,
        doc="""The list of command-line arguments for the bender script,
        e.g. ['-w', '-p5'], etc.
        For python scripts and configuration/Configurable files for
        'importOptions' it is much better to use the dedicated options
        'scripts' and 'imports'.
        The following arguments will be appended automatically:
        --no-color, --no-castor and --batch""")
    _schema.version.major += 2
    _schema.version.minor += 0

    def _get_default_version(self, gaudi_app):
        return guess_version(self, gaudi_app)

    def _auto__init__(self):
        if not self.appname:
            self.appname = 'Bender'
        self._init()

    def _getshell(self):
        import EnvironFunctions
        return EnvironFunctions._getshell(self)

    def prepare(self, force=False):
        super(BenderScript, self).prepare(force)
        self._check_inputs()

        share_dir = os.path.join(
            expandfilename(getConfig('Configuration')['gangadir']),
            'shared',
            getConfig('Configuration')['user'],
            self.is_prepared.name)

        input_sandbox_tar = os.path.join(
            share_dir, 'inputsandbox',
            '_input_sandbox_%s.tar' % self.is_prepared.name)
        input_sandbox_tgz = os.path.join(
            share_dir, 'inputsandbox',
            '_input_sandbox_%s.tgz' % self.is_prepared.name)

        fillPackedSandbox(self.scripts + self.imports, input_sandbox_tar)
        gzipFile(input_sandbox_tar, input_sandbox_tgz, True)

        # add the newly created shared directory into the metadata system
        # if the app is associated with a persisted object
        self.checkPreparedHasParent(self)
        self.post_prepare()
        logger.debug("Finished Preparing Application in %s" % share_dir)

    def master_configure(self):
        return (None, StandardJobConfig())

    def configure(self, master_appconfig):
        # strip leading and trailing blanks from the arguments
        self.arguments = [a.strip() for a in self.arguments]
        # strip leading and trailing blanks from the commands
        self.commands = [a.strip() for a in self.commands]

        # build the wrapper script from the layout template
        the_script = layout.format(
            scripts=[os.path.join(f.subdir, os.path.basename(f.name))
                     for f in self.scripts],
            imports=[os.path.join(f.subdir, os.path.basename(f.name))
                     for f in self.imports],
            arguments=self.arguments,
            command=self.commands)
        logger.debug('SCRIPT:\n%s' % the_script)

        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = [FileBuffer('gaudipython-wrapper.py', the_script)]
        logger.debug("Returning StandardJobConfig")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))

    def _check_inputs(self):
        """Check the validity of the user's entries for the BenderScript schema."""
        if not self.scripts and not self.imports:
            raise ApplicationConfigurationError(
                "Application scripts/imports are not defined")
        if isinstance(self.scripts, str):
            self.scripts = [File(self.scripts)]
        if isinstance(self.imports, str):
            self.imports = [File(self.imports)]
        for f in self.scripts:
            f.name = fullpath(f.name)
        for f in self.imports:
            f.name = fullpath(f.name)

    def postprocess(self):
        XMLPostProcessor.postprocess(self, logger)
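
# A minimal usage sketch for BenderScript (assuming a standard Ganga session
# where Job and BenderScript are exported; the file names are hypothetical):
#
#   app = BenderScript()
#   app.scripts = ['script1.py']     # BenderScript script(s) to run
#   app.imports = ['import1.py']     # files passed to 'importOptions'
#   app.arguments = ['-w', '-p5']    # extra command-line arguments
#   j = Job(application=app)
#   j.submit()
#
# --no-color, --no-castor and --batch are appended automatically, and
# data.py (input data plus XML catalog) is generated by Ganga/DIRAC.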
class Bender(GaudiBase):
    """The Bender application handler.

    The user specifies a module file (via Bender.module) which contains a
    Bender python module, and the number of events to run over (via
    Bender.events). The user's module is then run on the data by calling:

        USERMODULE.configure(EventSelectorInput, FileCatalogCatalogs)
        USERMODULE.run(NUMEVENTS)
    """
    _name = 'Bender'
    _category = 'applications'
    _exportmethods = GaudiBase._exportmethods[:]
    _exportmethods += ['prepare', 'unprepare']

    _schema = GaudiBase._schema.inherit_copy()
    docstr = 'The package the application belongs to (e.g. "Sim", "Phys")'
    _schema.datadict['package'] = SimpleItem(defvalue=None,
                                             typelist=['str', 'type(None)'],
                                             doc=docstr)
    docstr = 'The package where your top-level requirements file is read ' \
             'from. Can be written either as a path ' \
             '"Tutorial/Analysis/v6r0" or in traditional notation ' \
             '"Analysis v6r0 Tutorial"'
    _schema.datadict['masterpackage'] = SimpleItem(
        defvalue=None, typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'Extra options to be passed onto the SetupProject command ' \
             'used for configuring the environment. As an example, ' \
             'setting it to \'--dev\' will give access to the DEV area. ' \
             'For full documentation of the available options see ' \
             'https://twiki.cern.ch/twiki/bin/view/LHCb/SetupProject'
    _schema.datadict['setupProjectOptions'] = SimpleItem(
        defvalue='', typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'The name of the module to import. A copy will be made ' \
             'at submission time'
    _schema.datadict['module'] = FileItem(preparable=1, defvalue=File(),
                                          doc=docstr)
    docstr = 'The name of the Gaudi application (Bender)'
    _schema.datadict['project'] = SimpleItem(preparable=1, defvalue='Bender',
                                             hidden=1, protected=1,
                                             typelist=['str'], doc=docstr)
    docstr = 'The number of events'
    _schema.datadict['events'] = SimpleItem(defvalue=-1, typelist=['int'],
                                            doc=docstr)
    docstr = 'Parameters for the module'
    _schema.datadict['params'] = SimpleItem(
        defvalue={}, typelist=['dict', 'str', 'int', 'bool', 'float'],
        doc=docstr)
    _schema.version.major += 2
    _schema.version.minor += 0

    def _get_default_version(self, gaudi_app):
        return guess_version(self, gaudi_app)

    def _auto__init__(self):
        if (not self.appname) and (not self.project):
            self.project = 'Bender'  # default
        if not self.appname:
            self.appname = self.project
        self._init()

    def _getshell(self):
        import EnvironFunctions
        return EnvironFunctions._getshell(self)

    def prepare(self, force=False):
        super(Bender, self).prepare(force)
        self._check_inputs()

        share_dir = os.path.join(
            expandfilename(getConfig('Configuration')['gangadir']),
            'shared',
            getConfig('Configuration')['user'],
            self.is_prepared.name)
        fillPackedSandbox(
            [self.module],
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tar' % self.is_prepared.name))
        gzipFile(
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tar' % self.is_prepared.name),
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tgz' % self.is_prepared.name),
            True)

        # add the newly created shared directory into the metadata system
        # if the app is associated with a persisted object
        self.checkPreparedHasParent(self)
        self.post_prepare()
        logger.debug("Finished Preparing Application in %s" % share_dir)

    def master_configure(self):
        return (None, StandardJobConfig())

    def configure(self, master_appconfig):
        modulename = split(self.module.name)[-1].split('.')[0]
        script = """
from copy import deepcopy
from Gaudi.Configuration import *
importOptions('data.py')
import %s as USERMODULE
EventSelectorInput = deepcopy(EventSelector().Input)
FileCatalogCatalogs = deepcopy(FileCatalog().Catalogs)
EventSelector().Input = []
FileCatalog().Catalogs = []\n""" % modulename
        script_configure = \
            "USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs%s)\n"
        if self.params:
            param_string = ",params=%s" % self.params
        else:
            param_string = ""
        script_configure = script_configure % param_string
        script += script_configure
        script += "USERMODULE.run(%d)\n" % self.events
        script += getXMLSummaryScript()

        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning StandardJobConfig")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))

    def _check_inputs(self):
        """Check the validity of the user's entries for the Bender schema."""
        # always check for None or an empty name
        if isType(self.module, str):
            self.module = File(self.module)
        if self.module.name is None or self.module.name == "":
            raise ApplicationConfigurationError(
                "Application module not specified")
        # always check we have actually been given a file
        self.module.name = fullpath(self.module.name)
        if not os.path.isfile(self.module.name):
            msg = 'Module file %s not found.' % self.module.name
            raise ApplicationConfigurationError(msg)

    def postprocess(self):
        XMLPostProcessor.postprocess(self, logger)
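
# A minimal usage sketch for Bender (assuming a standard Ganga session; the
# module path and parameters are hypothetical). The module must provide the
# configure(...) and run(...) entry points described in the class docstring:
#
#   app = Bender()
#   app.module = 'my_bender_module.py'
#   app.events = 1000
#   app.params = {'threshold': 0.5}
#   j = Job(application=app)
#   j.submit()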
class AMIDataset(DQ2Dataset):
    '''ATLAS DDM dataset with an AMI connection.'''
    _category = 'datasets'
    _name = 'AMIDataset'

    _schema = DQ2Dataset._schema.inherit_copy()
    _schema.datadict['logicalDatasetName'] = SimpleItem(defvalue='', doc='')
    _schema.datadict['project'] = SimpleItem(defvalue='Atlas_Production',
                                             doc='')
    _schema.datadict['processingStep'] = SimpleItem(
        defvalue='Atlas_Production', doc='')
    _schema.datadict['amiStatus'] = SimpleItem(defvalue='VALID', doc='')
    _schema.datadict['entity'] = SimpleItem(defvalue='dataset', doc='')
    _schema.datadict['metadata'] = SimpleItem(defvalue={}, doc='Metadata')
    _schema.datadict['provenance'] = SimpleItem(defvalue=[],
                                                typelist=['str'],
                                                sequence=1,
                                                doc='Dataset provenance chain')
    _schema.datadict['goodRunListXML'] = FileItem(
        doc='GoodRunList XML file to search on')

    _exportmethods = ['search', 'fill_provenance', 'get_datasets_metadata',
                      'get_files_metadata', 'get_contents']

    def __init__(self):
        super(AMIDataset, self).__init__()

    def fill_provenance(self, extraargs=[]):
        dataType = ""
        if len(extraargs) > 1:
            dataType = extraargs[1]

        self.provenance = []
        for d in self.dataset:
            logger.info("Filling provenance info for dataset %s...", d)
            prov = []
            self.provenance.append(prov)
            ds = d[:-1]
            argument = []
            argument.append("ListDatasetProvenance")
            argument.append("logicalDatasetName=" + ds)
            argument.append('-output=xml')
            result = amiclient.execute(argument)

            dom = result.getAMIdom()
            graph = dom.getElementsByTagName('graph')
            nFound = 0
            dictOfLists = {}
            for line in graph:
                nodes = line.getElementsByTagName('node')
                for node in nodes:
                    level = int(node.attributes['level'].value)
                    dataset = node.attributes['name'].value
                    if (len(dataType) > 0) and (dataset.find(dataType) >= 0):
                        # keep only the selected dataType
                        levelList = dictOfLists.get(level, [])
                        levelList.append(dataset)
                        dictOfLists[level] = levelList
                        nFound = nFound + 1
                    elif len(dataType) == 0:
                        # keep everything
                        levelList = dictOfLists.get(level, [])
                        levelList.append(dataset)
                        dictOfLists[level] = levelList
                        nFound = nFound + 1

            if (nFound == 0) and (len(dataType) > 0):
                logger.warning("No datasets found of type %s", dataType)
            else:
                # append the datasets generation by generation
                keys = dictOfLists.keys()
                keys.sort()
                for key in keys:
                    datasetList = dictOfLists.get(key)
                    datasetList.sort()
                    for dataset in datasetList:
                        prov.append("%s/" % dataset.strip())

    def search(self, pattern='', maxresults=config['MaxNumOfDatasets'],
               extraargs=[]):
        argument = []
        dsetList = []

        if self.goodRunListXML.name != '':
            # open the GRL
            if os.path.exists(self.goodRunListXML.name):
                logger.info("Good Run List '%s' file selected" %
                            self.goodRunListXML.name)
                grl_text = open(self.goodRunListXML.name).read()
            else:
                logger.error('Could not read Good Run List XML file')
                return []

            argument = []
            argument.append("GetGoodDatasetList")
            argument.append("prodStep=merge")
            #argument.append("dataType=%s" % self.dataType)
            argument.append("goodRunList=%s" % grl_text)
            argument.append("logicalDatasetName=%s" % self.logicalDatasetName)
        elif self.logicalDatasetName:
            pattern = self.logicalDatasetName
            pattern = pattern.replace("/", "")
            pattern = pattern.replace('*', '%')
            limit = "0,%d" % config['MaxNumOfDatasets']

            argument.append("SearchQuery")
            argument.append("entity=" + self.entity)
            argument.append(
                "glite=SELECT logicalDatasetName WHERE amiStatus='" +
                self.amiStatus + "' AND logicalDatasetName like '" +
                pattern + "' LIMIT " + limit)
            argument.append("project=" + self.project)
            argument.append("processingStep=" + self.processingStep)
            argument.append("mode=defaultField")
            argument.extend(extraargs)
        else:
            logger.error("AMI search not set up correctly. No dataset name "
                         "or good run list supplied.")
            return []

        try:
            result = amiclient.execute(argument)

            if argument[0] == "GetGoodDatasetList":
                # the GRL query has a different output format
                res_text = result.output()
                dsetList = []
                for ln in res_text.split('\n'):
                    if ln.find("logicalDatasetName") != -1:
                        # add to the dataset list - check for a container
                        if resolve_dataset_name(ln.split('=')[1].strip()):
                            dsetList.append(ln.split('=')[1].strip() + "/")
                        else:
                            dsetList.append(ln.split('=')[1].strip())
            else:
                resultDict = result.getDict()
                resultByRow = resultDict['Element_Info']
                for row, vals in resultByRow.iteritems():
                    dsName = str(vals['logicalDatasetName'])
                    # check with DQ2 since AMI doesn't store the trailing /
                    if resolve_dataset_name(dsName):
                        dsName += '/'
                    dsetList.append(dsName)
        except Exception as msg:
            logger.error(msg)

        return dsetList

    def get_datasets_metadata(self):
        datasets = self.search()
        metadata = []
        for dataset in datasets:
            dataset = dataset.replace("/", "")
            tmp = get_metadata(dataset=dataset)
            metadata.append(tmp)
        return metadata

    def get_files_metadata(self, all=False):
        datasets = self.search()
        metadata = {}
        for dataset in datasets:
            dataset = dataset.replace("/", "")
            file_info = get_file_metadata(dataset=dataset, all=all,
                                          numevtsperfile=0)
            metadata.update(file_info)
        return metadata
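
# A minimal usage sketch for AMIDataset (assuming a configured AMI client and
# DQ2 setup; the dataset pattern is hypothetical):
#
#   ds = AMIDataset()
#   ds.logicalDatasetName = 'mc08.*.AOD.*'
#   datasets = ds.search()               # matching dataset names
#   meta = ds.get_datasets_metadata()    # one metadata record per dataset
#   ds.fill_provenance()                 # populate ds.provenance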