Example #1
class BenderScript(GaudiBase):
    """The application handler for BenderScript
    
    The user specifies:
    - script file(s) containing BenderScript code,
    - configuration/Configurable files to be used for 'importOptions',
    - a set of command-line arguments.
    
    At least one 'script' or 'import' file is required.
    
    The application executes the following line:
    
    bender {scripts} {arguments} --import {imports} --no-color --no-castor --import=data.py --batch
    
    e.g.
    
    bender script1.py  --import import1.py -w -p5 --no-color --no-castor --import=data.py --batch
    
    where data.py is a file with the input data and XML catalog, automatically generated by Ganga/DIRAC
    
    """
     
    _name           = 'BenderScript'
    _category       = 'applications'
    _exportmethods  = GaudiBase._exportmethods[:]
    _exportmethods += ['prepare', 'unprepare']
    
    _schema = GaudiBase._schema.inherit_copy()
    
    _schema.datadict['package'] = SimpleItem(
        defvalue = None,
        typelist = ['str', 'type(None)'],
        doc      = """The package the application belongs to (e.g. 'Sim', 'Phys')
        """
        )
    _schema.datadict['masterpackage'] = SimpleItem (
        defvalue = None,
        typelist = [ 'str', 'type(None)' ],
        doc      = """The package where your top level requirements file is read from.
        Can be written either as a path 'Tutorial/Analysis/v6r0' or in traditional notation 
        'Analysis v6r0 Tutorial'
        """
        )
    
    _schema.datadict['setupProjectOptions'] = SimpleItem(
        defvalue = ''     ,
        typelist = [ 'str', 'type(None)'],
        doc      = """Extra options to be passed onto the SetupProject command
        used for configuring the environment. As an example 
        setting it to '--dev' will give access to the DEV area. 
        For full documentation of the available options see 
        https://twiki.cern.ch/twiki/bin/view/LHCb/SetupProject
        """
        )
    
    _schema.datadict['scripts'] = FileItem(
        preparable      = 1      ,
        sequence        = 1      ,
        strict_sequence = 0      ,
        defvalue        = []     ,
        doc             = """The names of the script files to execute.
        A copy will be made at submission time
        """
        )
    
    _schema.datadict['imports'] = FileItem (
        preparable      =  1     ,
        sequence        =  1     ,
        strict_sequence =  0     ,
        defvalue        = []     ,
        doc             = """The names of the files to be used for 'importOptions'.
        A copy will be made at submission time
        """
        )
    
    _schema.datadict['commands'] = SimpleItem(
        defvalue = []      ,
        typelist = ['str'] ,
        sequence =  1      ,
        doc      = """The commands to be executed,
        e.g. [ 'run(10)' , 'print ls()' , 'print dir()' ]
        """
        )
    
    _schema.datadict['arguments'] = SimpleItem(
        defvalue = []      ,
        typelist = ['str'] ,
        sequence =  1      ,
        doc      = """List of command-line arguments for bender script,
        e.g. ['-w','-p5'], etc.
        For python scripts and configuration/Configurable files for 'importOptions',
        it is much better to use the separate options 'scripts' and 'imports'.
        The following arguments will be appended automatically: --no-color, --no-castor and --batch
        """
        )
    
    _schema.version.major += 2
    _schema.version.minor += 0
    
    #def __init__(self):
    #    super(BenderScript, self).__init__()

    def _get_default_version(self, gaudi_app):
        return guess_version(self, gaudi_app)

    def _auto__init__(self):
        if not self.appname : self.appname = 'Bender'
        self._init()

    def _getshell(self):

        import EnvironFunctions
        return EnvironFunctions._getshell(self)

    def prepare(self, force=False):

        super(BenderScript, self).prepare(force)
        self._check_inputs()

        
        share_dir = os.path.join (
            expandfilename ( getConfig('Configuration')['gangadir'] ) ,
            'shared'                            ,
            getConfig('Configuration')['user']  ,
            self.is_prepared.name               )
        
        input_sandbox_tar = os.path.join ( share_dir , 'inputsandbox',
                                           '_input_sandbox_%s.tar' % self.is_prepared.name ) 
        input_sandbox_tgz = os.path.join ( share_dir , 'inputsandbox',
                                           '_input_sandbox_%s.tgz' % self.is_prepared.name ) 
        
        fillPackedSandbox ( self.scripts + self.imports , input_sandbox_tar  ) 
        gzipFile          ( input_sandbox_tar , input_sandbox_tgz , True     )
        
        # add the newly created shared directory into the metadata system if
        # the app is associated with a persisted object
        self.checkPreparedHasParent(self)
        self.post_prepare()
        logger.debug("Finished Preparing Application in %s" % share_dir)

    def master_configure(self):
        return (None, StandardJobConfig())

    def configure(self, master_appconfig):
        
        ## strip leading and trailing blanks from arguments 
        self.arguments = [ a.strip() for a in self.arguments ]

        ## strip leading and trailing blanks from the command 
        self.commands  = [ a.strip() for a in self.commands  ]
        
        ## fill the script layout template ('layout' is a module-level
        ## template string defined elsewhere in this module, not shown here)
        the_script    = layout.format (
            scripts   = [ os.path.join ( f.subdir , os.path.basename ( f.name ) ) for f in self.scripts ] , 
            imports   = [ os.path.join ( f.subdir , os.path.basename ( f.name ) ) for f in self.imports ] , 
            arguments = self.arguments  ,
            command   = self.commands    
            )

        logger.debug('SCRIPT:\n%s', the_script)
        
        # add summary.xml
        outputsandbox_temp  = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox       = unique(outputsandbox_temp)
        
        input_files  = []
        input_files += [ FileBuffer('gaudipython-wrapper.py', the_script ) ]
        logger.debug("Returning StandardJobConfig")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))
    
    def _check_inputs(self):
        """Checks the validity of user's entries for BenderScript schema"""
        
        if not self.scripts and not self.imports : 
            raise ApplicationConfigurationError("Application scripts/imports are not defined")
        
        if isinstance ( self.scripts , str ) : self.scripts = [ File ( self.scripts ) ]
        if isinstance ( self.imports , str ) : self.imports = [ File ( self.imports ) ]
        
        for f in self.scripts : f.name = fullpath ( f.name )
        for f in self.imports : f.name = fullpath ( f.name )

    
    def postprocess(self):
        XMLPostProcessor.postprocess(self, logger)
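
# Usage sketch (not part of the handler above): how BenderScript is
# typically driven from a Ganga session. 'Job' is assumed to be exported
# by the enclosing Ganga framework; it is not defined in this snippet.
def _example_benderscript_job():
    j = Job(application=BenderScript())
    j.application.scripts = [File('script1.py')]      # copied at submission time
    j.application.imports = [File('import1.py')]      # passed via --import
    j.application.arguments = ['-w', '-p5']           # extra bender arguments
    j.application.commands = ['run(10)', 'print ls()']
    j.submit()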
Example #2
class Bender(GaudiBase):
    """The Bender application handler

    The user specifies a module file (via Bender.module) which contains a
    Bender python module and the number of events they want to run on
    (via Bender.events).  The user's module is then run on the data by
    calling:

    USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs)
    USERMODULE.run(NUMEVENTS)
    """

    _name = 'Bender'
    _category = 'applications'
    _exportmethods = GaudiBase._exportmethods[:]
    _exportmethods += ['prepare', 'unprepare']

    _schema = GaudiBase._schema.inherit_copy()
    docstr = 'The package the application belongs to (e.g. "Sim", "Phys")'
    _schema.datadict['package'] = SimpleItem(defvalue=None,
                                             typelist=['str', 'type(None)'],
                                             doc=docstr)
    docstr = 'The package where your top level requirements file is read '  \
             'from. Can be written either as a path '  \
             '\"Tutorial/Analysis/v6r0\" or in traditional notation '  \
             '\"Analysis v6r0 Tutorial\"'
    _schema.datadict['masterpackage'] = SimpleItem(
        defvalue=None, typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'Extra options to be passed onto the SetupProject command '\
             'used for configuring the environment. As an example '\
             'setting it to \'--dev\' will give access to the DEV area. '\
             'For full documentation of the available options see '\
             'https://twiki.cern.ch/twiki/bin/view/LHCb/SetupProject'
    _schema.datadict['setupProjectOptions'] = SimpleItem(
        defvalue='', typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'The name of the module to import. A copy will be made ' \
             'at submission time'
    _schema.datadict['module'] = FileItem(preparable=1,
                                          defvalue=File(),
                                          doc=docstr)
    docstr = 'The name of the Gaudi application (Bender)'
    _schema.datadict['project'] = SimpleItem(preparable=1,
                                             defvalue='Bender',
                                             hidden=1,
                                             protected=1,
                                             typelist=['str'],
                                             doc=docstr)
    docstr = 'The number of events to run over'
    _schema.datadict['events'] = SimpleItem(defvalue=-1,
                                            typelist=['int'],
                                            doc=docstr)
    docstr = 'Parameters for the module'
    _schema.datadict['params'] = SimpleItem(
        defvalue={},
        typelist=['dict', 'str', 'int', 'bool', 'float'],
        doc=docstr)
    _schema.version.major += 2
    _schema.version.minor += 0

    #def __init__(self):
    #    super(Bender, self).__init__()

    def _get_default_version(self, gaudi_app):
        return guess_version(self, gaudi_app)

    def _auto__init__(self):
        if (not self.appname) and (not self.project):
            self.project = 'Bender'  # default
        if (not self.appname):
            self.appname = self.project
        self._init()

    def _getshell(self):

        import EnvironFunctions
        return EnvironFunctions._getshell(self)

    def prepare(self, force=False):
        super(Bender, self).prepare(force)
        self._check_inputs()

        share_dir = os.path.join(
            expandfilename(getConfig('Configuration')['gangadir']), 'shared',
            getConfig('Configuration')['user'], self.is_prepared.name)
        fillPackedSandbox([self.module],
                          os.path.join(
                              share_dir, 'inputsandbox',
                              '_input_sandbox_%s.tar' % self.is_prepared.name))

        gzipFile(
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tar' % self.is_prepared.name),
            os.path.join(share_dir, 'inputsandbox',
                         '_input_sandbox_%s.tgz' % self.is_prepared.name),
            True)

        # add the newly created shared directory into the metadata system if
        # the app is associated with a persisted object
        self.checkPreparedHasParent(self)
        self.post_prepare()
        logger.debug("Finished Preparing Application in %s" % share_dir)

    def master_configure(self):
        return (None, StandardJobConfig())

    def configure(self, master_appconfig):

        # self._configure()
        modulename = split(self.module.name)[-1].split('.')[0]
        script = """
from copy import deepcopy
from Gaudi.Configuration import *
importOptions('data.py')
import %s as USERMODULE
EventSelectorInput = deepcopy(EventSelector().Input)
FileCatalogCatalogs = deepcopy(FileCatalog().Catalogs)
EventSelector().Input=[]
FileCatalog().Catalogs=[]\n""" % modulename

        script_configure = "USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs%s)\n"
        if self.params:
            param_string = ",params=%s" % self.params
        else:
            param_string = ""

        script_configure = script_configure % param_string
        script += script_configure

        script += "USERMODULE.run(%d)\n" % self.events
        script += getXMLSummaryScript()
        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = []
        input_files += [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning StandardJobConfig")
        return (None,
                StandardJobConfig(inputbox=input_files,
                                  outputbox=outputsandbox))

    def _check_inputs(self):
        """Checks the validity of user's entries for GaudiPython schema"""
        # Always check for None OR empty
        #logger.info("self.module: %s" % str(self.module))
        if isType(self.module, str):
            self.module = File(self.module)
        if not self.module.name:
            raise ApplicationConfigurationError(
                "Application module not specified")
        # Always check we've been given a FILE!
        self.module.name = fullpath(self.module.name)
        if not os.path.isfile(self.module.name):
            msg = 'Module file %s not found.' % self.module.name
            raise ApplicationConfigurationError(msg)

    def postprocess(self):
        XMLPostProcessor.postprocess(self, logger)
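
# Sketch of the user-module contract implied by the wrapper built in
# Bender.configure() above: the file named in Bender.module must expose
# configure() and run() along these lines (the bodies are illustrative
# assumptions, not part of the handler):
#
#   def configure(inputdata, catalogs, params={}):
#       # receives the deep copies of EventSelector().Input and
#       # FileCatalog().Catalogs made by the wrapper; 'params' is
#       # passed only when Bender.params is non-empty
#       ...
#
#   def run(nevents):
#       # process 'nevents' events (Bender.events, -1 by default)
#       ...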
Example #3
class AMIDataset(DQ2Dataset):
    '''ATLAS DDM Dataset With AMI Connection'''

    _category = 'datasets'
    _name = 'AMIDataset'

    _schema = DQ2Dataset._schema.inherit_copy()
    _schema.datadict['logicalDatasetName'] = SimpleItem(defvalue='', doc='')
    _schema.datadict['project'] = SimpleItem(defvalue='Atlas_Production',
                                             doc='')
    _schema.datadict['processingStep'] = SimpleItem(
        defvalue='Atlas_Production', doc='')
    _schema.datadict['amiStatus'] = SimpleItem(defvalue='VALID', doc='')
    _schema.datadict['entity'] = SimpleItem(defvalue='dataset', doc='')
    _schema.datadict['metadata'] = SimpleItem(defvalue={}, doc="Metadata")
    _schema.datadict['provenance'] = SimpleItem(defvalue=[],
                                                typelist=['str'],
                                                sequence=1,
                                                doc='Dataset provenance chain')
    _schema.datadict['goodRunListXML'] = FileItem(
        doc='GoodRunList XML file to search on')

    _exportmethods = [
        'search', 'fill_provenance', 'get_datasets_metadata',
        'get_files_metadata', 'get_contents'
    ]

    def __init__(self):
        super(AMIDataset, self).__init__()

    def fill_provenance(self, extraargs=None):

        # avoid a mutable default argument; extraargs is only ever read
        if extraargs is None:
            extraargs = []

        dataType = ""
        if len(extraargs) > 1:
            dataType = extraargs[1]

        self.provenance = []

        for d in self.dataset:

            logger.info("Filling provenance info for dataset %s...", d)

            prov = []
            self.provenance.append(prov)

            ds = d[:-1]

            argument = []
            argument.append("ListDatasetProvenance")
            argument.append("logicalDatasetName=" + ds)
            argument.append('-output=xml')

            result = amiclient.execute(argument)

            dom = result.getAMIdom()
            graph = dom.getElementsByTagName('graph')
            nFound = 0
            dictOfLists = {}
            for line in graph:
                nodes = line.getElementsByTagName('node')
                for node in nodes:
                    level = int(node.attributes['level'].value)
                    dataset = node.attributes['name'].value
                    # keep every dataset, or only those matching dataType
                    if not dataType or dataset.find(dataType) >= 0:
                        dictOfLists.setdefault(level, []).append(dataset)
                        nFound += 1
            if (nFound == 0) and (len(dataType) > 0):
                logger.warning("No datasets found of type", dataType)
            else:
                # walk the provenance generations in order
                for key in sorted(dictOfLists.keys()):
                    for dataset in sorted(dictOfLists[key]):
                        prov.append("%s/" % dataset.strip())

    def search(self,
               pattern='',
               maxresults=config['MaxNumOfDatasets'],
               extraargs=None):

        if extraargs is None:
            extraargs = []

        argument = []
        dsetList = []

        if self.goodRunListXML.name != '':

            # open the GRL
            if os.path.exists(self.goodRunListXML.name):
                logger.info("Good Run List '%s' file selected" %
                            self.goodRunListXML.name)
                grl_text = open(self.goodRunListXML.name).read()
            else:
                logger.error('Could not read Good Run List XML file')
                return []

            argument = []
            argument.append("GetGoodDatasetList")
            argument.append("prodStep=merge")
            #argument.append("dataType=%s" % self.dataType)
            argument.append("goodRunList=%s" % grl_text)
            argument.append("logicalDatasetName=%s" % self.logicalDatasetName)

        elif self.logicalDatasetName:
            pattern = self.logicalDatasetName

            pattern = pattern.replace("/", "")
            pattern = pattern.replace('*', '%')
            limit = "0,%d" % config['MaxNumOfDatasets']

            argument.append("SearchQuery")
            argument.append("entity=" + self.entity)

            argument.append(
                "glite=SELECT logicalDatasetName WHERE amiStatus='" +
                self.amiStatus + "' AND logicalDatasetName like '" + pattern +
                "' LIMIT " + limit)

            argument.append("project=" + self.project)
            argument.append("processingStep=" + self.processingStep)
            argument.append("mode=defaultField")
            argument.extend(extraargs)

        else:
            logger.error(
                "AMI search not set up correctly. No datasetname or good runs list supplied."
            )
            return []

        try:
            result = amiclient.execute(argument)
            if argument[0] == "GetGoodDatasetList":
                # GRL has different output
                res_text = result.output()
                dsetList = []
                for ln in res_text.split('\n'):
                    if ln.find("logicalDatasetName") != -1:
                        dsName = ln.split('=')[1].strip()
                        # add to the dataset list - check with DQ2 whether
                        # this is a container (needs a trailing '/')
                        if resolve_dataset_name(dsName):
                            dsName += "/"
                        dsetList.append(dsName)

            else:
                resultDict = result.getDict()
                resultByRow = resultDict['Element_Info']
                for row, vals in resultByRow.iteritems():
                    dsName = str(vals['logicalDatasetName'])
                    # check with DQ2 since AMI doesn't store /
                    if resolve_dataset_name(dsName):
                        dsName += '/'
                    dsetList.append(dsName)

        except Exception as msg:
            logger.error(msg)

        return dsetList

    def get_datasets_metadata(self):
        datasets = self.search()
        metadata = []

        for dataset in datasets:
            dataset = dataset.replace("/", "")
            tmp = get_metadata(dataset=dataset)
            metadata.append(tmp)

        return metadata

    def get_files_metadata(self, all=False):
        datasets = self.search()
        metadata = {}

        for dataset in datasets:
            dataset = dataset.replace("/", "")
            file_info = get_file_metadata(dataset=dataset,
                                          all=all,
                                          numevtsperfile=0)
            metadata.update(file_info)

        return metadata
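
# Usage sketch (illustrative): querying AMI from a Ganga session, assuming
# AMIDataset is exported and an AMI client is configured.
def _example_ami_search():
    ds = AMIDataset()
    ds.logicalDatasetName = 'data09*.ESD*'   # '*' becomes SQL '%' in the glite query
    datasets = ds.search()                   # list of matching dataset names
    metadata = ds.get_datasets_metadata()    # one metadata dict per dataset
    return datasets, metadata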