Example #1
    def __setup_sandboxcache__(self, job):
        '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

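        # config['DefaultSE'] may carry a space token using the convention
        # 'token:<srm_token>:<se_host>'; the regex below splits that apart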
        re_token = re.compile('^token:(.*):(.*)$')

        self.sandboxcache.vo = config['VirtualOrganisation']
        self.sandboxcache.middleware = 'GLITE'
        self.sandboxcache.timeout = config['SandboxTransferTimeout']

        if self.sandboxcache._name == 'LCGSandboxCache':
            if not self.sandboxcache.lfc_host:
                self.sandboxcache.lfc_host = grids[
                    self.sandboxcache.middleware].__get_lfc_host__()

            if not self.sandboxcache.se:

                token = ''
                se_host = config['DefaultSE']
                m = re_token.match(se_host)
                if m:
                    token = m.group(1)
                    se_host = m.group(2)

                self.sandboxcache.se = se_host

                if token:
                    self.sandboxcache.srm_token = token

            if (self.sandboxcache.se_type
                    in ['srmv2']) and (not self.sandboxcache.srm_token):
                self.sandboxcache.srm_token = config['DefaultSRMToken']

        elif self.sandboxcache._name == 'DQ2SandboxCache':

            # generate a new dataset name if not given
            if not self.sandboxcache.dataset_name:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2outputdatasetname
                self.sandboxcache.dataset_name, unused = dq2outputdatasetname(
                    "%s.input" % get_uuid(), 0, False, '')

            # subjobs inherit the dataset name from the master job
            for sj in job.subjobs:
                sj.backend.sandboxcache.dataset_name = self.sandboxcache.dataset_name

        elif self.sandboxcache._name == 'GridftpSandboxCache':
            if config['CreamInputSandboxBaseURI']:
                self.sandboxcache.baseURI = config['CreamInputSandboxBaseURI']
            elif self.CE:
                ce_host = re.sub(r':[0-9]+', '', self.CE.split('/cream')[0])
                self.sandboxcache.baseURI = 'gsiftp://%s/opt/glite/var/cream_sandbox/%s' % (
                    ce_host, self.sandboxcache.vo)
            else:
                logger.error('baseURI not available for GridftpSandboxCache')
                return False

        return True
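
As a quick illustration of the 'token:<srm_token>:<se_host>' convention handled above, here is a minimal, self-contained sketch of the same parsing (the sample values are hypothetical):

    # Minimal sketch of the DefaultSE parsing done in __setup_sandboxcache__.
    import re

    re_token = re.compile('^token:(.*):(.*)$')

    def parse_default_se(se_config):
        '''Split a DefaultSE value into (srm_token, se_host).'''
        m = re_token.match(se_config)
        if m:
            return m.group(1), m.group(2)
        return '', se_config

    # hypothetical values, for illustration only
    assert parse_default_se('token:MYTOKEN:srm.example.org') == ('MYTOKEN', 'srm.example.org')
    assert parse_default_se('srm.example.org') == ('', 'srm.example.org')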
Example #3
    def master_prepare(self,app,appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.')) 

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null') 
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if job.application.exe not in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

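        # append each sandbox file to the tarball; -C makes tar store only the basename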
        for fname in [f.name for f in job.inputsandbox]:
            fname = fname.rstrip(os.sep)
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep)+1:]
            rc, output = commands.getstatusoutput('tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(),os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']:
            raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter, ArgSplitter or ArgSplitterTask')
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError('site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

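        # derive the final output dataset name from the user-supplied stem, job id and group settings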
        job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,False,location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname+'.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,False,location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s'%(self.libDataset,self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(self.libDataset,sys.exc_info()[0],sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.outputsandbox:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # create build job
        if job.backend.bexec != '':
            jspec = JobSpec()
            jspec.jobDefinitionID   = job.id
            jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
            jspec.transformation    = '%s/buildGen-00-00-01' % Client.baseURLSUB
            if Client.isDQ2free(job.backend.site):
                jspec.destinationDBlock = '%s/%s' % (job.outputdata.datasetname,self.libDataset)
                jspec.destinationSE     = 'local'
            else:
                jspec.destinationDBlock = self.libDataset
                jspec.destinationSE     = job.backend.site
            jspec.prodSourceLabel   = configPanda['prodSourceLabelBuild']
            jspec.processingType    = configPanda['processingType']
            jspec.assignedPriority  = configPanda['assignedPriorityBuild']
            jspec.computingSite     = job.backend.site
            jspec.cloud             = job.backend.requirements.cloud
            jspec.jobParameters     = '-o %s' % (self.library)
            if self.inputsandbox:
                jspec.jobParameters     += ' -i %s' % (self.inputsandbox)
            else:
                raise ApplicationConfigurationError('Executable on Panda with build job defined, but inputsandbox is empty!')
            matchURL = re.search('(http.*://[^/]+)/',Client.baseURLCSRVSSL)
            if matchURL:
                jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
            if job.backend.bexec != '':
                jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)
                jspec.jobParameters += ' -r . '

            fout = FileSpec()
            fout.lfn  = self.library
            fout.type = 'output'
            fout.dataset = self.libDataset
            fout.destinationDBlock = self.libDataset
            jspec.addFile(fout)

            flog = FileSpec()
            flog.lfn = '%s.log.tgz' % self.libDataset
            flog.type = 'log'
            flog.dataset = self.libDataset
            flog.destinationDBlock = self.libDataset
            jspec.addFile(flog)
            return jspec
        else:
            return None
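
The packing steps above shell out to tar and gzip through the long-deprecated commands module. A rough standard-library equivalent of the same packing logic, shown only as a sketch (not the handler's actual code):

    import os
    import tarfile

    def pack_inputsandbox(tar_path, filenames):
        '''Pack the given files into <tar_path>.gz, keeping only basenames.
        Returns the archive path, or None when there is nothing to pack.'''
        if not filenames:
            return None
        gz_path = tar_path + '.gz'
        with tarfile.open(gz_path, 'w:gz') as tf:
            for fname in filenames:
                fname = fname.rstrip(os.sep)
                tf.add(fname, arcname=os.path.basename(fname))
        return gz_path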
Example #5
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object

        logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == 'DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Expand Athena jobOptions
        if not app.option_file and not app.command_line:
            raise ConfigError(
                "j.application.option_file='' - No Athena jobOptions files specified."
            )

        athena_options = ''
        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
        ]
        if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:

            for option_file in app.option_file:
                athena_options += ' ' + os.path.basename(option_file.name)
                inputbox += [File(option_file.name)]

            athena_options += ' %s ' % app.options

        else:
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options = app.options + ' ' + athena_options
                inputbox += [File(option_file.name)]

            if app.command_line:
                athena_options = app.command_line

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        if app.user_setupfile.name:
            inputbox += [File(app.user_setupfile.name)]
        #CN: added extra test for TNTJobSplitter
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset', 'ATLASTier3Dataset'
        ] or (job._getRoot().splitter
              and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
            _append_files(inputbox, 'dq2_get')
            _append_files(inputbox, 'dq2info.tar.gz')
            _append_files(inputbox, 'libdcap.so')

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox, 'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox, 'ganga-stagein.py')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')

        if 'getstats.py' not in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'getstats.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            if not job.outputdata.location:
                raise ApplicationConfigurationError(
                    None,
                    'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !'
                )
            if File(
                    os.path.join(os.path.dirname(__file__),
                                 'ganga-stage-in-out-dq2.py')) not in inputbox:
                _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
                _append_files(inputbox, 'dq2info.tar.gz')
                _append_files(inputbox, 'libdcap.so')
            _append_files(inputbox, 'ganga-joboption-parse.py')

        if job.inputsandbox:
            for file in job.inputsandbox:
                inputbox += [file]
        if app.user_area.name:
            if app.is_prepared is True:
                inputbox += [File(app.user_area.name)]
            else:
                inputbox += [
                    File(
                        os.path.join(
                            os.path.join(shared_path, app.is_prepared.name),
                            os.path.basename(app.user_area.name)))
                ]
        if app.group_area.name and string.find(app.group_area.name,
                                               "http") < 0:
            if app.is_prepared is True:
                inputbox += [File(app.group_area.name)]
            else:
                inputbox += [
                    File(
                        os.path.join(
                            os.path.join(shared_path, app.is_prepared.name),
                            os.path.basename(app.group_area.name)))
                ]


#       prepare environment

        try:
            atlas_software = config['ATLAS_SOFTWARE']
        except ConfigError:
            raise ConfigError(
                'No default location of ATLAS_SOFTWARE specified in the configuration.'
            )

        if app.atlas_release == '' and app.atlas_project != "AthAnalysisBase":
            raise ApplicationConfigurationError(
                None,
                'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.'
            )

        environment = {
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': athena_options,
            'ATLAS_SOFTWARE': atlas_software,
            'ATHENA_USERSETUPFILE': athena_usersetupfile,
            'ATLAS_PROJECT': app.atlas_project,
            'ATLAS_EXETYPE': app.atlas_exetype,
            'GANGA_VERSION': configSystem['GANGA_VERSION'],
            'DQ2_SETUP_SCRIPT': configDQ2['setupScript']
        }

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        cmtconfig = app.atlas_cmtconfig
        if cmtconfig.find('x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        environment['ATLAS_CMTCONFIG'] = app.atlas_cmtconfig
        environment['DCACHE_RA_BUFFER'] = str(config['DCACHE_RA_BUFFER'])

        if app.atlas_environment:
            for var in app.atlas_environment:
                vars = var.split('=')
                if len(vars) == 2:
                    environment[vars[0]] = vars[1]

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                     or app.atlas_release.find('12.') <= 0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production

        if app.user_area.name:
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        if app.group_area.name:
            if string.find(app.group_area.name, "http") >= 0:
                environment['GROUP_AREA_REMOTE'] = "%s" % (app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(
                    app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if job.inputdata.type not in ['LOCAL']:

                try:
                    environment['X509CERTDIR'] = os.environ['X509_CERT_DIR']
                except KeyError:
                    environment['X509CERTDIR'] = ''

                try:
                    proxy = os.environ['X509_USER_PROXY']
                except KeyError:
                    proxy = '/tmp/x509up_u%s' % os.getuid()

                REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy)
                environment['REMOTE_PROXY'] = REMOTE_PROXY

                try:
                    environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
                except Exception:
                    pass

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                #environment['DATASETTYPE']=job.inputdata.type
                # At present, DQ2 download is the only thing that works
                environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol

                try:
                    environment['X509CERTDIR'] = os.environ['X509_CERT_DIR']
                except KeyError:
                    environment['X509CERTDIR'] = ''

                try:
                    proxy = os.environ['X509_USER_PROXY']
                except KeyError:
                    proxy = '/tmp/x509up_u%s' % os.getuid()

                REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy)
                environment['REMOTE_PROXY'] = REMOTE_PROXY
                try:
                    environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
                except Exception:
                    pass

            else:
                raise ConfigError(
                    "j.inputdata.dataset='' - DQ2 dataset name needs to be specified."
                )

            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(
                    job.inputdata.tagdataset)

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            try:
                environment['X509CERTDIR'] = os.environ['X509_CERT_DIR']
            except KeyError:
                environment['X509CERTDIR'] = ''
            try:
                proxy = os.environ['X509_USER_PROXY']
            except KeyError:
                proxy = '/tmp/x509up_u%s' % os.getuid()

            REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy)
            environment['REMOTE_PROXY'] = REMOTE_PROXY
            try:
                environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
            except Exception:
                pass

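        # site-specific override via backend extraopts, else fall back to the configured DQ2_LOCAL_SITE_ID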
        if hasattr(job.backend, 'extraopts'):
            if 'site=hh' in job.backend.extraopts:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
            elif 'site=zn' in job.backend.extraopts:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2[
                    'DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')

        #       output sandbox
        outputbox = []
        outputGUIDs = 'output_guids'
        outputLOCATION = 'output_location'
        outputDATA = 'output_data'
        outputbox.append(outputGUIDs)
        outputbox.append(outputLOCATION)
        outputbox.append(outputDATA)
        outputbox.append('stats.pickle')
        if job.outputsandbox:
            for file in job.outputsandbox:
                outputbox += [file]

        ## retrieve the FileStager log
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['FILE_STAGER']:
            outputbox += ['FileStager.out', 'FileStager.err']

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
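
The X509 certificate-directory and proxy lookups above are repeated verbatim for StagerDataset, DQ2Dataset and DQ2OutputDataset; they could be factored into a single helper along these lines (a sketch only; configLCG is the same config object used above):

    import os
    import socket

    def _add_grid_proxy_env(environment, configLCG):
        '''Mirror the repeated proxy-setup blocks in master_prepare (sketch).'''
        environment['X509CERTDIR'] = os.environ.get('X509_CERT_DIR', '')
        proxy = os.environ.get('X509_USER_PROXY', '/tmp/x509up_u%s' % os.getuid())
        environment['REMOTE_PROXY'] = '%s:%s' % (socket.getfqdn(), proxy)
        try:
            environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
        except Exception:
            pass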
Example #6
    def master_prepare(self,app,appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from pandatools import MiscUtils
        from pandatools import AthenaUtils
        from pandatools import PsubUtils

        # create a random number for this submission to allow multiple use of containers
        self.rndSubNum = random.randint(1111,9999)

        job = app._getParent()
        logger.debug('AthenaJediRTHandler master_prepare called for %s', job.getFQID('.')) 

        if app.useRootCoreNoBuild:
            logger.info('Athena.useRootCoreNoBuild is True, setting Panda.nobuild=True.')
            job.backend.nobuild = True

        if job.backend.bexec and job.backend.nobuild:
            raise ApplicationConfigurationError("Contradicting options: job.backend.bexec and job.backend.nobuild are both enabled.")

        if job.backend.requirements.rootver != '' and job.backend.nobuild:
            raise ApplicationConfigurationError("Contradictory options: job.backend.requirements.rootver is set and job.backend.nobuild is enabled.")
        
        # Switch on compilation flag if bexec is set or nobuild is disabled
        if job.backend.bexec != '' or not job.backend.nobuild:
            app.athena_compile = True
            for sj in job.subjobs:
                sj.application.athena_compile = True
            logger.info('"job.backend.nobuild=False" or "job.backend.bexec" is set - Panda build job is enabled.')

        if job.backend.nobuild:
            app.athena_compile = False
            for sj in job.subjobs:
                sj.application.athena_compile = False
            logger.info('"job.backend.nobuild=True" or "--nobuild" chosen - Panda build job is switched off.')

        # check for auto datri
        if job.outputdata.location != '':
            if not PsubUtils.checkDestSE(job.outputdata.location,job.outputdata.datasetname,False):
                raise ApplicationConfigurationError("Problems with outputdata.location setting '%s'" % job.outputdata.location)

        # validate application
        if not app.atlas_release and not job.backend.requirements.rootver and app.atlas_exetype not in ['EXE']:
            raise ApplicationConfigurationError("application.atlas_release is not set. Did you run application.prepare()?")

        self.dbrelease = app.atlas_dbrelease
        if self.dbrelease != '' and self.dbrelease != 'LATEST' and self.dbrelease.find(':') == -1:
            raise ApplicationConfigurationError("ERROR : invalid argument for DB Release. Must be 'LATEST' or 'DatasetName:FileName'")

        self.runConfig = AthenaUtils.ConfigAttr(app.atlas_run_config)
        for k in self.runConfig.keys():
            self.runConfig[k]=AthenaUtils.ConfigAttr(self.runConfig[k])
        if not app.atlas_run_dir:
            raise ApplicationConfigurationError("application.atlas_run_dir is not set. Did you run application.prepare()")
 
        self.rundirectory = app.atlas_run_dir
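        # optional project/production cache suffix, appended to transHome below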
        self.cacheVer = ''
        if app.atlas_project and app.atlas_production:
            self.cacheVer = "-" + app.atlas_project + "_" + app.atlas_production

        # handle different atlas_exetypes
        self.job_options = ''
        if app.atlas_exetype == 'TRF':
            self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError("job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            #raise ApplicationConfigurationError("Sorry TRF on Panda backend not yet supported")

            if app.options:
                self.job_options += ' %s ' % app.options
                
        elif app.atlas_exetype == 'ATHENA':
            
            if len(app.atlas_environment) > 0 and app.atlas_environment[0].find('DBRELEASE_OVERRIDE')==-1:
                logger.warning("Passing of environment variables to Athena using Panda not supported. Ignoring atlas_environment setting.")
                
            if job.outputdata.outputdata:
                raise ApplicationConfigurationError("job.outputdata.outputdata must be empty if atlas_exetype='ATHENA' and Panda backend is used (outputs are auto-detected)")
            if app.options:
                if app.options.startswith('-c'):
                    self.job_options += ' %s ' % app.options
                else:
                    self.job_options += ' -c %s ' % app.options

                logger.warning('The value of j.application.options has been prepended with " -c " ')
                logger.warning('Please make sure to use proper quotes for the values of j.application.options !')

            self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

            # check for TAG compression
            if 'subcoll.tar.gz' in app.append_to_user_area:
                self.job_options = ' uncompress.py ' + self.job_options
                
        elif app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError("job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

            # sort out environment variables
            env_str = ""
            if len(app.atlas_environment) > 0:
                for env_var in app.atlas_environment:
                    env_str += "export %s ; " % env_var
            else: 
                env_str = ""

            # below fixes issue with runGen -- job_options are executed by os.system when dbrelease is used, and by the shell otherwise
            ## - REMOVED FIX DUE TO CHANGE IN PILOT - MWS 8/11/11
            if job.backend.requirements.usecommainputtxt:
                input_str = '/bin/echo %IN > input.txt; cat input.txt; '
            else:
                input_str = '/bin/echo %IN | sed \'s/,/\\\n/g\' > input.txt; cat input.txt; '
            if app.atlas_exetype == 'PYARA':
                self.job_options = env_str + input_str + ' python ' + self.job_options
            elif app.atlas_exetype == 'ARES':
                self.job_options = env_str + input_str + ' athena.py ' + self.job_options
            elif app.atlas_exetype == 'ROOT':
                self.job_options = env_str + input_str + ' root -b -q ' + self.job_options
            elif app.atlas_exetype == 'EXE':
                self.job_options = env_str + input_str + self.job_options

            if app.options:
                self.job_options += ' %s ' % app.options

        if self.job_options == '':
            raise ApplicationConfigurationError("No Job Options found!")
        logger.info('Running job options: %s'%self.job_options)

        # validate dbrelease
        if self.dbrelease != "LATEST":
            self.dbrFiles,self.dbrDsList = getDBDatasets(self.job_options,'',self.dbrelease)

        # handle the output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        # validate the output dataset name (and make it a container)
        job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname+='/'

        # add extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)
        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # use the shared area if possible
        tmp_user_area_name = app.user_area.name
        if app.is_prepared is not True:
            from Ganga.Utility.files import expandfilename
            shared_path = os.path.join(expandfilename(getConfig('Configuration')['gangadir']),'shared',getConfig('Configuration')['user'])
            tmp_user_area_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))


        # Add inputsandbox to user_area
        if job.inputsandbox:
            logger.warning("Submitting Panda job with inputsandbox. This may slow the submission slightly.")

            if tmp_user_area_name:
                inpw = os.path.dirname(tmp_user_area_name)
                self.inputsandbox = os.path.join(inpw, 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null'))
            else:
                inpw = job.getInputWorkspace()
                self.inputsandbox = inpw.getPath('sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null'))

            if tmp_user_area_name:
                rc, output = commands.getstatusoutput('cp %s %s.gz' % (tmp_user_area_name, self.inputsandbox))
                if rc:
                    logger.error('Copying user_area failed with status %d',rc)
                    logger.error(output)
                    raise ApplicationConfigurationError('Packing inputsandbox failed.')
                rc, output = commands.getstatusoutput('gunzip %s.gz' % (self.inputsandbox))
                if rc:
                    logger.error('Unzipping user_area failed with status %d',rc)
                    logger.error(output)
                    raise ApplicationConfigurationError('Packing inputsandbox failed.')

            for fname in [os.path.abspath(f.name) for f in job.inputsandbox]:
                fname = fname.rstrip(os.sep)
                path = os.path.dirname(fname)
                fn = os.path.basename(fname)

                #app.atlas_run_dir
                # get Athena versions
                rc, out = AthenaUtils.getAthenaVer()
                # failed
                if not rc:
                    #raise ApplicationConfigurationError('CMT could not parse correct environment ! \n Did you start/setup ganga in the run/ or cmt/ subdirectory of your athena analysis package ?')
                    logger.warning("CMT could not parse correct environment for inputsandbox - will use the atlas_run_dir as default")
                    
                    # as we don't have to be in the run dir now, create a copy of the run_dir directory structure and use that
                    input_dir = os.path.dirname(self.inputsandbox)
                    run_path = "%s/sbx_tree/%s" % (input_dir, app.atlas_run_dir)
                    rc, output = commands.getstatusoutput("mkdir -p %s" % run_path)
                    if not rc:
                        # copy this sandbox file
                        rc, output = commands.getstatusoutput("cp %s %s" % (fname, run_path))
                        if not rc:
                            path = os.path.join(input_dir, 'sbx_tree')
                            fn = os.path.join(app.atlas_run_dir, fn)
                        else:
                            raise ApplicationConfigurationError("Couldn't copy file %s to recreate run_dir for input sandbox" % fname)
                    else:
                        raise ApplicationConfigurationError("Couldn't create directory structure to match run_dir %s for input sandbox" % run_path)

                else:
                    userarea = out['workArea']

                    # strip the path from the filename if present in the userarea
                    ua = os.path.abspath(userarea)
                    if ua in path:
                        fn = fname[len(ua)+1:]
                        path = ua

                rc, output = commands.getstatusoutput('tar -h -r -f %s -C %s %s' % (self.inputsandbox, path, fn))
                if rc:
                    logger.error('Packing inputsandbox failed with status %d',rc)
                    logger.error(output)
                    raise ApplicationConfigurationError('Packing inputsandbox failed.')

            # remove sandbox tree if created
            if "sbx_tree" in os.listdir(os.path.dirname(self.inputsandbox)):                
                rc, output = commands.getstatusoutput("rm -r %s/sbx_tree" % os.path.dirname(self.inputsandbox))
                if rc:
                    raise ApplicationConfigurationError("Couldn't remove directory structure used for input sandbox")
                
            rc, output = commands.getstatusoutput('gzip %s' % (self.inputsandbox))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            self.inputsandbox += ".gz"
        else:
            self.inputsandbox = tmp_user_area_name

        # job name
        jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

        # make task
        taskParamMap = {}
        # Enforce that outputdataset name ends with / for container
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname = job.outputdata.datasetname + '/'

        taskParamMap['taskName'] = job.outputdata.datasetname

        taskParamMap['uniqueTaskName'] = True
        taskParamMap['vo'] = 'atlas'
        taskParamMap['architecture'] = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
        if app.atlas_release:
            taskParamMap['transUses'] = 'Atlas-%s' % app.atlas_release
        else:
            taskParamMap['transUses'] = ''
        taskParamMap['transHome'] = 'AnalysisTransforms' + self.cacheVer  # + nightVer

        configSys = getConfig('System')
        gangaver = configSys['GANGA_VERSION'].lower()
        if not gangaver:
            gangaver = "ganga"

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['processingType'] = '{0}-jedi-athena'.format(gangaver)
        else:
            taskParamMap['processingType'] = '{0}-jedi-run'.format(gangaver)

        #if options.eventPickEvtList != '':
        #    taskParamMap['processingType'] += '-evp'
        taskParamMap['prodSourceLabel'] = 'user'
        if job.backend.site != 'AUTO':
            taskParamMap['cloud'] = Client.PandaSites[job.backend.site]['cloud']
            taskParamMap['site'] = job.backend.site
        elif job.backend.requirements.cloud is not None and not job.backend.requirements.anyCloud:
            taskParamMap['cloud'] = job.backend.requirements.cloud
        if job.backend.requirements.excluded_sites != []:
            taskParamMap['excludedSite'] = expandExcludedSiteList( job )

        # if only a single site is specified, don't set includedSite
        #if job.backend.site != 'AUTO':
        #    taskParamMap['includedSite'] = job.backend.site
        #taskParamMap['cliParams'] = fullExecString
        if job.backend.requirements.noEmail:
            taskParamMap['noEmail'] = True
        if job.backend.requirements.skipScout:
            taskParamMap['skipScout'] = True
        if app.atlas_exetype not in ["ATHENA", "TRF"]:
            taskParamMap['nMaxFilesPerJob'] = job.backend.requirements.maxNFilesPerJob
        if job.backend.requirements.disableAutoRetry:
            taskParamMap['disableAutoRetry'] = 1
        # source URL
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL)
        if matchURL is not None:
            taskParamMap['sourceURL'] = matchURL.group(1)

        # dataset names
        outDatasetName = job.outputdata.datasetname
        logDatasetName = re.sub('/$','.log/',job.outputdata.datasetname)
        # log
        taskParamMap['log'] = {'dataset': logDatasetName,
                               'container': logDatasetName,
                               'type':'template',
                               'param_type':'log',
                               'value':'{0}.${{SN}}.log.tgz'.format(logDatasetName[:-1])
                               }
        # job parameters
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] = [
                {'type':'constant',
                 'value': ' --sourceURL ${SURL}',
                 },
                ]
        else:
            taskParamMap['jobParameters'] = [
                {'type':'constant',
                 'value': '-j "" --sourceURL ${SURL}',
                 },
                ]

        taskParamMap['jobParameters'] += [
            {'type':'constant',
             'value': '-r {0}'.format(self.rundirectory),
             },
            ]

        # Add the --trf option to jobParameters if required
        if app.atlas_exetype == "TRF":
            taskParamMap['jobParameters'] += [{'type': 'constant', 'value': '--trf'}]

        # output
        # output files
        outMap = {}
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            outMap, tmpParamList = AthenaUtils.convertConfToOutput(self.runConfig, self.extOutFile, job.outputdata.datasetname, destination=job.outputdata.location)
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-o "%s" ' % outMap
                 },
                ]
            taskParamMap['jobParameters'] += tmpParamList 

        else: 
            if job.outputdata.outputdata:
                for tmpLFN in job.outputdata.outputdata:
                    if len(job.outputdata.datasetname.split('.')) > 2:
                        lfn = '{0}.{1}'.format(*job.outputdata.datasetname.split('.')[:2])
                    else:
                        lfn = job.outputdata.datasetname[:-1]
                    lfn += '.$JOBSETID._${{SN/P}}.{0}'.format(tmpLFN)
                    dataset = '{0}_{1}/'.format(job.outputdata.datasetname[:-1],tmpLFN)
                    taskParamMap['jobParameters'] += MiscUtils.makeJediJobParam(lfn,dataset,'output',hidden=True, destination=job.outputdata.location)
                    outMap[tmpLFN] = lfn

                taskParamMap['jobParameters'] += [ 
                    {'type':'constant',
                     'value': '-o "{0}"'.format(str(outMap)),
                     },
                    ]

        if app.atlas_exetype in ["ATHENA"]:
            # jobO parameter
            tmpJobO = self.job_options
            # replace full-path jobOs
            for tmpFullName,tmpLocalName in AthenaUtils.fullPathJobOs.iteritems():
                tmpJobO = re.sub(tmpFullName,tmpLocalName,tmpJobO)
            # modify one-liner for G4 random seeds
            if self.runConfig.other.G4RandomSeeds > 0:
                if app.options != '':
                    tmpJobO = re.sub('-c "%s" ' % app.options,
                                     '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' \
                                         % app.options,tmpJobO)
                else:
                    tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" '
                dictItem = {'type':'template',
                            'param_type':'number',
                            'value':'${RNDMSEED}',
                            'hidden':True,
                            'offset':self.runConfig.other.G4RandomSeeds,
                            }
                taskParamMap['jobParameters'] += [dictItem]
        elif app.atlas_exetype in ["TRF"]:
            # replace parameters for TRF
            tmpJobO = self.job_options
            # output : basenames are in outMap['IROOT'] through extOutFile
            tmpOutMap = []
            for tmpName,tmpLFN in outMap['IROOT']:
                tmpJobO = tmpJobO.replace('%OUT.' + tmpName,tmpName)
            # replace DBR
            tmpJobO = re.sub('%DB=[^ \'\";]+','${DBR}',tmpJobO)

        if app.atlas_exetype in ["TRF"]:
            taskParamMap['useLocalIO'] = 1

        # build
        if job.backend.nobuild:
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-a {0}'.format(os.path.basename(self.inputsandbox)),
                 },
                ]
        else:
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-l ${LIB}',
                 },
                ]

        #
        # input
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.backend.requirements.nFilesPerJob > 0 and job.inputdata.number_of_files == 0 and job.backend.requirements.split > 0:
                job.inputdata.number_of_files = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.number_of_files != 0:
            taskParamMap['nFiles'] = job.inputdata.number_of_files
        elif job.backend.requirements.nFilesPerJob > 0 and job.backend.requirements.split > 0:
            # pathena does this for some reason even if there are no input files
            taskParamMap['nFiles'] = job.backend.requirements.nFilesPerJob * job.backend.requirements.split
        if job.backend.requirements.nFilesPerJob > 0:    
            taskParamMap['nFilesPerJob'] = job.backend.requirements.nFilesPerJob
            
        if job.backend.requirements.nEventsPerFile > 0:    
            taskParamMap['nEventsPerFile'] = job.backend.requirements.nEventsPerFile

        if job.backend.requirements.nGBPerJob not in [0, 'MAX']:
            try:
                if job.backend.requirements.nGBPerJob != 'MAX':
                    job.backend.requirements.nGBPerJob = int(job.backend.requirements.nGBPerJob)
            except ValueError:
                logger.error("nGBPerJob must be an integer or MAX")
            # check negative
            if job.backend.requirements.nGBPerJob <= 0:
                logger.error("nGBPerJob must be positive")

            # don't set MAX since it is the default on the server side
            if job.backend.requirements.nGBPerJob not in [-1, 'MAX']:
                taskParamMap['nGBPerJob'] = job.backend.requirements.nGBPerJob

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            inputMap = {}
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {'type':'template',
                           'param_type':'input',
                           'value':'-i "${IN/T}"',
                           'dataset': ','.join(job.inputdata.dataset),
                           'expand':True,
                           'exclude': r'\.log\.tgz(\.\d+)*$',
                           }
                #if options.inputType != '':
                #    tmpDict['include'] = options.inputType
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
                inputMap['IN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1
                taskParamMap['jobParameters'] += [
                    {'type':'constant',
                     'value': '-i "[]"',
                     },
                    ]
        else:
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {'type':'template',
                           'param_type':'input',
                           'value':'-i "${IN/T}"',
                           'dataset': ','.join(job.inputdata.dataset),
                           'expand':True,
                           'exclude': r'\.log\.tgz(\.\d+)*$',
                           }
                #if options.nSkipFiles != 0:
                #    tmpDict['offset'] = options.nSkipFiles
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1

        # param for DBR     
        if self.dbrelease != '':
            dbrDS = self.dbrelease.split(':')[0]
            # change LATEST to DBR_LATEST
            if dbrDS == 'LATEST':
                dbrDS = 'DBR_LATEST'
            dictItem = {'type':'template',
                        'param_type':'input',
                        'value':'--dbrFile=${DBR}',
                        'dataset':dbrDS,
                            }
            taskParamMap['jobParameters'] += [dictItem]
        # no expansion
        #if options.notExpandDBR:
        #dictItem = {'type':'constant',
        #            'value':'--noExpandDBR',
        #            }
        #taskParamMap['jobParameters'] += [dictItem]

        # secondary FIXME disabled
        self.secondaryDSs = {}
        if self.secondaryDSs != {}:
            inMap = {}
            streamNames = []
            for tmpDsName,tmpMap in self.secondaryDSs.iteritems():
                # make template item
                streamName = tmpMap['streamName']
                dictItem = MiscUtils.makeJediJobParam('${'+streamName+'}',tmpDsName,'input',hidden=True,
                                                      expand=True,include=tmpMap['pattern'],offset=tmpMap['nSkip'],
                                                      nFilesPerJob=tmpMap['nFiles'])
                taskParamMap['jobParameters'] += dictItem
                inMap[streamName] = 'tmp_'+streamName 
                streamNames.append(streamName)
            # make constant item
            strInMap = str(inMap)
            # set placeholders
            for streamName in streamNames:
                strInMap = strInMap.replace("'tmp_"+streamName+"'",'${'+streamName+'/T}')
            dictItem = {'type':'constant',
                        'value':'--inMap "%s"' % strInMap,
                        }
            taskParamMap['jobParameters'] += [dictItem]

        # misc
        jobParameters = ''
        # use Athena packages
        if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA','ROOT','EXE'] and app.useAthenaPackages):
            jobParameters += "--useAthenaPackages "
            
        # use RootCore
        if app.useRootCore or app.useRootCoreNoBuild:
            jobParameters += "--useRootCore "
            
        # use mana
        if app.useMana:
            jobParameters += "--useMana "
            if app.atlas_release != "":
                jobParameters += "--manaVer %s " % app.atlas_release
        # root
        if app.atlas_exetype in ['PYARA','ROOT','EXE'] and job.backend.requirements.rootver != '':
            rootver = re.sub('/','.', job.backend.requirements.rootver)
            jobParameters += "--rootVer %s " % rootver

        # write input to txt
        #if options.writeInputToTxt != '':
        #    jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt
        # debug parameters
        #if options.queueData != '':
        #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
        # JEM
        #if options.enableJEM:
        #    jobParameters += "--enable-jem "
        #    if options.configJEM != '':
        #        jobParameters += "--jem-config %s " % options.configJEM

        # set task param
        if jobParameters != '':
            taskParamMap['jobParameters'] += [ 
                {'type':'constant',
                 'value': jobParameters,
                 },
                ]

        # force stage-in
        if job.backend.accessmode == "LocalIO":
            taskParamMap['useLocalIO'] = 1

        # set jobO parameter
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-j "',
                 'padding':False,
                 },
                ]
            taskParamMap['jobParameters'] += PsubUtils.convertParamStrToJediParam(tmpJobO,inputMap,job.outputdata.datasetname[:-1], True,False)
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '"',
                 },
                ]

        else:
            taskParamMap['jobParameters'] += [ {'type':'constant',
                                                'value': '-p "{0}"'.format(urllib.quote(self.job_options)),
                                                },
                                               ]

        # build step
        if not job.backend.nobuild:
            jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} '

            if job.backend.bexec != '':
                jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)

            if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA','ROOT','EXE'] and app.useAthenaPackages):
                # use Athena packages
                jobParameters += "--useAthenaPackages "
            # use RootCore
            if app.useRootCore or app.useRootCoreNoBuild:
                jobParameters += "--useRootCore "

            # run directory
            if app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:
                jobParameters += '-r {0} '.format(self.rundirectory)
                
            # no compile
            #if options.noCompile:
            #    jobParameters += "--noCompile "
            # use mana
            if app.useMana:
                jobParameters += "--useMana "
                if app.atlas_release != "":
                    jobParameters += "--manaVer %s " % app.atlas_release

            # root
            if app.atlas_exetype in ['PYARA','ROOT','EXE'] and job.backend.requirements.rootver != '':
                rootver = re.sub('/','.', job.backend.requirements.rootver)
                jobParameters += "--rootVer %s " % rootver
                
            # cmt config
            if app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:
                if app.atlas_cmtconfig not in ['','NULL',None]:
                    jobParameters += " --cmtConfig %s " % app.atlas_cmtconfig
                                            
            
            #cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
            #if cmtConfig:
            #    jobParameters += "--cmtConfig %s " % cmtConfig
            # debug parameters
            #if options.queueData != '':
            #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
            # set task param
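            # buildSpec describes the separate build job that compiles the user
            # area before the run jobs start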
            taskParamMap['buildSpec'] = {
                'prodSourceLabel':'panda',
                'archiveName':os.path.basename(self.inputsandbox),
                'jobParameters':jobParameters,
                }


        # enable merging
        if job.backend.requirements.enableMerge:
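            # mergeSpec describes the optional merge jobs JEDI runs over the per-job
            # outputs; ${TRN_OUTPUT:OUTPUT} and ${TRN_LOG:LOG} are again JEDI placeholders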
            jobParameters = '-r {0} '.format(self.rundirectory)
            if 'exec' in job.backend.requirements.configMerge and job.backend.requirements.configMerge['exec'] != '':
                jobParameters += '-j "{0}" '.format(job.backend.requirements.configMerge['exec'])
            if not job.backend.nobuild:
                jobParameters += '-l ${LIB} '
            else:
                jobParameters += '-a {0} '.format(os.path.basename(self.inputsandbox))
                jobParameters += "--sourceURL ${SURL} "
            jobParameters += '${TRN_OUTPUT:OUTPUT} ${TRN_LOG:LOG}'
            taskParamMap['mergeSpec'] = {}
            taskParamMap['mergeSpec']['useLocalIO'] = 1
            taskParamMap['mergeSpec']['jobParameters'] = jobParameters
            taskParamMap['mergeOutput'] = True    
            
        # Selected by Jedi
        #if not app.atlas_exetype in ['PYARA','ROOT','EXE']:
        #    taskParamMap['transPath'] = 'http://atlpan.web.cern.ch/atlpan/runAthena-00-00-12'

        logger.debug(taskParamMap)

        # upload sources
        if self.inputsandbox and not job.backend.libds:
            uploadSources(os.path.dirname(self.inputsandbox),os.path.basename(self.inputsandbox))

            if self.inputsandbox != tmp_user_area_name:
                logger.info('Removing source tarball %s ...' % self.inputsandbox )
                os.remove(self.inputsandbox)

        return taskParamMap
Example #7
    def master_prepare(self, app, appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s',
                     job.getFQID('.'))

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput(
            'uuidgen 2> /dev/null')
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if job.application.exe not in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

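        # append each inputsandbox file to the archive; 'tar rf' adds to (or
        # creates) the tar file at inpw.getPath(inputsandbox)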
        for fname in [f.name for f in job.inputsandbox]:
            fname = fname.rstrip(os.sep)  # drop any trailing separator before splitting
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep) + 1:]
            rc, output = commands.getstatusoutput(
                'tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' %
                                                  (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(), os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError(
                    None, 'PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in [
                'DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask'
        ]:
            raise ApplicationConfigurationError(
                None, 'Panda splitter must be DQ2JobSplitter or ArgSplitter')

        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(
                None, 'site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError(
                    None, 'Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        job.outputdata.datasetname, outlfn = dq2outputdatasetname(
            job.outputdata.datasetname, job.id, job.outputdata.isGroupDS,
            job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,
                              False,
                              location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s' %
                        (job.outputdata.datasetname, self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname,
                                     location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError(
                'Panda', 'Exception in Client.addDataset %s: %s %s' %
                (job.outputdata.datasetname, sys.exc_info()[0],
                 sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname + '.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,
                                  False,
                                  location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset,
                                         location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s' %
                            (self.libDataset, self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError(
                    'Panda', 'Exception in Client.addDataset %s: %s %s' %
                    (self.libDataset, sys.exc_info()[0], sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.outputsandbox:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # create build job
        if job.backend.bexec != '':
            jspec = JobSpec()
            jspec.jobDefinitionID = job.id
            jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
            jspec.transformation = '%s/buildGen-00-00-01' % Client.baseURLSUB
            if Client.isDQ2free(job.backend.site):
                jspec.destinationDBlock = '%s/%s' % (
                    job.outputdata.datasetname, self.libDataset)
                jspec.destinationSE = 'local'
            else:
                jspec.destinationDBlock = self.libDataset
                jspec.destinationSE = job.backend.site
            jspec.prodSourceLabel = configPanda['prodSourceLabelBuild']
            jspec.processingType = configPanda['processingType']
            jspec.assignedPriority = configPanda['assignedPriorityBuild']
            jspec.computingSite = job.backend.site
            jspec.cloud = job.backend.requirements.cloud
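            # build job parameters: -o names the output library tarball, -i the
            # uploaded source archive, --sourceURL where to fetch it from and
            # --bexec the user-supplied build command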
            jspec.jobParameters = '-o %s' % (self.library)
            if self.inputsandbox:
                jspec.jobParameters += ' -i %s' % (self.inputsandbox)
            else:
                raise ApplicationConfigurationError(
                    None,
                    'Executable on Panda with build job defined, but inputsandbox is empty !'
                )
            matchURL = re.search('(http.*://[^/]+)/', Client.baseURLCSRVSSL)
            if matchURL:
                jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
            if job.backend.bexec != '':
                jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(
                    job.backend.bexec)
                jspec.jobParameters += ' -r %s ' % '.'

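            # declare the compiled library tarball and its log as outputs of the build job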
            fout = FileSpec()
            fout.lfn = self.library
            fout.type = 'output'
            fout.dataset = self.libDataset
            fout.destinationDBlock = self.libDataset
            jspec.addFile(fout)

            flog = FileSpec()
            flog.lfn = '%s.log.tgz' % self.libDataset
            flog.type = 'log'
            flog.dataset = self.libDataset
            flog.destinationDBlock = self.libDataset
            jspec.addFile(flog)
            return jspec
        else:
            return None
Example #8
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == 'DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        # This next for loop instructs ganga to use option_files that live in the
        # appropriate shared directory (the job will already have been prepared).
        # If is_prepared is True, then we've most likely submitted a job via GangaRobot. We know what we're doing.
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        athena_options = ' '.join([
            os.path.basename(opt_file.name) for opt_file in app.option_file
        ])
        if app.atlas_exetype not in ['EXE']:
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options

            inputbox = [File(opt_file.name) for opt_file in app.option_file]
        else:
            inputbox = []

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh')))

        if job.inputdata and job.inputdata._name == "AMIDataset" and job.inputdata.goodRunListXML.name != '':
            inputbox.append(File(job.inputdata.goodRunListXML.name))

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox, 'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox, 'ganga-stagein.py')

        if app.user_area.name:
            #we will now use the user_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http') < 0:
            #we will now use the group_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))

        if app.user_setupfile.name:
            inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause

        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) or (job._getRoot().splitter
               and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get',
                          'dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (
                    job._getRoot().splitter
                    and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox, 'dq2_get_old')

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py',
                          'dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if 'ganga-stage-in-out-dq2.py' not in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
            _append_files(inputbox, 'ganga-joboption-parse.py')
            if 'dq2info.tar.gz' not in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'dq2info.tar.gz')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        for helper in ['ganga-stage-in-out-dq2.py', 'dq2tracerreport.py',
                       'db_dq2localid.py', 'getstats.py']:
            if helper not in [os.path.basename(f.name) for f in inputbox]:
                _append_files(inputbox, helper)

        if str(app.atlas_release).find('12.') >= 0:
            _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so')
        else:
            # all other releases (13.x and later) only need libdcap.so
            _append_files(inputbox, 'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox

        #       prepare environment

        if not app.atlas_release:
            raise ApplicationConfigurationError(
                None,
                'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.'
            )

        environment = {
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': athena_options,
            'ATHENA_USERSETUPFILE': athena_usersetupfile,
            'ATLAS_PROJECT': app.atlas_project,
            'ATLAS_EXETYPE': app.atlas_exetype,
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }
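        # this environment is exported on the worker node, presumably consumed by
        # the run-athena-lcg.sh wrapper picked as the executable below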

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
                try:
                    # each entry must have the form NAME=VALUE
                    m = re.match(r"^(\w+)=(.*)", var)
                    environment[m.group(1)] = m.group(2)
                except AttributeError:
                    logger.warning(
                        'Athena.atlas_environment variable not correctly configured: %s',
                        var)

        if app.atlas_production and app.atlas_release.find(
                '12.') >= 0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub('\.', '_', app.atlas_production)
            prod_url = config[
                'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz'
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                     or app.atlas_release.find('12.') <= 0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production

        if app.user_area.name:
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http') >= 0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(
                    app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            # AtlasLCGRequirements is also the fallback for unknown requirement types
            requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                environment['GANGA_LFC_HOST'] = job.inputdata.lfc

        if 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']

        if job.inputdata and (job.inputdata._name
                              in ['DQ2Dataset', 'AMIDataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name in [
                    'AtlasLCGRequirements', 'AtlasCREAMRequirements'
            ] and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')
                    and not (job.splitter
                             and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

            # Add TAG datasetname
            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(
                    job.inputdata.tagdataset)

#       prepare job requirements
        requirementsSoftware = getLCGReleaseTag(app)

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error(
                'The athena release %s you are using is not recommended for distributed analysis !',
                requirementsSoftware[0])
            logger.error(
                'For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !'
            )
        requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find(
                'x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ] and job.inputdata.type in [
                'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            dq2client_version = None
            try:
                # override the default one if the dq2client_version is present
                # in the job backend's requirements object
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (
                    job.splitter and
                (job.splitter._name == 'DQ2JobSplitter'
                 or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
                )
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(
                    ':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except IndexError:
                logger.warning(
                    'Problems with the atlas_dbrelease configuration')

        # Fill AtlasLCGRequirements access mode
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning(
                "config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !"
            )
            import pickle, StringIO
            #if job.backend.requirements.sites:
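            # serialise the per-site access info and ship it to the worker node as
            # access_info.pickle in the input sandbox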
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(0)  # rewind before reading back the pickled payload
            lines = fileHandle.read()
            inputbox.append(FileBuffer('access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if 'make_filestager_joption.py' not in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'make_filestager_joption.py',
                              'dm_util.py', 'fs-copy.py')


#       jobscript

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')

        #       output sandbox
        outputbox = [
            'output_guids', 'output_location', 'output_data', 'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (
                job.inputdata and
            (job.inputdata._name
             in ['DQ2Dataset', 'AMIDataset', 'EventPicking'])
                and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']

        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

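        # build the final LCG job configuration from wrapper script, input sandbox,
        # output sandbox, environment and site requirements; the two empty lists
        # appear to be unused argument slots in this call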
        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
Example #9
    def master_prepare(self, app, appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from pandatools import MiscUtils
        from pandatools import AthenaUtils
        from pandatools import PsubUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from pandatools import PandaToolsPkgInfo

        # create a random number for this submission to allow multiple use of containers
        self.rndSubNum = random.randint(1111, 9999)

        job = app._getParent()
        logger.debug('AthenaJediRTHandler master_prepare called for %s',
                     job.getFQID('.'))

        if app.useRootCoreNoBuild:
            logger.info(
                'Athena.useRootCoreNoBuild is True, setting Panda.nobuild=True.'
            )
            job.backend.nobuild = True

        if job.backend.bexec and job.backend.nobuild:
            raise ApplicationConfigurationError(
                None,
                "Contradicting options: job.backend.bexec and job.backend.nobuild are both enabled."
            )

        if job.backend.requirements.rootver != '' and job.backend.nobuild:
            raise ApplicationConfigurationError(
                None,
                "Contradicting options: job.backend.requirements.rootver given and job.backend.nobuild are enabled."
            )

        # Switch on compilation flag if bexec is set or libds is empty
        if job.backend.bexec != '' or not job.backend.nobuild:
            app.athena_compile = True
            for sj in job.subjobs:
                sj.application.athena_compile = True
            logger.info(
                '"job.backend.nobuild=False" or "job.backend.bexec" is set - Panda build job is enabled.'
            )

        if job.backend.nobuild:
            app.athena_compile = False
            for sj in job.subjobs:
                sj.application.athena_compile = False
            logger.info(
                '"job.backend.nobuild=True" or "--nobuild" chosen - Panda build job is switched off.'
            )

        # check for auto datri
        if job.outputdata.location != '':
            if not PsubUtils.checkDestSE(job.outputdata.location,
                                         job.outputdata.datasetname, False):
                raise ApplicationConfigurationError(
                    None, "Problems with outputdata.location setting '%s'" %
                    job.outputdata.location)

        # validate application
        if not app.atlas_release and not job.backend.requirements.rootver and not app.atlas_exetype in [
                'EXE'
        ]:
            raise ApplicationConfigurationError(
                None,
                "application.atlas_release is not set. Did you run application.prepare()"
            )

        self.dbrelease = app.atlas_dbrelease
        if self.dbrelease != '' and self.dbrelease != 'LATEST' and self.dbrelease.find(
                ':') == -1:
            raise ApplicationConfigurationError(
                None,
                "ERROR : invalid argument for DB Release. Must be 'LATEST' or 'DatasetName:FileName'"
            )

        self.runConfig = AthenaUtils.ConfigAttr(app.atlas_run_config)
        for k in self.runConfig.keys():
            self.runConfig[k] = AthenaUtils.ConfigAttr(self.runConfig[k])
        if not app.atlas_run_dir:
            raise ApplicationConfigurationError(
                None,
                "application.atlas_run_dir is not set. Did you run application.prepare()"
            )

        self.rundirectory = app.atlas_run_dir
        self.cacheVer = ''
        if app.atlas_project and app.atlas_production:
            self.cacheVer = "-" + app.atlas_project + "_" + app.atlas_production

        # handle different atlas_exetypes
        self.job_options = ''
        if app.atlas_exetype == 'TRF':
            self.job_options += ' '.join(
                [os.path.basename(fopt.name) for fopt in app.option_file])

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError(None,"job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            #raise ApplicationConfigurationError(None,"Sorry TRF on Panda backend not yet supported")

            if app.options:
                self.job_options += ' %s ' % app.options

        elif app.atlas_exetype == 'ATHENA':

            if len(app.atlas_environment) > 0 and app.atlas_environment[
                    0].find('DBRELEASE_OVERRIDE') == -1:
                logger.warning(
                    "Passing of environment variables to Athena using Panda not supported. Ignoring atlas_environment setting."
                )

            if job.outputdata.outputdata:
                raise ApplicationConfigurationError(
                    None,
                    "job.outputdata.outputdata must be empty if atlas_exetype='ATHENA' and Panda backend is used (outputs are auto-detected)"
                )
            if app.options:
                if app.options.startswith('-c'):
                    self.job_options += ' %s ' % app.options
                else:
                    self.job_options += ' -c %s ' % app.options

                logger.warning(
                    'The value of j.application.options has been prepended with " -c " '
                )
                logger.warning(
                    'Please make sure to use proper quotes for the values of j.application.options !'
                )

            self.job_options += ' '.join(
                [os.path.basename(fopt.name) for fopt in app.option_file])

            # check for TAG compression
            if 'subcoll.tar.gz' in app.append_to_user_area:
                self.job_options = ' uncompress.py ' + self.job_options

        elif app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError(None,"job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            self.job_options += ' '.join(
                [os.path.basename(fopt.name) for fopt in app.option_file])

            # sort out environment variables
            env_str = ""
            for env_var in app.atlas_environment:
                env_str += "export %s ; " % env_var

            # below fixes issue with runGen -- job_options are executed by os.system when dbrelease is used, and by the shell otherwise
            ## - REMOVED FIX DUE TO CHANGE IN PILOT - MWS 8/11/11
            if job.backend.requirements.usecommainputtxt:
                input_str = '/bin/echo %IN > input.txt; cat input.txt; '
            else:
                input_str = '/bin/echo %IN | sed \'s/,/\\\n/g\' > input.txt; cat input.txt; '
            if app.atlas_exetype == 'PYARA':
                self.job_options = env_str + input_str + ' python ' + self.job_options
            elif app.atlas_exetype == 'ARES':
                self.job_options = env_str + input_str + ' athena.py ' + self.job_options
            elif app.atlas_exetype == 'ROOT':
                self.job_options = env_str + input_str + ' root -b -q ' + self.job_options
            elif app.atlas_exetype == 'EXE':
                self.job_options = env_str + input_str + self.job_options

            if app.options:
                self.job_options += ' %s ' % app.options

        if self.job_options == '':
            raise ApplicationConfigurationError(None, "No Job Options found!")
        logger.info('Running job options: %s' % self.job_options)

        # validate dbrelease
        if self.dbrelease != "LATEST":
            self.dbrFiles, self.dbrDsList = getDBDatasets(
                self.job_options, '', self.dbrelease)

        # handle the output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError(
                    None, 'Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        # validate the output dataset name (and make it a container)
        job.outputdata.datasetname, outlfn = dq2outputdatasetname(
            job.outputdata.datasetname, job.id, job.outputdata.isGroupDS,
            job.outputdata.groupname)
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname += '/'

        # add extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)
        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # use the shared area if possible
        tmp_user_area_name = app.user_area.name
        if app.is_prepared is not True:
            from Ganga.Utility.files import expandfilename
            shared_path = os.path.join(
                expandfilename(getConfig('Configuration')['gangadir']),
                'shared',
                getConfig('Configuration')['user'])
            tmp_user_area_name = os.path.join(
                os.path.join(shared_path, app.is_prepared.name),
                os.path.basename(app.user_area.name))

        # Add inputsandbox to user_area
        if job.inputsandbox:
            logger.warning(
                "Submitting Panda job with inputsandbox. This may slow the submission slightly."
            )

            if tmp_user_area_name:
                inpw = os.path.dirname(tmp_user_area_name)
                self.inputsandbox = os.path.join(
                    inpw, 'sources.%s.tar' %
                    commands.getoutput('uuidgen 2> /dev/null'))
            else:
                inpw = job.getInputWorkspace()
                self.inputsandbox = inpw.getPath(
                    'sources.%s.tar' %
                    commands.getoutput('uuidgen 2> /dev/null'))

            if tmp_user_area_name:
                rc, output = commands.getstatusoutput(
                    'cp %s %s.gz' % (tmp_user_area_name, self.inputsandbox))
                if rc:
                    logger.error('Copying user_area failed with status %d', rc)
                    logger.error(output)
                    raise ApplicationConfigurationError(
                        None, 'Packing inputsandbox failed.')
                rc, output = commands.getstatusoutput('gunzip %s.gz' %
                                                      (self.inputsandbox))
                if rc:
                    logger.error('Unzipping user_area failed with status %d',
                                 rc)
                    logger.error(output)
                    raise ApplicationConfigurationError(
                        None, 'Packing inputsandbox failed.')

            for fname in [os.path.abspath(f.name) for f in job.inputsandbox]:
                fname = fname.rstrip(os.sep)  # drop any trailing separator before splitting
                path = os.path.dirname(fname)
                fn = os.path.basename(fname)

                #app.atlas_run_dir
                # get Athena versions
                rc, out = AthenaUtils.getAthenaVer()
                # failed
                if not rc:
                    #raise ApplicationConfigurationError(None, 'CMT could not parse correct environment ! \n Did you start/setup ganga in the run/ or cmt/ subdirectory of your athena analysis package ?')
                    logger.warning(
                        "CMT could not parse correct environment for inputsandbox - will use the atlas_run_dir as default"
                    )

                    # as we don't have to be in the run dir now, create a copy of the run_dir directory structure and use that
                    input_dir = os.path.dirname(self.inputsandbox)
                    run_path = "%s/sbx_tree/%s" % (input_dir,
                                                   app.atlas_run_dir)
                    rc, output = commands.getstatusoutput("mkdir -p %s" %
                                                          run_path)
                    if not rc:
                        # copy this sandbox file
                        rc, output = commands.getstatusoutput(
                            "cp %s %s" % (fname, run_path))
                        if not rc:
                            path = os.path.join(input_dir, 'sbx_tree')
                            fn = os.path.join(app.atlas_run_dir, fn)
                        else:
                            raise ApplicationConfigurationError(
                                None,
                                "Couldn't copy file %s to recreate run_dir for input sandbox"
                                % fname)
                    else:
                        raise ApplicationConfigurationError(
                            None,
                            "Couldn't create directory structure to match run_dir %s for input sandbox"
                            % run_path)

                else:
                    userarea = out['workArea']

                    # strip the path from the filename if present in the userarea
                    ua = os.path.abspath(userarea)
                    if ua in path:
                        fn = fname[len(ua) + 1:]
                        path = ua

                rc, output = commands.getstatusoutput(
                    'tar -h -r -f %s -C %s %s' % (self.inputsandbox, path, fn))
                if rc:
                    logger.error('Packing inputsandbox failed with status %d',
                                 rc)
                    logger.error(output)
                    raise ApplicationConfigurationError(
                        None, 'Packing inputsandbox failed.')

            # remove sandbox tree if created
            if "sbx_tree" in os.listdir(os.path.dirname(self.inputsandbox)):
                rc, output = commands.getstatusoutput(
                    "rm -r %s/sbx_tree" % os.path.dirname(self.inputsandbox))
                if rc:
                    raise ApplicationConfigurationError(
                        None,
                        "Couldn't remove directory structure used for input sandbox"
                    )

            rc, output = commands.getstatusoutput('gzip %s' %
                                                  (self.inputsandbox))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
            self.inputsandbox += ".gz"
        else:
            self.inputsandbox = tmp_user_area_name

        # job name
        jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

        # make task
        taskParamMap = {}
        # Enforce that outputdataset name ends with / for container
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname = job.outputdata.datasetname + '/'

        taskParamMap['taskName'] = job.outputdata.datasetname

        taskParamMap['uniqueTaskName'] = True
        taskParamMap['vo'] = 'atlas'
        taskParamMap['architecture'] = AthenaUtils.getCmtConfig(
            athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
        if app.atlas_release:
            taskParamMap['transUses'] = 'Atlas-%s' % app.atlas_release
        else:
            taskParamMap['transUses'] = ''
        taskParamMap[
            'transHome'] = 'AnalysisTransforms' + self.cacheVer  #+nightVer

        configSys = getConfig('System')
        gangaver = configSys['GANGA_VERSION'].lower()
        if not gangaver:
            gangaver = "ganga"

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['processingType'] = '{0}-jedi-athena'.format(gangaver)
        else:
            taskParamMap['processingType'] = '{0}-jedi-run'.format(gangaver)

        #if options.eventPickEvtList != '':
        #    taskParamMap['processingType'] += '-evp'
        taskParamMap['prodSourceLabel'] = 'user'
        if job.backend.site != 'AUTO':
            taskParamMap['cloud'] = Client.PandaSites[
                job.backend.site]['cloud']
            taskParamMap['site'] = job.backend.site
        elif job.backend.requirements.cloud is not None and not job.backend.requirements.anyCloud:
            taskParamMap['cloud'] = job.backend.requirements.cloud
        if job.backend.requirements.excluded_sites != []:
            taskParamMap['excludedSite'] = expandExcludedSiteList(job)

        # if only a single site specifed, don't set includedSite
        #if job.backend.site != 'AUTO':
        #    taskParamMap['includedSite'] = job.backend.site
        #taskParamMap['cliParams'] = fullExecString
        if job.backend.requirements.noEmail:
            taskParamMap['noEmail'] = True
        if job.backend.requirements.skipScout:
            taskParamMap['skipScout'] = True
        if app.atlas_exetype not in ["ATHENA", "TRF"]:
            taskParamMap[
                'nMaxFilesPerJob'] = job.backend.requirements.maxNFilesPerJob
        if job.backend.requirements.disableAutoRetry:
            taskParamMap['disableAutoRetry'] = 1
        # source URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
        if matchURL is not None:
            taskParamMap['sourceURL'] = matchURL.group(1)

        # dataset names
        outDatasetName = job.outputdata.datasetname
        logDatasetName = re.sub('/$', '.log/', job.outputdata.datasetname)
        # log
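        # ${SN} is the JEDI serial-number placeholder, so every job writes a
        # uniquely numbered log tarball into the .log dataset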
        taskParamMap['log'] = {
            'dataset': logDatasetName,
            'container': logDatasetName,
            'type': 'template',
            'param_type': 'log',
            'value': '{0}.${{SN}}.log.tgz'.format(logDatasetName[:-1])
        }
        # job parameters
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] = [
                {
                    'type': 'constant',
                    'value': ' --sourceURL ${SURL}',
                },
            ]
        else:
            taskParamMap['jobParameters'] = [
                {
                    'type': 'constant',
                    'value': '-j "" --sourceURL ${SURL}',
                },
            ]

        taskParamMap['jobParameters'] += [
            {
                'type': 'constant',
                'value': '-r {0}'.format(self.rundirectory),
            },
        ]

        # output
        # output files
        outMap = {}
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            outMap, tmpParamList = AthenaUtils.convertConfToOutput(
                self.runConfig,
                self.extOutFile,
                job.outputdata.datasetname,
                destination=job.outputdata.location)
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-o "%s" ' % outMap
                },
            ]
            taskParamMap['jobParameters'] += tmpParamList

        else:
            if job.outputdata.outputdata:
                for tmpLFN in job.outputdata.outputdata:
                    if len(job.outputdata.datasetname.split('.')) > 2:
                        lfn = '{0}.{1}'.format(
                            *job.outputdata.datasetname.split('.')[:2])
                    else:
                        lfn = job.outputdata.datasetname[:-1]
                    lfn += '.$JOBSETID._${{SN/P}}.{0}'.format(tmpLFN)
                    dataset = '{0}_{1}/'.format(
                        job.outputdata.datasetname[:-1], tmpLFN)
                    taskParamMap[
                        'jobParameters'] += MiscUtils.makeJediJobParam(
                            lfn,
                            dataset,
                            'output',
                            hidden=True,
                            destination=job.outputdata.location)
                    outMap[tmpLFN] = lfn

                taskParamMap['jobParameters'] += [
                    {
                        'type': 'constant',
                        'value': '-o "{0}"'.format(str(outMap)),
                    },
                ]

        if app.atlas_exetype in ["ATHENA"]:
            # jobO parameter
            tmpJobO = self.job_options
            # replace full-path jobOs
            for tmpFullName, tmpLocalName in AthenaUtils.fullPathJobOs.iteritems(
            ):
                tmpJobO = re.sub(tmpFullName, tmpLocalName, tmpJobO)
            # modify one-liner for G4 random seeds
            if self.runConfig.other.G4RandomSeeds > 0:
                if app.options != '':
                    tmpJobO = re.sub('-c "%s" ' % app.options,
                                     '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' \
                                         % app.options,tmpJobO)
                else:
                    tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" '
                dictItem = {
                    'type': 'template',
                    'param_type': 'number',
                    'value': '${RNDMSEED}',
                    'hidden': True,
                    'offset': self.runConfig.other.G4RandomSeeds,
                }
                taskParamMap['jobParameters'] += [dictItem]
        elif app.atlas_exetype in ["TRF"]:
            # replace parameters for TRF
            tmpJobO = self.job_options
            # output : basenames are in outMap['IROOT'] through extOutFile
            for tmpName, tmpLFN in outMap['IROOT']:
                tmpJobO = tmpJobO.replace('%OUT.' + tmpName, tmpName)
            # replace DBR
            tmpJobO = re.sub('%DB=[^ \'\";]+', '${DBR}', tmpJobO)

        if app.atlas_exetype in ["TRF"]:
            taskParamMap['useLocalIO'] = 1

        # build
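        # with nobuild the source archive is shipped directly via -a; otherwise the
        # run jobs pick up the library produced by the build step via ${LIB}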
        if job.backend.nobuild:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value':
                    '-a {0}'.format(os.path.basename(self.inputsandbox)),
                },
            ]
        else:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-l ${LIB}',
                },
            ]

        #
        # input
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.backend.requirements.nFilesPerJob > 0 and job.inputdata.number_of_files == 0 and job.backend.requirements.split > 0:
                job.inputdata.number_of_files = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.number_of_files != 0:
            taskParamMap['nFiles'] = job.inputdata.number_of_files
        elif job.backend.requirements.nFilesPerJob > 0 and job.backend.requirements.split > 0:
            # pathena does this for some reason even if there are no input files
            taskParamMap[
                'nFiles'] = job.backend.requirements.nFilesPerJob * job.backend.requirements.split
        if job.backend.requirements.nFilesPerJob > 0:
            taskParamMap[
                'nFilesPerJob'] = job.backend.requirements.nFilesPerJob

        if job.backend.requirements.nEventsPerFile > 0:
            taskParamMap[
                'nEventsPerFile'] = job.backend.requirements.nEventsPerFile

        if job.backend.requirements.nGBPerJob not in [0, 'MAX']:
            try:
                if job.backend.requirements.nGBPerJob != 'MAX':
                    job.backend.requirements.nGBPerJob = int(
                        job.backend.requirements.nGBPerJob)
            except ValueError:
                logger.error("nGBPerJob must be an integer or MAX")
            # check negative
            if job.backend.requirements.nGBPerJob <= 0:
                logger.error("nGBPerJob must be positive")

            # don't set MAX since it is the default on the server side
            if job.backend.requirements.nGBPerJob not in [-1, 'MAX']:
                taskParamMap['nGBPerJob'] = job.backend.requirements.nGBPerJob

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            inputMap = {}
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {
                    'type': 'template',
                    'param_type': 'input',
                    'value': '-i "${IN/T}"',
                    'dataset': ','.join(job.inputdata.dataset),
                    'expand': True,
                    'exclude': r'\.log\.tgz(\.\d+)*$',
                }
                #if options.inputType != '':
                #    tmpDict['include'] = options.inputType
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
                inputMap['IN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
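                # without an input dataset the task is event-driven:
                # requirements.split sets the number of (one-event) jobs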
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1
                taskParamMap['jobParameters'] += [
                    {
                        'type': 'constant',
                        'value': '-i "[]"',
                    },
                ]
        else:
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {
                    'type': 'template',
                    'param_type': 'input',
                    'value': '-i "${IN/T}"',
                    'dataset': ','.join(job.inputdata.dataset),
                    'expand': True,
                    'exclude': r'\.log\.tgz(\.\d+)*$',
                }
                #if options.nSkipFiles != 0:
                #    tmpDict['offset'] = options.nSkipFiles
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1

        # param for DBR
        if self.dbrelease != '':
            dbrDS = self.dbrelease.split(':')[0]
            # change LATEST to DBR_LATEST
            if dbrDS == 'LATEST':
                dbrDS = 'DBR_LATEST'
            dictItem = {
                'type': 'template',
                'param_type': 'input',
                'value': '--dbrFile=${DBR}',
                'dataset': dbrDS,
            }
            taskParamMap['jobParameters'] += [dictItem]
        # no expansion
        #if options.notExpandDBR:
        #dictItem = {'type':'constant',
        #            'value':'--noExpandDBR',
        #            }
        #taskParamMap['jobParameters'] += [dictItem]

        # secondary datasets: FIXME - disabled for now, so the block below is a no-op
        self.secondaryDSs = {}
        if self.secondaryDSs != {}:
            inMap = {}
            streamNames = []
            for tmpDsName, tmpMap in self.secondaryDSs.iteritems():
                # make template item
                streamName = tmpMap['streamName']
                dictItem = MiscUtils.makeJediJobParam(
                    '${' + streamName + '}',
                    tmpDsName,
                    'input',
                    hidden=True,
                    expand=True,
                    include=tmpMap['pattern'],
                    offset=tmpMap['nSkip'],
                    nFilesPerJob=tmpMap['nFiles'])
                taskParamMap['jobParameters'] += dictItem
                inMap[streamName] = 'tmp_' + streamName
                streamNames.append(streamName)
            # make constant item
            strInMap = str(inMap)
            # set placeholders
            for streamName in streamNames:
                strInMap = strInMap.replace("'tmp_" + streamName + "'",
                                            '${' + streamName + '/T}')
            dictItem = {
                'type': 'constant',
                'value': '--inMap "%s"' % strInMap,
            }
            taskParamMap['jobParameters'] += [dictItem]

        # misc
        jobParameters = ''
        # use Athena packages
        if app.atlas_exetype == 'ARES' or (app.atlas_exetype
                                           in ['PYARA', 'ROOT', 'EXE']
                                           and app.useAthenaPackages):
            jobParameters += "--useAthenaPackages "

        # use RootCore
        if app.useRootCore or app.useRootCoreNoBuild:
            jobParameters += "--useRootCore "

        # use mana
        if app.useMana:
            jobParameters += "--useMana "
            if app.atlas_release != "":
                jobParameters += "--manaVer %s " % app.atlas_release
        # root
        if app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'
                                 ] and job.backend.requirements.rootver != '':
            rootver = re.sub('/', '.', job.backend.requirements.rootver)
            jobParameters += "--rootVer %s " % rootver

        # write input to txt
        #if options.writeInputToTxt != '':
        #    jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt
        # debug parameters
        #if options.queueData != '':
        #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
        # JEM
        #if options.enableJEM:
        #    jobParameters += "--enable-jem "
        #    if options.configJEM != '':
        #        jobParameters += "--jem-config %s " % options.configJEM

        # set task param
        if jobParameters != '':
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': jobParameters,
                },
            ]

        # force stage-in
        if job.backend.accessmode == "LocalIO":
            taskParamMap['useLocalIO'] = 1

        # set jobO parameter
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-j "',
                    'padding': False,
                },
            ]
            taskParamMap[
                'jobParameters'] += PsubUtils.convertParamStrToJediParam(
                    tmpJobO, inputMap, job.outputdata.datasetname[:-1], True,
                    False)
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '"',
                },
            ]

        else:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-p "{0}"'.format(urllib.quote(self.job_options)),
                },
            ]

        # build step
        if not job.backend.nobuild:
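            # build-step parameters; ${IN}, ${OUT} and ${SURL} are template
            # placeholders resolved by the submission system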
            jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} '

            if job.backend.bexec != '':
                jobParameters += ' --bexec "%s" ' % urllib.quote(
                    job.backend.bexec)

            if app.atlas_exetype == 'ARES' or (app.atlas_exetype
                                               in ['PYARA', 'ROOT', 'EXE']
                                               and app.useAthenaPackages):
                # use Athena packages
                jobParameters += "--useAthenaPackages "
            # use RootCore
            if app.useRootCore or app.useRootCoreNoBuild:
                jobParameters += "--useRootCore "

            # run directory
            if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
                jobParameters += '-r {0} '.format(self.rundirectory)

            # no compile
            #if options.noCompile:
            #    jobParameters += "--noCompile "
            # use mana
            if app.useMana:
                jobParameters += "--useMana "
                if app.atlas_release != "":
                    jobParameters += "--manaVer %s " % app.atlas_release

            # root
            if app.atlas_exetype in [
                    'PYARA', 'ROOT', 'EXE'
            ] and job.backend.requirements.rootver != '':
                rootver = re.sub('/', '.', job.backend.requirements.rootver)
                jobParameters += "--rootVer %s " % rootver

            # cmt config
            if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
                if not app.atlas_cmtconfig in ['', 'NULL', None]:
                    jobParameters += " --cmtConfig %s " % app.atlas_cmtconfig

            #cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
            #if cmtConfig:
            #    jobParameters += "--cmtConfig %s " % cmtConfig
            # debug parameters
            #if options.queueData != '':
            #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
            # set task param
            taskParamMap['buildSpec'] = {
                'prodSourceLabel': 'panda',
                'archiveName': os.path.basename(self.inputsandbox),
                'jobParameters': jobParameters,
            }

        # enable merging
        if job.backend.requirements.enableMerge:
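            # assemble the merge-job command line from the merge configuration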
            jobParameters = '-r {0} '.format(self.rundirectory)
            if 'exec' in job.backend.requirements.configMerge and job.backend.requirements.configMerge[
                    'exec'] != '':
                jobParameters += '-j "{0}" '.format(
                    job.backend.requirements.configMerge['exec'])
            if not job.backend.nobuild:
                jobParameters += '-l ${LIB} '
            else:
                jobParameters += '-a {0} '.format(
                    os.path.basename(self.inputsandbox))
                jobParameters += "--sourceURL ${SURL} "
            jobParameters += '${TRN_OUTPUT:OUTPUT} ${TRN_LOG:LOG}'
            taskParamMap['mergeSpec'] = {}
            taskParamMap['mergeSpec']['useLocalIO'] = 1
            taskParamMap['mergeSpec']['jobParameters'] = jobParameters
            taskParamMap['mergeOutput'] = True

        # Selected by Jedi
        #if not app.atlas_exetype in ['PYARA','ROOT','EXE']:
        #    taskParamMap['transPath'] = 'http://atlpan.web.cern.ch/atlpan/runAthena-00-00-12'

        logger.debug(taskParamMap)

        # upload sources
        if self.inputsandbox and not job.backend.libds:
            uploadSources(os.path.dirname(self.inputsandbox),
                          os.path.basename(self.inputsandbox))

            if not self.inputsandbox == tmp_user_area_name:
                logger.info('Removing source tarball %s ...' %
                            self.inputsandbox)
                os.remove(self.inputsandbox)

        return taskParamMap
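
For reference, a minimal standalone sketch of the DBRelease string handling
above, assuming the 'dataset:file' convention used by atlas_dbrelease; the
helper name and sample value are made up for illustration:

    def split_dbrelease(dbrelease):
        """Return (dataset, filename) from a 'dataset:file' DBRelease string."""
        dataset, filename = dbrelease.split(':', 1)
        # map the LATEST alias to DBR_LATEST, as the task parameters expect
        if dataset == 'LATEST':
            dataset = 'DBR_LATEST'
        return dataset, filename

    print split_dbrelease('LATEST:DBRelease-21.7.1.tar.gz')
    # ('DBR_LATEST', 'DBRelease-21.7.1.tar.gz')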
Example #10
0
    def master_prepare( self, app, appconfig ):
        """Prepare the master job"""
        
        job = app._getParent() # Returns job or subjob object

        logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name=='DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Expand Athena jobOptions
        if not app.option_file and not app.command_line:
            raise ConfigError("j.application.option_file='' - No Athena jobOptions files specified.")

        athena_options = ''
        inputbox = [File(os.path.join(os.path.dirname(__file__),'athena-utility.sh'))]
        if app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:

            for option_file in app.option_file:
                athena_options += ' ' + os.path.basename(option_file.name)
                inputbox += [ File(option_file.name) ]

            athena_options += ' %s ' % app.options

        else:
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options =  app.options + ' ' + athena_options
                inputbox += [ File(option_file.name) ]


            if app.command_line:
                athena_options = app.command_line

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

#       prepare input sandbox

        if app.user_setupfile.name: inputbox += [ File(app.user_setupfile.name) ]
        #CN: added extra test for TNTJobSplitter
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset', 'ATLASTier3Dataset'] or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox,'ganga-stage-in-out-dq2.py')
            _append_files(inputbox,'dq2_get')
            _append_files(inputbox,'dq2info.tar.gz')
            _append_files(inputbox,'libdcap.so')

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox,'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox,'ganga-stagein.py')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset' ] and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')

        if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'getstats.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            if not job.outputdata.location:
                raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not File(os.path.join(os.path.dirname(__file__),'ganga-stage-in-out-dq2.py')) in inputbox:
                _append_files(inputbox,'ganga-stage-in-out-dq2.py')
                _append_files(inputbox,'dq2info.tar.gz')
                _append_files(inputbox,'libdcap.so')
            _append_files(inputbox,'ganga-joboption-parse.py')

        if job.inputsandbox:
            for file in job.inputsandbox:
                inputbox += [ file ]
        if app.user_area.name:
            if app.is_prepared is True:
                inputbox += [ File(app.user_area.name) ] 
            else:
                inputbox += [ File(os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))) ]
        if app.group_area.name and string.find(app.group_area.name,"http")<0:
            if app.is_prepared is True:
                inputbox += [ File(app.group_area.name) ] 
            else:
                inputbox += [ File(os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.group_area.name))) ]
   
#       prepare environment

        try:
            atlas_software = config['ATLAS_SOFTWARE']
        except ConfigError:
            raise ConfigError('No default location of ATLAS_SOFTWARE specified in the configuration.')

        if app.atlas_release=='' and app.atlas_project != "AthAnalysisBase":
            raise ApplicationConfigurationError(None,'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.')
      
        environment={ 
            'ATLAS_RELEASE' : app.atlas_release,
            'ATHENA_OPTIONS' : athena_options,
            'ATLAS_SOFTWARE' : atlas_software,
            'ATHENA_USERSETUPFILE' : athena_usersetupfile,
            'ATLAS_PROJECT' : app.atlas_project,
            'ATLAS_EXETYPE' : app.atlas_exetype,
            'GANGA_VERSION' : configSystem['GANGA_VERSION'],
            'DQ2_SETUP_SCRIPT': configDQ2['setupScript']
        }

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        cmtconfig = app.atlas_cmtconfig
        if cmtconfig.find('x86_64')>=0:
            environment['ATLAS_ARCH'] = '64'

        environment['ATLAS_CMTCONFIG'] = app.atlas_cmtconfig
        environment['DCACHE_RA_BUFFER'] = str(config['DCACHE_RA_BUFFER'])
        
        if app.atlas_environment:
            for var in app.atlas_environment:
                key_value = var.split('=')
                if len(key_value) == 2:
                    environment[key_value[0]] = key_value[1]

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.')<=0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production 

        if app.user_area.name: 
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        if app.group_area.name:
            if string.find(app.group_area.name,"http")>=0:
                environment['GROUP_AREA_REMOTE'] = "%s" % (app.group_area.name)
            else:
                environment['GROUP_AREA']=os.path.basename(app.group_area.name)

        if app.max_events:
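            # skip the -999 "unset" sentinel; -1 (commonly "all events")
            # still passes the > -2 check below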
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if job.inputdata.type not in ['LOCAL']:

                try:
                    environment['X509CERTDIR']=os.environ['X509_CERT_DIR']
                except KeyError:
                    environment['X509CERTDIR']=''

                try:
                    proxy = os.environ['X509_USER_PROXY']
                except KeyError:
                    proxy = '/tmp/x509up_u%s' % os.getuid()

                REMOTE_PROXY = '%s:%s' % (socket.getfqdn(),proxy)
                environment['REMOTE_PROXY'] = REMOTE_PROXY

                try:
                    environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP']
                except:
                    pass

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME']=':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
                environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL']
                #environment['DATASETTYPE']=job.inputdata.type
                # At present, DQ2 download is the only thing that works
                environment['DATASETTYPE']="DQ2_DOWNLOAD"
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol                

                try:
                    environment['X509CERTDIR']=os.environ['X509_CERT_DIR']
                except KeyError:
                    environment['X509CERTDIR']=''

                try:
                    proxy = os.environ['X509_USER_PROXY']
                except KeyError:
                    proxy = '/tmp/x509up_u%s' % os.getuid()

                REMOTE_PROXY = '%s:%s' % (socket.getfqdn(),proxy)
                environment['REMOTE_PROXY'] = REMOTE_PROXY
                try:
                    environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP']
                except:
                    pass

            else:
                raise ConfigError("j.inputdata.dataset='' - DQ2 dataset name needs to be specified.")
            
            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(job.inputdata.tagdataset)
                
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL']
            try:
                environment['X509CERTDIR']=os.environ['X509_CERT_DIR']
            except KeyError:
                environment['X509CERTDIR']=''
            try:
                proxy = os.environ['X509_USER_PROXY']
            except KeyError:
                proxy = '/tmp/x509up_u%s' % os.getuid()

            REMOTE_PROXY = '%s:%s' % (socket.getfqdn(),proxy)
            environment['REMOTE_PROXY'] = REMOTE_PROXY
            try:
                environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP']
            except:
                pass

        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh') >= 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
            elif job.backend.extraopts.find('site=zn') >= 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')

#       output sandbox
        outputbox = [ ]
        outputGUIDs='output_guids'
        outputLOCATION='output_location'
        outputDATA='output_data'
        outputbox.append( outputGUIDs )
        outputbox.append( outputLOCATION )
        outputbox.append( outputDATA )
        outputbox.append('stats.pickle')
        if (job.outputsandbox):
            for file in job.outputsandbox:
                outputbox += [ file ]

        ## retrieve the FileStager log
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset'] and job.inputdata.type in ['FILE_STAGER']:
            outputbox += ['FileStager.out', 'FileStager.err']

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
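
For reference, the X509 proxy discovery repeated in these handlers can be
factored into a small helper; a sketch under the same assumptions (the
X509_USER_PROXY environment variable overrides the conventional
/tmp/x509up_u<uid> location; the helper name is made up):

    import os
    import socket

    def remote_proxy_location():
        """Build the host:path string exported as REMOTE_PROXY above."""
        proxy = os.environ.get('X509_USER_PROXY',
                               '/tmp/x509up_u%s' % os.getuid())
        return '%s:%s' % (socket.getfqdn(), proxy)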
Example #11
0
    def master_prepare( self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent() # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id )


        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name=='DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' % (job.backend.requirements.sites)
                    raise ApplicationConfigurationError(printout)


        #this next for loop instructs ganga to use option_files that live in the
        #appropriate shared directory (the job will already have been prepared)
        #(if is_prepared is True, then we've most likely submitted a job via GangaRobot. We know what we're doing.)
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        if app.atlas_exetype not in ['EXE']:
            athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file])
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options
                
            inputbox = [ File(opt_file.name) for opt_file in app.option_file ]
        else:
            athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file])
            inputbox = []
            
        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

#       prepare input sandbox

        
        inputbox.append( File(os.path.join(__directory__,'athena-utility.sh')) )

        if app.user_area.name: 
            #we will now use the user_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http')<0:
            #we will now use the group_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))
    
        if app.user_setupfile.name: inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause  

        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ] ) or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2_get','dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox,'dq2_get_old')

        if job.inputdata and job.inputdata._name ==  'ATLASTier3Dataset':
            _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError('j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'ganga-stage-in-out-dq2.py')
            _append_files(inputbox,'ganga-joboption-parse.py')
            if not 'dq2info.tar.gz' in [os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'dq2info.tar.gz') 

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'dq2tracerreport.py')
        if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'db_dq2localid.py')
        if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]:
            _append_files(inputbox, 'getstats.py')


        if str(app.atlas_release).find('12.')>=0:
            _append_files(inputbox, 'libDCache.so','libRFIO.so','libdcap.so')
        else:
            _append_files(inputbox,'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox
            
#       prepare environment

        if not app.atlas_release: 
            raise ApplicationConfigurationError('j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.')

        environment={ 
            'ATLAS_RELEASE'  : app.atlas_release,
            'ATHENA_OPTIONS' : athena_options,
            'ATHENA_USERSETUPFILE' : athena_usersetupfile,
            'ATLAS_PROJECT' : app.atlas_project,
            'ATLAS_EXETYPE' : app.atlas_exetype,
            'GANGA_VERSION' : configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
                try:
                    match = re.match(r"^(\w+)=(.*)", var)
                    environment[match.group(1)] = match.group(2)
                except AttributeError:
                    logger.warning('Athena.atlas_environment variable not correctly configured: %s', var)

        if app.atlas_production and app.atlas_release.find('12.')>=0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub(r'\.', '_', app.atlas_production)
            prod_url = config['PRODUCTION_ARCHIVE_BASEURL']+'/AtlasProduction_'+ temp_atlas_production +'_noarch.tar.gz'
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.')<=0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production
        
        if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http')>=0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)
        
        if job.backend.requirements._name == 'AtlasLCGRequirements':
            requirements = AtlasLCGRequirements()
        elif job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            requirements = AtlasLCGRequirements()
        
        if 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        
        if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1
                    
            else:
                raise ApplicationConfigurationError('j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.')

            # Raise submission exception
            if (not job.backend.CE and 
                not (job.backend.requirements._name in [ 'AtlasLCGRequirements', 'AtlasCREAMRequirements' ] and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob') and
                not (job.splitter and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            if job.inputdata.match_ce_all or job.inputdata.min_num_files>0:
                raise ApplicationConfigurationError('Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

#       prepare job requirements
        requirementsSoftware = getLCGReleaseTag( app )

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error('The athena release %s you are using is not recommended for distributed analysis !', requirementsSoftware[0])
            logger.error('For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !')
        requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit    
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find('x86_64')>=0:
            environment['ATLAS_ARCH'] = '64'
            
        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ]  and job.inputdata.type in [ 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            dq2client_version = None
            try:
                # override the default one if dq2client_version is present
                # in the job backend's requirements object
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (job.splitter and (job.splitter._name == 'DQ2JobSplitter' or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !')
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except IndexError:
                logger.warning('Problems with the atlas_dbrelease configuration')


        # Fill AtlasLCGRequirements access mode 
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning("config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !" )
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(0)  # rewind before reading the pickled payload back
            lines = fileHandle.read()
            inputbox.append(FileBuffer( 'access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [ os.path.basename(file.name) for file in inputbox ]:
                _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py')

#       jobscript

        exe = os.path.join(__directory__,'run-athena-lcg.sh')

#       output sandbox
        outputbox = [
            'output_guids',
            'output_location',
            'output_data',
            'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']
            
        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'
            
        return LCGJobConfig(File(exe),inputbox,[],outputbox,environment,[],requirements) 
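
For reference, a minimal sketch of the access-info round trip performed above
(pickle into an in-memory buffer, rewind, read back); the payload dict is a
made-up placeholder, not real output of list_access_info():

    import pickle
    import StringIO

    info = {'SOMESITE': 'FILE_STAGER'}  # hypothetical payload
    fileHandle = StringIO.StringIO()
    pickle.dump(info, fileHandle)
    fileHandle.seek(0)  # rewind before reading the buffer back
    lines = fileHandle.read()
    assert pickle.loads(lines) == info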
Example #12
0
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object

        logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == "DQ2OutputDataset":
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ""
                dq2_isGroupDS = False
                dq2_groupname = ""
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname
            )

        # Expand Athena jobOptions
        if not app.option_file:
            raise ConfigError("j.application.option_file='' - No Athena jobOptions files specified.")

        athena_options = ""
        inputbox = [File(os.path.join(os.path.dirname(__file__), "athena-utility.sh"))]
        if app.atlas_exetype in ["PYARA", "ARES", "ROOT", "EXE"]:

            for option_file in app.option_file:
                athena_options += " " + os.path.basename(option_file.name)
                inputbox += [File(option_file.name)]

            athena_options += " %s " % app.options

        else:
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += " " + athena_option
                if app.options:
                    athena_options = app.options + " " + athena_options
                inputbox += [File(option_file.name)]

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        if app.user_setupfile.name:
            inputbox += [File(app.user_setupfile.name)]
        # CN: added extra test for TNTJobSplitter
        if (
            job.inputdata
            and job.inputdata._name in ["DQ2Dataset", "ATLASTier3Dataset"]
            or (job._getRoot().splitter and job._getRoot().splitter._name == "TNTJobSplitter")
        ):
            _append_files(inputbox, "ganga-stage-in-out-dq2.py")
            _append_files(inputbox, "dq2_get")
            _append_files(inputbox, "dq2info.tar.gz")
            _append_files(inputbox, "libdcap.so")

        if job.inputdata and job.inputdata._name == "ATLASDataset":
            if job.inputdata.lfc:
                _append_files(inputbox, "ganga-stagein-lfc.py")
            else:
                _append_files(inputbox, "ganga-stagein.py")

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name in ["DQ2Dataset"] and job.inputdata.type in ["FILE_STAGER"]:
            _append_files(inputbox, "make_filestager_joption.py", "dm_util.py", "fs-copy.py")

        if not "getstats.py" in [os.path.basename(file.name) for file in inputbox]:
            _append_files(inputbox, "getstats.py")

        if job.outputdata and job.outputdata._name == "DQ2OutputDataset":
            if not job.outputdata.location:
                raise ApplicationConfigurationError(
                    None, "j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !"
                )
            if not File(os.path.join(os.path.dirname(__file__), "ganga-stage-in-out-dq2.py")) in inputbox:
                _append_files(inputbox, "ganga-stage-in-out-dq2.py")
                _append_files(inputbox, "dq2info.tar.gz")
                _append_files(inputbox, "libdcap.so")
            _append_files(inputbox, "ganga-joboption-parse.py")

        if job.inputsandbox:
            for file in job.inputsandbox:
                inputbox += [file]
        if app.user_area.name:
            if app.is_prepared is True:
                inputbox += [File(app.user_area.name)]
            else:
                inputbox += [
                    File(
                        os.path.join(
                            os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.user_area.name)
                        )
                    )
                ]
        if app.group_area.name and string.find(app.group_area.name, "http") < 0:
            if app.is_prepared is True:
                inputbox += [File(app.group_area.name)]
            else:
                inputbox += [
                    File(
                        os.path.join(
                            os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.group_area.name)
                        )
                    )
                ]

        #       prepare environment

        try:
            atlas_software = config["ATLAS_SOFTWARE"]
        except ConfigError:
            raise ConfigError("No default location of ATLAS_SOFTWARE specified in the configuration.")

        if app.atlas_release == "" and app.atlas_project != "AthAnalysisBase":
            raise ApplicationConfigurationError(
                None,
                "j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.",
            )

        environment = {
            "ATLAS_RELEASE": app.atlas_release,
            "ATHENA_OPTIONS": athena_options,
            "ATLAS_SOFTWARE": atlas_software,
            "ATHENA_USERSETUPFILE": athena_usersetupfile,
            "ATLAS_PROJECT": app.atlas_project,
            "ATLAS_EXETYPE": app.atlas_exetype,
            "GANGA_VERSION": configSystem["GANGA_VERSION"],
            "DQ2_SETUP_SCRIPT": configDQ2["setupScript"],
        }

        # Set athena architecture: 32 or 64 bit
        environment["ATLAS_ARCH"] = "32"
        cmtconfig = app.atlas_cmtconfig
        if cmtconfig.find("x86_64") >= 0:
            environment["ATLAS_ARCH"] = "64"

        environment["ATLAS_CMTCONFIG"] = app.atlas_cmtconfig
        environment["DCACHE_RA_BUFFER"] = str(config["DCACHE_RA_BUFFER"])

        if app.atlas_environment:
            for var in app.atlas_environment:
                key_value = var.split("=")
                if len(key_value) == 2:
                    environment[key_value[0]] = key_value[1]

        if app.atlas_production and (app.atlas_project == "AtlasPoint1" or app.atlas_release.find("12.") <= 0):
            environment["ATLAS_PRODUCTION"] = app.atlas_production

        if app.user_area.name:
            environment["USER_AREA"] = os.path.basename(app.user_area.name)
        if app.group_area.name:
            if string.find(app.group_area.name, "http") >= 0:
                environment["GROUP_AREA_REMOTE"] = "%s" % (app.group_area.name)
            else:
                environment["GROUP_AREA"] = os.path.basename(app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment["ATHENA_MAX_EVENTS"] = str(app.max_events)

        if job.inputdata and job.inputdata._name == "StagerDataset":

            if job.inputdata.type not in ["LOCAL"]:

                try:
                    environment["X509CERTDIR"] = os.environ["X509_CERT_DIR"]
                except KeyError:
                    environment["X509CERTDIR"] = ""

                try:
                    proxy = os.environ["X509_USER_PROXY"]
                except KeyError:
                    proxy = "/tmp/x509up_u%s" % os.getuid()

                REMOTE_PROXY = "%s:%s" % (socket.getfqdn(), proxy)
                environment["REMOTE_PROXY"] = REMOTE_PROXY

                try:
                    environment["GANGA_GLITE_UI"] = configLCG["GLITE_SETUP"]
                except:
                    pass

        if job.inputdata and job.inputdata._name == "DQ2Dataset":
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment["DATASETNAME"] = ":".join(datasetname)
                environment["DATASETLOCATION"] = ":".join(job.inputdata.get_locations())
                environment["DQ2_URL_SERVER"] = configDQ2["DQ2_URL_SERVER"]
                environment["DQ2_URL_SERVER_SSL"] = configDQ2["DQ2_URL_SERVER_SSL"]
                # environment['DATASETTYPE']=job.inputdata.type
                # At present, DQ2 download is the only thing that works
                environment["DATASETTYPE"] = "DQ2_DOWNLOAD"
                if job.inputdata.accessprotocol:
                    environment["DQ2_LOCAL_PROTOCOL"] = job.inputdata.accessprotocol

                try:
                    environment["X509CERTDIR"] = os.environ["X509_CERT_DIR"]
                except KeyError:
                    environment["X509CERTDIR"] = ""

                try:
                    proxy = os.environ["X509_USER_PROXY"]
                except KeyError:
                    proxy = "/tmp/x509up_u%s" % os.getuid()

                REMOTE_PROXY = "%s:%s" % (socket.getfqdn(), proxy)
                environment["REMOTE_PROXY"] = REMOTE_PROXY
                try:
                    environment["GANGA_GLITE_UI"] = configLCG["GLITE_SETUP"]
                except:
                    pass

            else:
                raise ConfigError("j.inputdata.dataset='' - DQ2 dataset name needs to be specified.")

            if job.inputdata.tagdataset:
                environment["TAGDATASETNAME"] = ":".join(job.inputdata.tagdataset)

        if job.outputdata and job.outputdata._name == "DQ2OutputDataset":
            environment["DQ2_URL_SERVER"] = configDQ2["DQ2_URL_SERVER"]
            environment["DQ2_URL_SERVER_SSL"] = configDQ2["DQ2_URL_SERVER_SSL"]
            try:
                environment["X509CERTDIR"] = os.environ["X509_CERT_DIR"]
            except KeyError:
                environment["X509CERTDIR"] = ""
            try:
                proxy = os.environ["X509_USER_PROXY"]
            except KeyError:
                proxy = "/tmp/x509up_u%s" % os.getuid()

            REMOTE_PROXY = "%s:%s" % (socket.getfqdn(), proxy)
            environment["REMOTE_PROXY"] = REMOTE_PROXY
            try:
                environment["GANGA_GLITE_UI"] = configLCG["GLITE_SETUP"]
            except:
                pass

        if hasattr(job.backend, "extraopts"):
            if job.backend.extraopts.find("site=hh") > 0:
                environment["DQ2_LOCAL_SITE_ID"] = "DESY-HH_SCRATCHDISK"
            elif job.backend.extraopts.find("site=zn") > 0:
                environment["DQ2_LOCAL_SITE_ID"] = "DESY-ZN_SCRATCHDISK"
            else:
                environment["DQ2_LOCAL_SITE_ID"] = configDQ2["DQ2_LOCAL_SITE_ID"]
        else:
            environment["DQ2_LOCAL_SITE_ID"] = configDQ2["DQ2_LOCAL_SITE_ID"]

        exe = os.path.join(os.path.dirname(__file__), "run-athena-local.sh")

        #       output sandbox
        outputbox = []
        outputGUIDs = "output_guids"
        outputLOCATION = "output_location"
        outputDATA = "output_data"
        outputbox.append(outputGUIDs)
        outputbox.append(outputLOCATION)
        outputbox.append(outputDATA)
        outputbox.append("stats.pickle")
        if job.outputsandbox:
            for file in job.outputsandbox:
                outputbox += [file]

        ## retrieve the FileStager log
        if job.inputdata and job.inputdata._name in ["DQ2Dataset"] and job.inputdata.type in ["FILE_STAGER"]:
            outputbox += ["FileStager.out", "FileStager.err"]

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment["GANGA_LOG_DEBUG"] = "0"
        else:
            environment["GANGA_LOG_DEBUG"] = "1"

        return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
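
For reference, both master_prepare variants parse 'KEY=VALUE' strings from
app.atlas_environment; a sketch of that parsing factored into a helper (the
name is made up), returning None for malformed entries instead of raising:

    import re

    def parse_env_var(var):
        """Split a 'KEY=VALUE' string, mirroring the atlas_environment loops."""
        match = re.match(r"^(\w+)=(.*)", var)
        if match is None:
            return None
        return match.group(1), match.group(2)

    parse_env_var('DQ2_LOCAL_SITE_ID=CERN-PROD_SCRATCHDISK')
    # ('DQ2_LOCAL_SITE_ID', 'CERN-PROD_SCRATCHDISK')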