Example 1
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        from Ganga.Lib.LCG import LCGJobConfig

        prepared_exe = File(os.path.join( os.path.dirname(__file__), "runNA62MC.sh"))
        
        #if app.is_prepared is not None:
        #    logger.info("Submitting a prepared application; taking any input files from %s" %(app.is_prepared.name))
        #    prepared_exe = File(os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(prepared_exe.name)))

        #outputbox = [ 'na62run%d.stderr.err' % app.run_number, 'na62run%d.stdout.out' % app.run_number ]
        outputbox = []
        outputbox += app._getParent().outputsandbox
        
        inputbox = [ File(os.path.join(os.path.join(shared_path,app.is_prepared.name), app.getMACFileName())) ]
        inputbox.append( File(os.path.join(shared_path,app.is_prepared.name, app.script_name)) )
        inputbox.append( File(os.path.join(shared_path,app.is_prepared.name, "input_files.tgz")) )
        inputbox += app._getParent().inputsandbox
        
        env = {'NA62SCRIPT':app.script_name,
               'NA62STDOUT':'na62run%d.out' % app.run_number,
               'NA62STDERR':'na62run%d.err' % app.run_number}
        args = [app.getMACFileName(), "%s/r%d/%s" % (app.job_type, app.getRevision(), app.getDecayString())]

        # add the output files
        app._getParent().outputfiles = [SandboxFile('na62run%d.out' % app.run_number), SandboxFile('na62run%d.err' % app.run_number),
                                        SandboxFile('__jdlfile__'), SandboxFile(app.getMACFileName()) ]
        
        return LCGJobConfig(prepared_exe, inputbox, args, outputbox, env)
Example 2
    def configure(self, masterappconfig):
        '''Configure method, called once per job.
        '''
        logger.debug('RAT::RATProd configure ...')

        job = self._getParent()
        masterjob = job._getParent()

        if self.prodScript == '' and self.ratMacro == '':
            logger.error('prodScript or ratMacro not defined')
            raise Exception
        elif self.prodScript != '' and self.ratMacro != '':
            logger.error('both prodScript and ratMacro are defined')
            raise Exception
        if self.useDB:
            if not config['rat_db_pswd']:
                logger.error(
                    'Need a password in order to contact the ratdb database')
                raise Exception

        #The production script is added, line by line, into the submission script
        #job.inputsandbox.append(File(self.prodScript))

        if self.ratMacro != '':
            #decimated=self.ratMacro.split('/')
            #macFile=decimated[len(decimated)-1]#for when I thought we needed args to go with the script
            job.inputsandbox.append(File(self.ratMacro))
            #Always run rat with a log called rat.log
            job.outputsandbox.append('rat.log')
        else:
            job.inputsandbox.append(File(self.prodScript))
        job.outputsandbox.append('return_card.js')

        #we want a list of files - if we only have one (i.e. a string) then just force into a list
        if type(self.inputFiles) is str:
            self.inputFiles = [self.inputFiles]
        if type(self.outputFiles) is str:
            self.outputFiles = [self.outputFiles]

        return (None, None)
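
A hedged usage sketch consistent with the checks in this configure method; the attribute names come from the snippet above, while the RATProd() constructor call is an assumption:

# Illustrative only: exactly one of prodScript / ratMacro may be set,
# and bare strings for inputFiles / outputFiles are coerced to lists.
app = RATProd()                      # assumed application class name
app.ratMacro = '/path/to/job.mac'    # leave prodScript empty
app.inputFiles = 'input_0.root'      # becomes ['input_0.root'] in configure()
app.outputFiles = ['out_0.root', 'out_1.root']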
Example 3
    def master_prepare(self, app, appmasterconfig):
        if app.siteroot:
            os.environ["SITEROOT"] = app.siteroot
        if app.cmtsite:
            os.environ["CMTSITE"] = app.cmtsite

        job = app._getParent()
        if app.dryrun:
            os.environ["SITEROOT"] = "NONE"
            os.environ["CMTSITE"] = "NONE"
        try:
            assert "SITEROOT" in os.environ
        except:
            raise ApplicationConfigurationError(
                None, " ATLAS environment not defined")

        try:
            assert "CMTSITE" in os.environ and os.environ["CMTSITE"] != ""
        except:
            raise ApplicationConfigurationError(
                None,
                "cmt not setup properly. Please check your ATLAS setup or run on the grid"
            )

        if "AtlasVersion" in os.environ:
            logger.debug(
                "Checking AtlasVersion: %s and selected atlas release %s" %
                (os.environ["AtlasVersion"], app.atlas_rel))
            try:
                assert app.atlas_rel == os.environ["AtlasVersion"]
            except:
                logger.error(
                    "Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value."
                    % os.environ["AtlasVersion"])
                app.atlas_rel = os.environ["AtlasVersion"]
        elif "ATLAS_RELEASE" in os.environ:
            logger.debug(
                "Checking ATLAS_RELEASE: %s and selected atlas release %s" %
                (os.environ["ATLAS_RELEASE"], app.atlas_rel))
            try:
                assert app.atlas_rel == os.environ["ATLAS_RELEASE"]
            except:
                logger.error(
                    "Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value."
                    % os.environ["ATLAS_RELEASE"])
                app.atlas_rel = os.environ["ATLAS_RELEASE"]
        else:
            logger.warning(
                "Could not compare requested release and local setup. Hope you are doing something sensible..."
            )

        if job.backend._name == "LSF":
            try:
                assert "CMTSITE" in os.environ and os.environ[
                    "CMTSITE"] == "CERN"
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Error, CERN ATLAS AFS environment not defined. Needed by LSF backend"
                )

        environment = {'T_LCG_GFAL_INFOSYS': 'atlas-bdii.cern.ch:2170'}
        for tag in os.environ:
            if os.environ[tag] != "":
                environment[tag] = os.environ[tag]

        trfopts = app.transflags
        # need to parse them to be able to pass them in an environment variable
        trfopts = trfopts.replace(" ", "/W")
        trfopts = trfopts.replace("-", "/F")

        trflags = trfopts
        if app.mode == "evgen":
            trflags = "/Ft"
            if app.verbosity:
                trflags += "/W/Fl/W%s" % app.verbosity

        if trflags:
            environment["TRFLAGS"] = trflags

        # setting output site from input data if any.
        outsite, backup, outputlocation, backuplocation = "", "", "", ""
        logger.info("checking sites from input data: %s" % str(app.sites))

        # must distinguish running site (backend.requirements.sites) and output storage site (app.se_name)

        # matching with user's wishes (app.se_name or backend.requirements.sites)

        # select sites which are matching user's wishes, if any.
        selectedSites = app.sites
        logger.debug(str(app.sites))
        if job.backend._name != "Local":
            if len(selectedSites) > 0:
                [outlfc, outsite, outputlocation
                 ] = job.outputdata.getDQ2Locations(selectedSites[0])
            if len(selectedSites) > 1:
                [outlfc2, backup, backuplocation
                 ] = job.outputdata.getDQ2Locations(selectedSites[1])

        # app.se_name set: users wishes to get the output data written to another site than the one hosting the input.
        # One needs to ensure that this location is at least in the same cloud as the targeted processing site. This is done by ensuring that the lfcs are the same.
        userSEs = []
        outse = ""
        if job.application.se_name and job.application.se_name != "none" and job.backend._name != "Local":
            userSEs = job.application.se_name.split(" ")
            # loop through userSEs until up to 2 valid sites are found...
            outse = ""
            for SE in userSEs:
                [lfc, se, location] = job.outputdata.getDQ2Locations(SE)
                if lfc == outlfc:
                    if not outse:
                        outse = se  # important to use outse and not outsite here, as outsite is used for selection of processing site.
                        # userSEs overrides outlfc,outputlocation, but not outsite as outsite is unfortunately used for choice of the processing site.
                        outputlocation = location
                    else:
                        outlfc2 = lfc
                        backup = se
                        backuplocation = location
                        break

        # finally: if no backup location is defined at this point, enforce CERN-PROD_SCRATCHDISK as backup location
        if outsite == "":
            [outlfc, outsite,
             outputlocation] = job.outputdata.getDQ2Locations(_defaultSite)
        if backup == "":
            [outlfc2, backup,
             backuplocation] = job.outputdata.getDQ2Locations(_defaultSite)

        logger.info("Final selection of output sites: %s , backup: %s" %
                    (outsite, backup))

        # srmv2 sites special treatment: the space token has been prefixed to the outputlocation and must be removed now:
        imin = string.find(outputlocation, "token:")
        imax = string.find(outputlocation, "srm:")
        spacetoken = ""
        if imin > -1 and imax > -1:
            spacetoken = outputlocation[imin + 6:imax - 1]
            outputlocation = outputlocation[imax:]
        # same treatment for backup location if any
        imin = string.find(backuplocation, "token:")
        imax = string.find(backuplocation, "srm:")
        bst = ""
        if imin > -1 and imax > -1:
            bst = backuplocation[imin + 6:imax - 1]
            backuplocation = backuplocation[imax:]

        environment["OUTLFC"] = outlfc
        environment["OUTSITE"] = outsite
        if outse:
            environment[
                "OUTSITE"] = outse  # user's choice for output storage location overriding AthenaMC's.

        environment["OUTPUT_LOCATION"] = outputlocation
        if spacetoken:
            environment["SPACETOKEN"] = spacetoken
        if backup:
            environment["OUTLFC2"] = outlfc2
            environment["OUTSITE2"] = backup
            environment["OUTPUT_LOCATION2"] = backuplocation

        environment["PROD_RELEASE"] = app.prod_release

        # setting environment["BACKEND"]
        # Local, Condor become "batch". LSF becomes "batch" unless the inputdata is on castor (in this case, it becomes "castor")
        environment["BACKEND"] = job.backend._name
        if job.backend._name == "LSF" and len(app.turls.values()) > 0:
            turl = app.turls.values()[0]
##            if string.find(turl,"castor")>-1:
##                environment["BACKEND"]="castor"
##            else:
##                environment["BACKEND"]="batch"
        if job.backend._name in ["Local", "Condor", "PBS"]:
            environment["SITEROOT"] = os.environ["SITEROOT"]
            environment["CMTSITE"] = os.environ["CMTSITE"]
            if job.backend._name in ["Condor", "PBS"]:
                environment["BACKEND"] = "batch"

        # finalise environment

        # preparing input sandbox, output sandbox, environment vars and job requirements

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'setup-release.sh')),
            File(os.path.join(os.path.dirname(__file__), 'stage-in.sh')),
            File(os.path.join(os.path.dirname(__file__), 'stage-out.sh')),
            File(os.path.join(os.path.dirname(__file__), 'adler32.py'))
        ]

        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [File(app.transform_archive)]
        elif app.transform_archive:
            # tarball in local or remote web area.
            if string.find(app.transform_archive, "http") >= 0:
                environment['TRANSFORM_ARCHIVE'] = "%s" % (
                    app.transform_archive)
            else:
                myfile = os.path.basename(app.transform_archive)
                myfile = "http://cern.ch/atlas-computing/links/kitsDirectory/Production/kits/" + myfile
                environment['TRANSFORM_ARCHIVE'] = "%s" % (myfile)

        if app.evgen_job_option and os.path.exists(app.evgen_job_option):
            # locally modified job option file to add to the input sand box
            inputbox += [File(app.evgen_job_option)]
            # need to strip the path away.
            self.evgen_job_option = app.evgen_job_option.split("/")[-1]
            environment['CUSTOM_JOB_OPTION'] = "%s" % (self.evgen_job_option)
        elif app.evgen_job_option:
            self.evgen_job_option = app.evgen_job_option

        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [file]

        outputbox = []
        outputGUIDs = 'output_guids'
        outputLOCATION = 'output_location'
        outputbox.append(outputGUIDs)
        outputbox.append(outputLOCATION)
        outputbox.append('output_data')
        if (job.outputsandbox):
            for file in job.outputsandbox:
                outputbox += [file]

        # switch JobTransforms/AtlasProduction package.
        self.isJT = string.find(app.transform_archive, "JobTransform")
        if self.isJT > -1 and app.mode == "evgen":
            environment['T_CONTEXT'] = str(
                self.number_events_job
            )  # needed to avoid the prodsys failure mechanism based on a hardcoded minimum of 5000 events per job

        # backend specifics:
        if job.backend._name == "SGE" and job.backend.extraopts == "":
            job.backend.extraopts = "-l h_vmem=5G -l s_vmem=5G -l h_cpu=1:00:00 -l h_fsize=10G"  # minimum set up for Atlfast II

        logger.debug("master job submit?")

        return StandardJobConfig("", inputbox, [], outputbox, environment)
Example 4
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        """Prepare the job"""

        inputbox = []

        #       prepare environment
        environment = {}
        environment = jobmasterconfig.env.copy()
        environment["INPUTDATASETS"] = ""
        environment["INPUTSITES"] = ""
        environment["INPUTFILES"] = ""

        alllfns = app.inputfiles + app.cavernfiles + app.mbfiles + app.dbfiles
        infilenr = 0
        for infile in alllfns:
            environment["INPUTFILES"] += "lfn[%d]='%s';" % (infilenr, infile)
            environment["INPUTDATASETS"] += "dset[%d]='%s';" % (
                infilenr, app.dsetmap[infile])
            insites = app.sitemap[infile]
            # compare with environment["OUTSITE"] and reorder if needed.
            newinsites = self.sortSites(insites, environment["OUTSITE"])
            environment["INPUTSITES"] += "site[%d]='%s';" % (infilenr,
                                                             newinsites)
            infilenr += 1

        logger.debug(
            "%s %s %s" %
            (str(environment["INPUTDATASETS"]), str(
                environment["INPUTSITES"]), str(environment["INPUTFILES"])))

        job = app._getParent()  # Returns job or subjob object
        # if datasetType is DQ2, then one needs to ensure that DQ2 environment is properly set.
        if job.inputdata:
            if job.inputdata.datasetType == "DQ2":
                environment["BACKEND"] = "LCG"
                try:
                    assert "DQ2_LOCAL_SITE_ID" in os.environ
                except:
                    logger.error(
                        "Error in DQ2 configuration. Please leave ganga, then rerun local DQ2 setup before restarting ganga. Or change inputdata.datasetType to 'local'"
                    )
                    raise
            elif job.backend._name == "Local":
                environment["BACKEND"] = "Local"
            else:
                environment["BACKEND"] = "batch"


        # now doing output files....

        outfilelist = ""
        subjob_outbox = []
        for type in app.outputpaths.keys():
            outfilelist += app.outputpaths[type] + app.subjobsOutfiles[
                job.id][type] + " "
            if job.application.se_name == "ganga":
                outfile1 = app.subjobsOutfiles[job.id][type]
                subjob_outbox.append(outfile1)
        environment["OUTPUTFILES"] = outfilelist
        # Work around for glite WMS spaced environment variable problem
        inputbox += [
            FileBuffer('outputfiles.conf', environment['OUTPUTFILES'] + '\n')
        ]

        # setting up job wrapper arguments.
        args = app.args

        jid = ""
        if job._getRoot().subjobs:
            jid = job._getRoot().id
        else:
            jid = "%d" % job.id
        environment["OUTPUT_JOBID"] = str(jid)  # used for versionning
        if app.dryrun:
            environment["DRYRUN"] = "TRUE"
        if app.dbrelease:
            environment["ATLASDBREL"] = app.dbrelease
        inputdata = []

        filename = "wrapper.sh"
        exe = os.path.join(os.path.dirname(__file__), filename)

        #       output sandbox
        outputbox = jobmasterconfig.outputbox
        outputbox.extend(subjob_outbox)

        return StandardJobConfig(File(exe), inputbox, args, outputbox,
                                 environment)
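
The INPUTFILES, INPUTDATASETS and INPUTSITES strings assembled above are formatted as indexed shell-style assignments (lfn[0]='...';dset[0]='...';) so the job wrapper can eval them into arrays. For illustration, a hedged Python sketch of the inverse parsing; the helper name and regex are assumptions, not part of the snippet:

import re

def decode_indexed_assignments(blob, name):
    """Turn "lfn[0]='a';lfn[1]='b';" back into ['a', 'b'], preserving index order."""
    pairs = re.findall(r"%s\[(\d+)\]='([^']*)';" % name, blob)
    return [value for _, value in sorted(pairs, key=lambda p: int(p[0]))]

# decode_indexed_assignments("lfn[0]='f1.root';lfn[1]='f2.root';", "lfn")
# -> ['f1.root', 'f2.root']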
Example 5
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        """Prepare the job"""

        inputbox = []

        #       prepare environment
        environment = {}
        environment = jobmasterconfig.env.copy()
        environment["INPUTDATASETS"] = ""
        environment["INPUTFILES"] = ""
        environment["INPUTTURLS"] = ""

        alllfns = app.inputfiles + app.cavernfiles + app.mbfiles + app.dbfiles
        guids = app.turls
        guids.update(app.cavern_turls)
        guids.update(app.minbias_turls)
        guids.update(app.dbturls)

        infilenr = 0
        for infile in alllfns:
            environment["INPUTFILES"] += "lfn[%d]='%s';" % (infilenr, infile)
            environment["INPUTDATASETS"] += "dset[%d]='%s';" % (
                infilenr, app.dsetmap[infile])
            ##            insites=app.sitemap[infile]
            ##            # compare with environment["OUTSITE"] and reorder if needed.
            ##            newinsites=self.sortSites(insites,environment["OUTSITE"])
            ##            environment["INPUTSITES"]+="site[%d]='%s';"%(infilenr,newinsites)
            environment["INPUTTURLS"] += "turl[%d]='%s';" % (infilenr,
                                                             guids[infile])

            infilenr += 1

        logger.debug(
            "%s %s %s" %
            (str(environment["INPUTDATASETS"]), str(
                environment["INPUTTURLS"]), str(environment["INPUTFILES"])))

        if environment["INPUTDATASETS"]:
            # Work around for glite WMS spaced environment variable problem
            inputbox += [
                FileBuffer('inputdsets.conf',
                           environment['INPUTDATASETS'] + '\n')
            ]
        if environment["INPUTTURLS"]:
            # Work around for glite WMS spaced environment variable problem
            inputbox += [
                FileBuffer('inputturls.conf', environment['INPUTTURLS'] + '\n')
            ]
        if environment["INPUTFILES"]:
            # Work around for glite WMS spaced environment variable problem
            inputbox += [
                FileBuffer('inputfiles.conf', environment['INPUTFILES'] + '\n')
            ]


        # now doing output files....
        job = app._getParent()  # Returns job or subjob object

        outfilelist = ""
        for type in app.outputpaths.keys():
            if type == "LOG" and "LOG" not in job.outputdata.outrootfiles:
                # logfiles are no longer saved in DQ2 datasets unless they are explicitly named in the outrootfiles dictionary
                continue
            outfilelist += app.outputpaths[type] + app.subjobsOutfiles[
                job.id][type] + " "

        environment["OUTPUTFILES"] = outfilelist
        # Work around for glite WMS spaced environment variable problem
        inputbox += [
            FileBuffer('outputfiles.conf', environment['OUTPUTFILES'] + '\n')
        ]

        # setting up job wrapper arguments.
        args = app.args
        trfargs = ' '.join(app.args[4:])
        inputbox += [FileBuffer('trfargs.conf', trfargs + '\n')]
        jid = ""
        if job._getRoot().subjobs:
            jid = job._getRoot().id
        else:
            jid = "%d" % job.id
        environment["OUTPUT_JOBID"] = str(jid)  # used for versionning
        if app.dryrun:
            environment["DRYRUN"] = "TRUE"
        if app.dbrelease:
            environment["ATLASDBREL"] = app.dbrelease
        inputdata = []

        filename = "wrapper.sh"
        exe = os.path.join(os.path.dirname(__file__), filename)

        #       output sandbox
        outputbox = jobmasterconfig.outputbox

        if job.backend._name == "LCG" or job.backend._name == "Cronus" or job.backend._name == "Condor" or job.backend._name == "NG" or job.backend._name == "SGE":
            logger.debug("submission to %s" % job.backend._name)
            #       prepare job requirements
            requirements = jobmasterconfig.requirements

            if "INPUTTURLS" in environment:
                logger.debug(environment["INPUTTURLS"])
                if string.find(environment["INPUTTURLS"], "file:") >= 0:
                    raise ApplicationConfigurationError(
                        None,
                        "Input file was found to be local, and LCG backend does not support replication of local files to the GRID yet. Please register your input dataset in DQ2 before resubmitting this job. Aborting"
                    )
            if string.lower(app.se_name) == "local":
                raise ApplicationConfigurationError(
                    None,
                    "Output file cannot be committed to local filesystem on a grid job. Please change se_name"
                )

            lcg_job_config = LCGJobConfig(File(exe), inputbox, args, outputbox,
                                          environment, inputdata, requirements)
            lcg_job_config.monitoring_svc = mc['AthenaMC/LCG']
            return lcg_job_config
        else:
            logger.debug(
                "Backend %s not fully supported , will try our best anyway..."
                % job.backend._name)
            # if there are input data files and they are on the grid, prestage them in a local area (use either app.datasets.input_dataset or /tmp/$login/data, and update environment["INPUTFILE"] accordingly if the latter is used...)
            # later development....

            return StandardJobConfig(File(exe), inputbox, args, outputbox,
                                     environment)
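
Because the glite WMS mangles environment variables containing spaces, the same values are also shipped as small files (inputdsets.conf, inputturls.conf, inputfiles.conf, outputfiles.conf) via FileBuffer. A hedged sketch of how the job-side wrapper could fall back to these files; the file names are taken from the snippet, the helper itself is assumed:

import os

def read_conf_fallback(env_name, conf_name):
    """Prefer the environment variable, fall back to the shipped .conf file."""
    value = os.environ.get(env_name, '')
    if not value and os.path.exists(conf_name):
        with open(conf_name) as handle:
            value = handle.read().strip()
    return value

# read_conf_fallback('OUTPUTFILES', 'outputfiles.conf')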
Example 6
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []

        # If job has inputdata
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG',
                            'TNT_LOCAL', 'TNT_DOWNLOAD'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'

            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [
                            line.strip() for line in pfnListFile
                        ]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG',
                            'TNT_LOCAL', 'TNT_DOWNLOAD'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'

                    contents = job.inputdata.get_contents()
                    input_files = [lfn for guid, lfn in contents]
                    input_guids = [guid for guid, lfn in contents]

                    if job.inputdata.tagdataset:
                        tag_contents = job.inputdata.get_tag_contents()
                        input_tag_files = [lfn for guid, lfn in tag_contents]
                        input_tag_guids = [guid for guid, lfn in tag_contents]
                    if job.inputdata.use_aodesd_backnav:
                        esd_contents = job.inputdata.get_contents(backnav=True)
                        input_esd_files = [lfn for guid, lfn in esd_contents]
                        input_esd_guids = [guid for guid, lfn in esd_contents]

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

        # Outputdataset
        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)
                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job._getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

            elif job.outputdata.location == '' and job.outputdata._name == 'DQ2OutputDataset':
                output_location = ''
            elif job.outputdata.location:
                output_location = expandfilename(job.outputdata.location)
            else:
                try:
                    output_location = config['LocalOutputLocation']
                    if job.outputdata:
                        job.outputdata.location = expandfilename(
                            output_location)
                except ConfigError:
                    logger.warning(
                        'No default output location specified in the configuration.'
                    )
        else:
            try:
                output_location = config['LocalOutputLocation']
            except ConfigError:
                logger.warning(
                    'No default output location specified in the configuration.'
                )

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':

            if job._getRoot().subjobs:
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location,
                                                   "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(
                        output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)

            if job.outputdata:
                # Remove trailing number if job is copied
                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat, output_location):
                    output_location = re.sub(pat, '', output_location)

                    if config['NoSubDirsAtAllForLocalOutput']:
                        output_location = output_location
                    elif config['SingleDirForLocalOutput']:
                        output_location = os.path.join(
                            output_location, "%d" % (job._getRoot().id))
                    elif config['IndividualSubjobDirsForLocalOutput']:
                        output_location = os.path.join(
                            output_location,
                            "%d/%d" % (job._getRoot().id, job.id))
                    else:
                        output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id == 0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                            job.outputdata.location = output_location
                        else:
                            job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
        ]

        if input_guids:
            inputbox += [
                FileBuffer('input_guids', '\n'.join(input_guids) + '\n')
            ]

        if input_files:
            inputbox += [
                FileBuffer('input_files', '\n'.join(input_files) + '\n')
            ]

        if input_tag_guids:
            inputbox += [
                FileBuffer('input_tag_guids',
                           '\n'.join(input_tag_guids) + '\n')
            ]

        if input_tag_files:
            inputbox += [
                FileBuffer('input_tag_files',
                           '\n'.join(input_tag_files) + '\n')
            ]

        if input_esd_guids:
            inputbox += [
                FileBuffer('input_esd_guids',
                           '\n'.join(input_esd_guids) + '\n')
            ]

        if input_esd_files:
            inputbox += [
                FileBuffer('input_esd_files',
                           '\n'.join(input_esd_files) + '\n')
            ]

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            inputbox += [
                FileBuffer('output_files',
                           '\n'.join(job.outputdata.outputdata) + '\n')
            ]
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        ## create and add sample files for FileStager
        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if not job.inputdata.dataset:
                raise ApplicationConfigurationError(
                    None, 'dataset name not specified in job.inputdata')

            ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
            inputbox += [
                File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))
            ]

            (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
                job=job, max_events=app.max_events)
            inputbox += [File(jo_path), File(ic_path)]

            ## re-make the environment['ATHENA_OPTIONS']
            athena_options = os.path.basename(File(jo_path).name)
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options = app.options + ' ' + athena_options

            environment['ATHENA_OPTIONS'] = athena_options
            environment['DATASETTYPE'] = 'FILE_STAGER'

            ## ask to send back the FileStager.out/err generated by fs-copy.py
            outputbox += ['FileStager.out', 'FileStager.err']

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset' and output_location == []:
            raise ApplicationConfigurationError(
                None,
                'No output location could be determined for the DQ2OutputDataset - Please check j.outputdata.location.'
            )

        # set EOS env setting
        environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

        # flag for single output dir
        if (config['SingleDirForLocalOutput'] or
                config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
            environment['SINGLE_OUTPUT_DIR'] = jid

            # change the filename
            newoutput = []
            for outf in job.outputdata.outputdata:
                newfile, newfileExt = os.path.splitext(outf)
                jid = "%d.%d" % (job._getParent().id, job.id)
                newoutput.append("%s.%s%s" % (newfile, jid, newfileExt))

            job.outputdata.outputdata = newoutput[:]

        environment['OUTPUT_LOCATION'] = output_location
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(
                configDQ2['OUTPUTFILE_NAMELENGTH'])
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
            try:
                environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
            except:
                pass
            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            #environment['DATASETTYPE']=job.inputdata.type
            # At present, DQ2 download is the only thing that works
            environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER']:
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if job.inputdata.use_poolfilecatalog_failover:
                environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

        # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
        environment['CREATE_POOLFILECATALOG'] = '1'
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if not job.inputdata.create_poolfilecatalog:
                environment['CREATE_POOLFILECATALOG'] = '0'

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])
            if not 'db_dq2localid.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'db_dq2localid.py')

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # Athena run dir
        if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
            environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

        # Set DQ2_LOCAL_SITE_ID
        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
            elif job.backend.extraopts.find('site=zn') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2[
                    'DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
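
When a single output directory is used for all subjobs, the snippet above tags every output file name with '<masterjob id>.<subjob id>' before the extension. A small sketch of that renaming step in isolation (the helper name is an assumption):

import os

def tag_output_filename(filename, master_id, subjob_id):
    """Insert the '<masterid>.<subjobid>' tag before the file extension."""
    base, ext = os.path.splitext(filename)
    return "%s.%d.%d%s" % (base, master_id, subjob_id, ext)

# tag_output_filename('ntuple.root', 42, 3) -> 'ntuple.42.3.root'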
Example 7
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):

        logger.debug('RAT::RTHandler prepare ...')
        from Ganga.GPIDev.Adapters.StandardJobConfig import StandardJobConfig

        #create the backend wrapper script
        job = app.getJobObject()

        #Check whether we're looking for a non-default sw dir
        if app.softwareEnvironment is None:
            logger.error('Must specify a RAT environment')
            raise Exception

        #we need to know the name of the file to run
        macroFile = None
        prodFile = None
        if app.ratMacro != '':
            decimated = app.ratMacro.split('/')
            macroFile = decimated[len(decimated) - 1]
        else:
            decimated = app.prodScript.split('/')
            prodFile = decimated[len(decimated) - 1]

        foutList = '['
        finList = '['
        for i, var in enumerate(app.outputFiles):
            foutList += '%s,' % var
        for i, var in enumerate(app.inputFiles):
            finList += '%s,' % var
        if len(foutList) != 1:
            foutList = '%s]' % foutList[:-1]  # remove final comma, add close bracket
        if len(finList) != 1:
            finList = '%s]' % finList[:-1]  # remove final comma, add close bracket

        if app.environment == None:
            args = []
            args += ['-v', app.ratVersion]
            args += ['-e', app.softwareEnvironment]
            args += ['-d', app.outputDir]
            args += ['-o', foutList]
            args += ['-x', app.inputDir]
            args += ['-i', finList]
            if app.ratMacro != '':
                args += ['-m', macroFile]
            else:
                args += ['-k', '-m', prodFile]
            if app.useDB:
                args += ['--dbuser', app.config['rat_db_user']]
                args += ['--dbpassword', app.config['rat_db_pswd']]
                args += ['--dbname', app.config['rat_db_name']]
                args += ['--dbprotocol', app.config['rat_db_protocol']]
                args += ['--dburl', app.config['rat_db_url']]
            if app.discardOutput:
                args += ['--nostore']

            return StandardJobConfig(File('%s/ratProdRunner.py' %
                                          _app_directory),
                                     inputbox=app._getParent().inputsandbox,
                                     outputbox=app._getParent().outputsandbox,
                                     args=args)
        else:  #need a specific environment setup
            #can either use a specific file or a list of strings.  the latter needs to be converted to a temp file and shipped.
            envFile = None
            if type(app.environment) == list:
                tempname = 'tempRATProdEnv_%s' % os.getlogin()
                tempf = file('/tmp/%s' % (tempname), 'w')
                for line in app.environment:
                    tempf.write('%s \n' % line)
                tempf.close()
                app._getParent().inputsandbox.append('/tmp/%s' % (tempname))
                envFile = tempname
            else:
                app._getParent().inputsandbox.append(app.environment)
                envFile = os.path.basename(app.environment)
            args = ''
            args += '-v %s ' % (app.ratVersion)
            args += '-e %s ' % (app.softwareEnvironment)
            args += '-d %s ' % (app.outputDir)
            args += '-o %s ' % (foutList)
            args += '-x %s ' % (app.inputDir)
            args += '-i %s ' % (finList)
            if app.ratMacro != '':
                args += '-m %s ' % (macroFile)
            else:
                args += '-k -m %s ' % (prodFile)
            if app.useDB:
                args += '--dbuser %s ' % (app.config['rat_db_user'])
                args += '--dbpassword %s ' % (app.config['rat_db_pswd'])
                args += '--dbname %s ' % (app.config['rat_db_name'])
                args += '--dbprotocol %s ' % (app.config['rat_db_protocol'])
                args += '--dburl %s ' % (app.config['rat_db_url'])
            if app.discardOutput:
                args += '--nostore '

            wrapperArgs = ['-f', envFile, '-a', '"%s"' % args]
            wrapperArgs += ['ratProdRunner.py', 'misc']

            app._getParent().inputsandbox.append('%s/ratProdRunner.py' %
                                                 _app_directory)

            return StandardJobConfig(File('%s/sillyPythonWrapper.py' %
                                          _app_directory),
                                     inputbox=app._getParent().inputsandbox,
                                     outputbox=app._getParent().outputsandbox,
                                     args=wrapperArgs)
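
The -o/-i options above carry the output and input file lists as a single '[a,b,c]' token. A hedged sketch of the corresponding parsing on the worker-node side; ratProdRunner.py is not shown here, so this helper is purely illustrative:

def parse_bracket_list(token):
    """Parse '[a,b,c]' (as built for the -o/-i options above) into a Python list."""
    return [item for item in token.strip('[]').split(',') if item]

# parse_bracket_list('[out_0.root,out_1.root]') -> ['out_0.root', 'out_1.root']
# parse_bracket_list('[') -> []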
Example 8
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):

        logger.debug('RAT::RTHandler prepare ...')
        from Ganga.GPIDev.Adapters.StandardJobConfig import StandardJobConfig

        #create the backend wrapper script
        job = app.getJobObject()

        #Check whether we're looking for a non-default sw dir
        if app.softwareEnvironment is None:
            logger.error('Must specify a RAT directory')
            raise Exception

        voproxy = job.backend.voproxy
        if voproxy == None:
            #use the proxy from the environment (default behaviour)
            try:
                voproxy = os.environ["X509_USER_PROXY"]
            except:
                logger.error(
                    'Cannot run without voproxy either in environment (X509_USER_PROXY) or specified for WG backend'
                )
                raise Exception
        if not os.path.exists(voproxy):
            logger.error(
                'Valid WestGrid backend voproxy location MUST be specified: %s'
                % (voproxy))
            raise Exception

        #we need to know the name of the file to run
        macroFile = None
        prodFile = None
        if app.ratMacro != '':
            decimated = app.ratMacro.split('/')
            macroFile = decimated[len(decimated) - 1]
        else:
            decimated = app.prodScript.split('/')
            prodFile = decimated[len(decimated) - 1]

        foutList = '['
        finList = '['
        for i, var in enumerate(app.outputFiles):
            foutList += '%s,' % var
        for i, var in enumerate(app.inputFiles):
            finList += '%s,' % var
        if len(foutList) != 1:
            foutList = '%s]' % foutList[:-1]  # remove final comma, add close bracket
        if len(finList) != 1:
            finList = '%s]' % finList[:-1]  # remove final comma, add close bracket

        args = ''
        args += '-g srm '
        args += '-v %s ' % (app.ratVersion)
        args += '-s %s ' % (app.softwareEnvironment)
        args += '-d %s ' % (app.outputDir)
        args += '-o %s ' % (foutList)
        args += '-x %s ' % (app.inputDir)
        args += '-i %s ' % (finList)
        if app.ratMacro != '':
            args += '-m %s ' % (macroFile)
        else:
            args += '-k -m %s ' % (prodFile)
        args += '--voproxy %s ' % (voproxy)
        if app.useDB:
            args += '--dbuser %s ' % (app.config['rat_db_user'])
            args += '--dbpassword %s ' % (app.config['rat_db_pswd'])
            args += '--dbname %s ' % (app.config['rat_db_name'])
            args += '--dbprotocol %s ' % (app.config['rat_db_protocol'])
            args += '--dburl %s ' % (app.config['rat_db_url'])
        if app.discardOutput:
            args += '--nostore '

        wrapperArgs = ['-a', '"%s"' % args]
        wrapperArgs += ['ratProdRunner.py', 'wg']

        app._getParent().inputsandbox.append('%s/ratProdRunner.py' %
                                             _app_directory)

        return StandardJobConfig(File('%s/sillyPythonWrapper.py' %
                                      _app_directory),
                                 inputbox=app._getParent().inputsandbox,
                                 outputbox=app._getParent().outputsandbox,
                                 args=wrapperArgs)
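
The WestGrid handler above resolves the VO proxy from job.backend.voproxy, falling back to the X509_USER_PROXY environment variable, and refuses to continue if the file does not exist. The same lookup as a compact sketch (the helper name is assumed):

import os

def resolve_voproxy(explicit_path=None):
    """Return a usable proxy path: explicit value first, else $X509_USER_PROXY; it must exist."""
    proxy = explicit_path or os.environ.get('X509_USER_PROXY')
    if not proxy or not os.path.exists(proxy):
        raise RuntimeError('No valid VO proxy: set X509_USER_PROXY or job.backend.voproxy')
    return proxy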
Example 9
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''Prepare method: called to configure the job for the specified backend.
        '''
        logger.debug('RAT::WGRTHandler prepare ...')
        from Ganga.GPIDev.Adapters.StandardJobConfig import StandardJobConfig

        job = app.getJobObject()

        #remove the leading directory path of the macro (on the grid node we'll just
        #have the macro file)
        decimated = app.ratMacro.split('/')
        ratMacro = decimated[len(decimated) - 1]

        #Set the output directory
        if app.outputDir == None:
            if app.config['grid_outputDir'] == None:
                logger.error('Output directory not defined')
                raise Exception
            else:
                app.outputDir = app.config['grid_outputDir']
        outputDir = app.outputDir

        if app.softwareEnvironment is None:
            if app.config['local_softwareEnvironment'] is None:
                logger.error(
                    'RATUser requires softwareEnvironment to be defined if running on any backend other than LCG'
                )
                raise Exception
            else:
                app.softwareEnvironment = app.config[
                    'local_softwareEnvironment']

        voproxy = job.backend.voproxy
        if voproxy == None:
            #use the proxy from the environment (default behaviour)
            try:
                voproxy = os.environ["X509_USER_PROXY"]
            except:
                logger.error(
                    'Cannot run without voproxy either in environment (X509_USER_PROXY) or specified for WG backend'
                )
                raise Exception
        if not os.path.exists(voproxy):
            logger.error(
                'Valid WestGrid backend voproxy location MUST be specified: %s'
                % (voproxy))
            raise Exception

        rrArgs = ''
        spArgs = []

        rrArgs += '-g srm '  #always use the srm copy mode
        rrArgs += '-b %s ' % app.ratBaseVersion
        rrArgs += '-m %s ' % ratMacro
        rrArgs += '-d %s ' % outputDir
        rrArgs += '-e %s ' % app.softwareEnvironment
        rrArgs += '--voproxy %s ' % voproxy

        job.backend.extraopts += "-l pmem=2gb,walltime=28:00:00"
        if app.ratVersion != None:
            #add a memory requirement (compilation requires 2GB ram)
            #job.backend.extraopts+="-l pmem=2gb,walltime=28:00:00"
            rrArgs += '-v %s ' % app.ratVersion
            #ship code to backend
            zipFileName = app.zipFileName
            decimated = zipFileName.split('/')
            zipFileName = decimated[len(decimated) - 1]
            rrArgs += '-f %s ' % zipFileName
        if app.outputFile:
            rrArgs += '-o %s ' % app.outputFile
        if app.inputFile:
            rrArgs += '-i %s ' % app.inputFile
        if app.nEvents:
            rrArgs += '-N %s ' % app.nEvents
        elif app.tRun:
            rrArgs += '-T %s ' % app.tRun
        if app.useDB:
            rrArgs += '--dbuser %s ' % (app.config['rat_db_user'])
            rrArgs += '--dbpassword %s ' % (app.config['rat_db_pswd'])
            rrArgs += '--dbname %s ' % (app.config['rat_db_name'])
            rrArgs += '--dbprotocol %s ' % (app.config['rat_db_protocol'])
            rrArgs += '--dburl %s ' % (app.config['rat_db_url'])
        if app.discardOutput:
            rrArgs += '--nostore '

        spArgs += ['-a', '%s' % rrArgs]
        spArgs += ['ratRunner.py', 'wg']

        app._getParent().inputsandbox.append('%s/ratRunner.py' %
                                             _app_directory)
        app._getParent().inputsandbox.append('%s/job_tools.py' %
                                             _app_directory)
        app._getParent().inputsandbox.append('%s/check_root_output.py' %
                                             _app_directory)

        return StandardJobConfig(File('%s/sillyPythonWrapper.py' %
                                      _app_directory),
                                 inputbox=app._getParent().inputsandbox,
                                 outputbox=app._getParent().outputsandbox,
                                 args=spArgs)
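
ratRunner's options are collapsed into one space-separated string and handed to sillyPythonWrapper via a single -a argument. A hedged sketch of how that string could be split back into an argument vector inside the wrapper; the wrapper script itself is not shown, so this is illustrative only:

import shlex

def split_wrapped_args(arg_string):
    """Split the single -a payload back into individual ratRunner options."""
    return shlex.split(arg_string)

# split_wrapped_args('-g srm -b 6.16.04 -m job.mac -d output/dir')
# -> ['-g', 'srm', '-b', '6.16.04', '-m', 'job.mac', '-d', 'output/dir']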
Example 10
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''Prepare method: called to configure the job for the specified backend.
        '''
        logger.debug('RAT::LCGRTHandler prepare ...')
        from Ganga.Lib.LCG import LCGJobConfig

        job = app.getJobObject()

        #remove the leading directory path of the macro (on the grid node we'll just
        #have the macro file)
        decimated = app.ratMacro.split('/')
        ratMacro = decimated[len(decimated) - 1]

        #Set the output directory
        if app.outputDir is None:
            if app.config['grid_outputDir'] is None:
                logger.error('Output directory not defined')
                raise Exception
            else:
                app.outputDir = app.config['grid_outputDir']
        outputDir = app.outputDir
        lfcDir = os.path.join('lfn:/grid/snoplus.snolab.ca', outputDir)

        # Check the current worker node permissions
        grid_config = RATUtil.GridConfig.get_instance()
        for ce in grid_config.get_excluded_worker_nodes():
            job.backend.requirements.excludedCEs += '%s ' % ce

        # By default require 1500 MB of RAM
        memory_set = False
        if len(job.backend.requirements.other) != 0:
            for r in job.backend.requirements.other:
                if "GlueHostMainMemoryRAMSize" in r:
                    memory_set = True
        if not memory_set:
            job.backend.requirements.other += [
                'other.GlueHostMainMemoryRAMSize >= 1500'
            ]

        #on the grid, we need to use our own version of python
        #so have to send a python script to setup the correct environment
        #AND then run the correct python script!

        rrArgs = ''  #args to send to ratRunner
        spArgs = []  #args to send to sillyPythonWrapper

        #ensure the rrArgs are space separated
        rrArgs += '-g lcg '  #always at lcg
        rrArgs += '-b %s ' % app.ratBaseVersion
        rrArgs += '-m %s ' % ratMacro
        rrArgs += '-d %s ' % outputDir
        if app.softwareEnvironment is None:
            # The relative path for CVMFS, SNOPLUS_CVMFS_DIR will be set at the backend
            # Note the extra \ to escape the dollar in the initial python wrapper
            rrArgs += '-e \$SNOPLUS_CVMFS_DIR/sw/%s/env_rat-%s.sh ' % (
                app.ratBaseVersion, app.ratBaseVersion)
        else:
            rrArgs += '-e %s ' % app.softwareEnvironment

        if app.ratVersion is not None:
            rrArgs += '-v %s ' % app.ratVersion
            #ship code to backend
            zipFileName = app.zipFileName
            decimated = zipFileName.split('/')
            zipFileName = decimated[len(decimated) - 1]
            rrArgs += '-f %s ' % zipFileName
        if app.outputFile:
            rrArgs += '-o %s ' % app.outputFile
        if app.inputFile:
            rrArgs += '-i %s ' % app.inputFile
        if app.nEvents:
            rrArgs += '-N %s ' % app.nEvents
        elif app.tRun:
            rrArgs += '-T %s ' % app.tRun
        if app.useDB:
            rrArgs += '--dbuser %s ' % (app.config['rat_db_user'])
            rrArgs += '--dbpassword %s ' % (app.config['rat_db_pswd'])
            rrArgs += '--dbname %s ' % (app.config['rat_db_name'])
            rrArgs += '--dbprotocol %s ' % (app.config['rat_db_protocol'])
            rrArgs += '--dburl %s ' % (app.config['rat_db_url'])
        if app.discardOutput:
            rrArgs += '--nostore '

        spArgs += ['-a', '"%s"' % rrArgs]  #appends ratRunner args
        spArgs += ['ratRunner.py', 'lcg']
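        # note: rrArgs is wrapped in quotes here (unlike the WG handler above) so the
        # wrapper receives the whole ratRunner argument string as a single argument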

        app._getParent().inputsandbox.append('%s/ratRunner.py' %
                                             _app_directory)
        app._getParent().inputsandbox.append('%s/job_tools.py' %
                                             _app_directory)
        app._getParent().inputsandbox.append('%s/check_root_output.py' %
                                             _app_directory)

        return LCGJobConfig(File('%s/sillyPythonWrapper.py' % _app_directory),
                            inputbox=app._getParent().inputsandbox,
                            outputbox=app._getParent().outputsandbox,
                            args=spArgs)
Ejemplo n.º 11
0
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''Prepare method: called to configure the job for the specified backend.
        '''
        logger.debug('RAT::RTHandler prepare ...')
        from Ganga.GPIDev.Adapters.StandardJobConfig import StandardJobConfig

        job = app.getJobObject()

        #remove the leading directory path of the macro (on the grid node we'll just
        #have the macro file)
        decimated = app.ratMacro.split('/')
        ratMacro = decimated[len(decimated) - 1]

        if app.outputDir is None:
            if app.config['local_outputDir'] is None:
                logger.error('Output directory not defined')
                raise Exception
            else:
                app.outputDir = app.config['local_outputDir']
        outputDir = app.outputDir
        if app.softwareEnvironment is None:
            if app.config['local_softwareEnvironment'] is None:
                logger.error(
                    'RATUser requires softwareEnvironment to be defined if running on any backend other than LCG'
                )
                raise Exception
            else:
                app.softwareEnvironment = app.config[
                    'local_softwareEnvironment']
        if app.environment == []:
            if app.config['local_environment'] != []:
                app.environment = app.config['local_environment']

        if app.environment == []:
            args = [
                '-b', app.ratBaseVersion, '-m', ratMacro, '-d', outputDir,
                '-e', app.softwareEnvironment
            ]
            if app.ratVersion is not None:
                args += ['-v', app.ratVersion]
                #ship code to backend
                zipFileName = app.zipFileName
                decimated = zipFileName.split('/')
                zipFileName = decimated[len(decimated) - 1]
                args += ['-f', zipFileName]
            if app.outputFile:
                args += ['-o', app.outputFile]
            if app.inputFile:
                args += ['-i', app.inputFile]
            if app.nEvents:
                args += ['-N', str(app.nEvents)]
            elif app.tRun:
                args += ['-T', app.tRun]
            if app.useDB:
                args += ['--dbuser', app.config['rat_db_user']]
                args += ['--dbpassword', app.config['rat_db_pswd']]
                args += ['--dbname', app.config['rat_db_name']]
                args += ['--dbprotocol', app.config['rat_db_protocol']]
                args += ['--dburl', app.config['rat_db_url']]
            if app.discardOutput:
                args += ['--nostore']

            app._getParent().inputsandbox.append('%s/job_tools.py' %
                                                 _app_directory)
            app._getParent().inputsandbox.append('%s/check_root_output.py' %
                                                 _app_directory)

            return StandardJobConfig(File('%s/ratRunner.py' % _app_directory),
                                     inputbox=app._getParent().inputsandbox,
                                     outputbox=app._getParent().outputsandbox,
                                     args=args)

        else:  #running somewhere a specific environment needs to be setup first
            #can either use a specific file or a list of strings.  the latter needs to be converted to a temp file and shipped.
            envFile = None
            rrArgs = ''
            spArgs = []

            rrArgs += '-b %s ' % app.ratBaseVersion
            rrArgs += '-m %s ' % ratMacro
            rrArgs += '-d %s ' % outputDir
            rrArgs += '-e %s ' % app.softwareEnvironment

            if type(app.environment) == list:
                #need to get the username
                tempname = 'tempRATUserEnv_%s' % os.getlogin()
                tempf = open('/tmp/%s' % (tempname), 'w')
                for line in app.environment:
                    tempf.write('%s \n' % line)
                tempf.close()
                app._getParent().inputsandbox.append('/tmp/%s' % (tempname))
                envFile = tempname
            else:
                app._getParent().inputsandbox.append(app.environment)
                envFile = os.path.basename(app.environment)
            if app.ratVersion is not None:
                rrArgs += '-v %s ' % app.ratVersion
                #ship code to backend
                zipFileName = app.zipFileName
                decimated = zipFileName.split('/')
                zipFileName = decimated[len(decimated) - 1]
                rrArgs += '-f %s ' % zipFileName
            if app.outputFile:
                rrArgs += '-o %s ' % app.outputFile
            if app.inputFile:
                rrArgs += '-i %s ' % app.inputFile
            if app.nEvents:
                rrArgs += '-N %s ' % app.nEvents
            elif app.tRun:
                rrArgs += '-T %s ' % app.tRun
            if app.useDB:
                rrArgs += '--dbuser %s ' % (app.config['rat_db_user'])
                rrArgs += '--dbpassword %s ' % (app.config['rat_db_pswd'])
                rrArgs += '--dbname %s ' % (app.config['rat_db_name'])
                rrArgs += '--dbprotocol %s ' % (app.config['rat_db_protocol'])
                rrArgs += '--dburl %s ' % (app.config['rat_db_url'])
            if app.discardOutput:
                rrArgs += '--nostore '

            spArgs += ['-f', envFile]
            spArgs += ['-a', '%s' % rrArgs]
            spArgs += ['ratRunner.py', 'misc']
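            # resulting invocation (illustrative): sillyPythonWrapper.py -f <envFile> -a "<rrArgs>" ratRunner.py misc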

            app._getParent().inputsandbox.append('%s/ratRunner.py' %
                                                 _app_directory)
            app._getParent().inputsandbox.append('%s/job_tools.py' %
                                                 _app_directory)
            app._getParent().inputsandbox.append('%s/check_root_output.py' %
                                                 _app_directory)

            return StandardJobConfig(File('%s/sillyPythonWrapper.py' %
                                          _app_directory),
                                     inputbox=app._getParent().inputsandbox,
                                     outputbox=app._getParent().outputsandbox,
                                     args=spArgs)
Ejemplo n.º 12
0
   def _initSubJob(self, masterjob, dataset, lfnList, guidList, subCollection): 
       from Ganga.GPIDev.Lib.Job import Job
       from GangaAtlas.Lib.ATLASDataset import DQ2Dataset

       #print "########################"
       #print "SUBJOB INITIALISED"
       #print "########################"
       
       subjob = Job()
       subjob.inputsandbox = masterjob.inputsandbox
       subjob.application = masterjob.application
       subjob.outputdata = masterjob.outputdata
       subjob.outputsandbox = masterjob.outputsandbox
       subjob.backend = masterjob.backend

       # attributes which are different for each sub-job
       subjob.inputdata = DQ2Dataset()   
       #subjob.inputdata.datatype = 'DATA'
       subjob.inputdata.dataset = dataset
       #print "########################"
       #print "DATASET USED"
       #print dataset
       #print "########################"
       subjob.inputdata.names = lfnList
       #print "########################"
       #print "LFNLIST USED"
       #print lfnList
       #print "########################" 
       subjob.inputdata.guids = guidList
       #print "########################"
       #print "GUIDLIST USED"
       #print guidList
       #print "########################"
      
         
       if self.match_ce:
            #subjob.inputdata.type = 'TNT_LOCAL'

            # Sort out the possible sites taking into account requirements
            allowed_sites = []
            if subjob.backend.requirements.sites:
               allowed_sites = subjob.backend.requirements.sites
            elif subjob.backend.requirements.cloud:
               allowed_sites = subjob.backend.requirements.list_sites_cloud()
            else:
               raise ApplicationConfigurationError(None,'TntJobSplitter requires a cloud or a site to be set - please use the --cloud option, j.backend.requirements.cloud=CLOUDNAME (T0, IT, ES, FR, UK, DE, NL, TW, CA, US, NG) or j.backend.requirements.sites=SITENAME')

            if subjob.backend.requirements.sites:
               allowed_sites = subjob.backend.requirements.sites

            allowed_sites_all = subjob.backend.requirements.list_sites(True,True)
            # Apply GangaRobot blacklist
            newsites = []
            for site in allowed_sites:
               if site in allowed_sites_all:
                  newsites.append(site)
                  
            allowed_sites = newsites
                    
            # go through and check which sites with the dataset is given by the requirements
            sub_sites = []
            for site in subjob.inputdata.get_locations():
               if site in allowed_sites:
                  sub_sites.append(site)
                  
            if len(sub_sites) == 0:
               raise ApplicationConfigurationError(None,'TntJobSplitter could not find a location for dataset %s in cloud %s. Try another cloud!' % (subjob.inputdata.dataset, subjob.backend.requirements.cloud))
            else:
               subjob.backend.requirements.sites = sub_sites

            #print sub_sites
                        
       else:
            subjob.inputdata.type = 'TNT_DOWNLOAD'   #requires PFNs in sfn:// format

       subjob.inputsandbox += [ File(os.path.join(subCollection+".root")) ]
       
       return subjob
Ejemplo n.º 13
0
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('TagPrepareLCGRTHandler master_prepare called: %s',
                     job.id)

        self.username = gridProxy.identity(safe=True)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        # prepare input sandbox
        inputbox = [(File(os.path.join(__athdirectory__,
                                       'athena-utility.sh'))),
                    (File(os.path.join(__directory__, 'get_tag_info.py')))]

        # CN: added TNTJobSplitter clause
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            _append_files(
                inputbox,
                os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
                os.path.join(__athdirectory__, 'dq2_get'),
                os.path.join(__athdirectory__, 'dq2info.tar.gz'))

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in [
                'FILE_STAGER'
        ]:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(
                inputbox,
                os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
        if not 'dq2tracerreport.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'dq2tracerreport.py'))
        if not 'db_dq2localid.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'db_dq2localid.py'))
        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox,
                          os.path.join(__athdirectory__, 'getstats.py'))

        _append_files(inputbox, os.path.join(__athdirectory__, 'libdcap.so'))

        if job.inputsandbox: inputbox += job.inputsandbox

        # prepare environment
        environment = {
            'MAXNUMREFS': str(app.max_num_refs),
            'STREAM_REF': app.stream_ref,
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': '',
            'ATHENA_USERSETUPFILE': '',
            'ATLAS_PROJECT': '',
            'ATLAS_EXETYPE': 'ATHENA',
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']
        requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name
                                            == 'AtlasLCGRequirements'
                                            and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        cmtconfig = app.atlas_cmtconfig
        if not cmtconfig in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
            cmtconfig = 'i686-slc4-gcc34-opt'

        requirements.software = [
            'VO-atlas-offline-%s-%s' % (app.atlas_release, cmtconfig)
        ]
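        # e.g. (illustrative) 'VO-atlas-offline-16.0.2-i686-slc5-gcc43-opt'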

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata.type in [
                'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ]:
            dq2client_version = None
            try:
                # override the default one if dq2client_version is present
                # in the job backend's requirements object
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                requirements.software += [
                    'VO-atlas-dq2clients-%s' % dq2client_version
                ]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version


#       jobscript

        exe = os.path.join(__directory__, 'run-tagprepare-lcg.sh')
        #exe = os.path.join(__directory__,'get_tag_info.py')

        #       output sandbox
        outputbox = ['taginfo.pkl']

        if job.outputsandbox: outputbox += job.outputsandbox

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
Ejemplo n.º 14
0
    def master_prepare(self, app, appmasterconfig):
        if app.siteroot:
            os.environ["SITEROOT"] = app.siteroot
        os.environ["CMTSITE"] = app.cmtsite

        job = app._getParent()
        if job.backend._name in ["Local", "PBS"]:
            if app.dryrun:
                os.environ["SITEROOT"] = "NONE"
                os.environ["CMTSITE"] = "NONE"
            try:
                assert "SITEROOT" in os.environ
            except:
                raise ApplicationConfigurationError(
                    None, " ATLAS environment not defined")

            try:
                assert "CMTSITE" in os.environ
            except:
                raise ApplicationConfigurationError(
                    None,
                    "cmt not setup properly. Please check your ATLAS setup or run on the grid"
                )

            if os.environ["CMTSITE"] == "CERN" and "AtlasVersion" in os.environ:
                logger.debug(
                    "Checking AtlasVersion: %s and selected atlas release %s" %
                    (os.environ["AtlasVersion"], app.atlas_rel))
                try:
                    assert app.atlas_rel == os.environ["AtlasVersion"]
                except:
                    logger.error(
                        "Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value."
                        % os.environ["AtlasVersion"])
                    app.atlas_release = os.environ["AtlasVersion"]
                    app.atlas_rel = os.environ["AtlasVersion"]
            elif "ATLAS_RELEASE" in os.environ:
                logger.debug(
                    "Checking ATLAS_RELEASE: %s and selected atlas release %s"
                    % (os.environ["ATLAS_RELEASE"], app.atlas_rel))
                try:
                    assert app.atlas_rel == os.environ["ATLAS_RELEASE"]
                except:
                    logger.error(
                        "Mismatching atlas release. Local setup is %s, resetting requested atlas release to local value."
                        % os.environ["ATLAS_RELEASE"])
                    app.atlas_rel = os.environ["ATLAS_RELEASE"]
            else:
                logger.warning(
                    "Could not compare requested release and local setup. Hope you are doing something sensible..."
                )

        if job.backend._name == "LSF":
            try:
                assert "CMTSITE" in os.environ and os.environ[
                    "CMTSITE"] == "CERN"
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Error, CERN ATLAS AFS environment not defined. Needed by LSF backend"
                )

        environment = {'T_LCG_GFAL_INFOSYS': 'atlas-bdii.cern.ch:2170'}

        trfopts = app.transflags
        # need to parse them to be able to pass them in an environment variable
        trfopts = trfopts.replace(" ", "/W")
        trfopts = trfopts.replace("-", "/F")

        trflags = trfopts
        if app.mode == "evgen":
            trflags = "/Ft"
            if app.verbosity:
                trflags += "/W/Fl/W%s" % app.verbosity

        if trflags:
            environment["TRFLAGS"] = trflags

        # setting output site from input data if any.
        outsite, backup, outputlocation, backuplocation = "", "", "", ""
        logger.info("checking sites from input data: %s" % str(app.sites))

        # must distinguish running site (backend.requirements.sites) and output storage site (app.se_name)

        # matching with user's wishes (app.se_name or backend.requirements.sites)

        usersites = []
        if len(job.backend.requirements.sites) > 0:
            usersites = job.backend.requirements.sites
##        elif job.application.se_name and job.application.se_name != "none":
##            usersites=job.application.se_name.split(" ")
        logger.info("user selection: %s" % str(usersites))

        # select sites which are matching user's wishes, if any.
        selectedSites = app.sites
        if len(selectedSites) == 0:
            selectedSites = usersites
        if len(usersites) > 0 and len(app.sites) > 0:
            selectedSites = job.inputdata.trimSites(usersites, app.sites)
        # evgen case (no input data-> app.sites=[])
        if len(app.sites) == 0 and app.se_name and app.se_name != "none":
            selectedSites = app.se_name.split(" ")

        # This comes last: using surviving sites from matching process.
        if len(selectedSites) == 0:
            try:
                assert len(usersites) == 0
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Could not find a match between input dataset locations: %s and your requested sites: %s. Please use a space token compatible with one of the input dataset locations (replace _XXXDISK or _XXXTAPE by _LOCALGROUPDISK or _SCRATCHDISK if necessary)"
                    % (str(app.sites), str(usersites)))
            logger.warning(
                "Failed to obtain processing site from input data, will use default value: CERN-PROD_SCRATCHDISK and submit production to CERN"
            )
            selectedSites.append(_defaultSite)

        [outlfc, outsite,
         outputlocation] = job.outputdata.getDQ2Locations(selectedSites[0])
        if len(selectedSites) > 1:
            [outlfc2, backup,
             backuplocation] = job.outputdata.getDQ2Locations(selectedSites[1])

        # app.se_name set: users wishes to get the output data written to another site than the one hosting the input.
        # One needs to ensure that this location is at least in the same cloud as the targetted processing site. This is done by insuring that the lfcs are the same.
        userSEs = []
        outse = ""
        if job.application.se_name and job.application.se_name != "none":
            userSEs = job.application.se_name.split(" ")
            # loop through userSEs until up to 2 valid sites are found...
            outse = ""
            for SE in userSEs:
                [lfc, se, location] = job.outputdata.getDQ2Locations(SE)
                if lfc == outlfc:
                    if not outse:
                        outse = se  # important to use outse and not outsite here, as outsite is used for selection of processing site.
                        # userSEs overrides outlfc,outputlocation, but not outsite as outsite is unfortunately used for choice of the processing site.
                        outputlocation = location
                    else:
                        outlfc2 = lfc
                        backup = se
                        backuplocation = location
                        break
        # finally: if no backup location is defined at this point, enforce CERN-PROD_SCRATCHDISK as backup location
        if backup == "":
            [outlfc2, backup,
             backuplocation] = job.outputdata.getDQ2Locations(_defaultSite)

        logger.info("Final selection of output sites: %s , backup: %s" %
                    (outsite, backup))
        try:
            assert outsite
        except:
            raise ApplicationConfigurationError(
                None,
                "Could not find suitable location for your output. Please subscribe your input dataset (if any) to a suitable location or change application.se_name to a suitable space token"
            )

        # srmv2 sites special treatment: the space token has been prefixed to the outputlocation and must be removed now:
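        # e.g. (illustrative) 'token:ATLASSCRATCHDISK:srm://host/path' -> spacetoken='ATLASSCRATCHDISK', outputlocation='srm://host/path'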
        imin = string.find(outputlocation, "token:")
        imax = string.find(outputlocation, "srm:")
        spacetoken = ""
        if imin > -1 and imax > -1:
            spacetoken = outputlocation[imin + 6:imax - 1]
            outputlocation = outputlocation[imax:]
        # same treatment for backup location if any
        imin = string.find(backuplocation, "token:")
        imax = string.find(backuplocation, "srm:")
        bst = ""
        if imin > -1 and imax > -1:
            bst = backuplocation[imin + 6:imax - 1]
            backuplocation = backuplocation[imax:]

        environment["OUTLFC"] = outlfc
        environment["OUTSITE"] = outsite
        if outse:
            environment[
                "OUTSITE"] = outse  # user's choice for output storage location overriding AthenaMC's.

        environment["OUTPUT_LOCATION"] = outputlocation
        if spacetoken:
            environment["SPACETOKEN"] = spacetoken
        if backup:
            environment["OUTLFC2"] = outlfc2
            environment["OUTSITE2"] = backup
            environment["OUTPUT_LOCATION2"] = backuplocation

        environment["PROD_RELEASE"] = app.prod_release

        # setting environment["BACKEND"]
        # Local, Condor become "batch". LSF becomes "batch" unless the inputdata is on castor (in this case, it becomes "castor")
        environment["BACKEND"] = job.backend._name
        environment["BACKEND_DATA"] = app.backend_inputdata
        if job.backend._name == "LSF" and len(app.turls.values()) > 0:
            turl = app.turls.values()[0]
            if string.find(turl, "castor") > -1:
                environment["BACKEND_DATA"] = "castor"
            else:
                environment["BACKEND_DATA"] = "batch"
        if job.backend._name in ["Local", "Condor", "PBS"]:
            environment["BACKEND_DATA"] = "batch"
            environment["SITEROOT"] = os.environ["SITEROOT"]
            environment["CMTSITE"] = os.environ["CMTSITE"]

#       finalise environment

# preparing input sandbox, output sandbox , environment vars and job requirements

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'setup-release.sh')),
            File(os.path.join(os.path.dirname(__file__), 'stage-in.sh')),
            File(os.path.join(os.path.dirname(__file__), 'stage-out.sh')),
            File(os.path.join(os.path.dirname(__file__), 'adler32.py'))
        ]

        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [File(app.transform_archive)]
        elif app.transform_archive:
            # tarball in local or remote web area.
            if string.find(app.transform_archive, "http") >= 0:
                environment['TRANSFORM_ARCHIVE'] = "%s" % (
                    app.transform_archive)
            else:
                myfile = os.path.basename(app.transform_archive)
                myfile = "http://cern.ch/atlas-computing/links/kitsDirectory/Production/kits/" + myfile
                environment['TRANSFORM_ARCHIVE'] = "%s" % (myfile)

        if app.evgen_job_option and os.path.exists(app.evgen_job_option):
            # locally modified job option file to add to the input sand box
            inputbox += [File(app.evgen_job_option)]
            # need to strip the path away.
            self.evgen_job_option = app.evgen_job_option.split("/")[-1]
            environment['CUSTOM_JOB_OPTION'] = "%s" % (self.evgen_job_option)
        elif app.evgen_job_option:
            self.evgen_job_option = app.evgen_job_option

        # user area:
        if app.userarea:
            inputbox.append(File(app.userarea))
            environment['USER_AREA'] = os.path.basename(app.userarea)

        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [file]

        outputbox = []
        outputGUIDs = 'output_guids'
        outputLOCATION = 'output_location'
        outputbox.append(outputGUIDs)
        outputbox.append(outputLOCATION)
        outputbox.append('output_data')
        if (job.outputsandbox):
            for file in job.outputsandbox:
                outputbox += [file]

        # switch JobTransforms/AtlasProduction package.
        self.isJT = string.find(app.transform_archive, "JobTransform")
        if self.isJT > -1 and app.mode == "evgen":
            environment['T_CONTEXT'] = str(
                self.number_events_job
            )  # needed to avoid prodsys failure mechanism based on a hardcoded minimum number of event of 5000 per job

        #       prepare job requirements

        if hasattr(job.backend, 'requirements') and hasattr(
                job.backend.requirements, 'sites') and hasattr(
                    job.backend.requirements, 'software') and hasattr(
                        job.backend.requirements, 'other'):
            requirements = job.backend.requirements
        else:
            requirements = AtlasLCGRequirements()

#        requirements.other.append('other.GlueCEStateStatus=="Production"') # missing production
        imax = string.rfind(app.atlas_rel, ".")
        rel = string.atof(
            app.atlas_rel[:imax]
        )  # to deal with string comparisons: [2-9].0.0 > 11.0.0.
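        # e.g. (illustrative) app.atlas_rel = "15.6.1" -> rel = 15.6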
        if app.atlas_rel <= "11.4.0" or rel <= 11.4:
            requirements.software = ['VO-atlas-release-%s' % app.atlas_rel]
        elif app.atlas_rel < "12.0.3":
            requirements.software = ['VO-atlas-offline-%s' % app.atlas_rel]
        elif app.atlas_rel >= "14.0.0" and app.atlas_rel <= "15.6.1":
            requirements.software = [
                'VO-atlas-offline-%s-i686-slc4-gcc34-opt' % app.atlas_rel
            ]
        elif app.atlas_rel > "15.6.1":
            requirements.software = [
                'VO-atlas-offline-%s-i686-slc5-gcc43-opt' % app.atlas_rel
            ]
        else:
            requirements.software = ['VO-atlas-production-%s' % app.atlas_rel]
        # case of prod_release set
        if app.prod_release:
            # no prod release tag before 13.0.X
            if app.atlas_rel < "14.0.0" and app.atlas_rel > "13.0.0":
                requirements.software = [
                    'VO-atlas-production-%s' % app.prod_release
                ]
            elif app.atlas_rel >= "14.0.0" and app.atlas_rel <= "15.6.1":
                requirements.software = [
                    'VO-atlas-production-%s-i686-slc4-gcc34-opt' %
                    app.prod_release
                ]
            elif app.atlas_rel > "15.6.1":
                requirements.software = [
                    'VO-atlas-production-%s-i686-slc5-gcc43-opt' %
                    app.prod_release
                ]

        if app.transform_archive and string.find(app.transform_archive,
                                                 "AtlasTier0") > -1:
            requirements.software = ['VO-atlas-tier0-%s' % app.prod_release]
##        extraConfig=getConfig('defaults_AtlasLCGRequirements')
##        if  'dq2client_version' in extraConfig:
##            dq2client_version = extraConfig['dq2client_version']

##        if job.backend.requirements.dq2client_version:
##            dq2client_version = job.backend.requirements.dq2client_version
##        try:
##            assert dq2client_version!=""
##        except:
##            raise  ApplicationConfigurationError(None,"Please give a value to dq2client_version in job.backend.requirements.")

#        requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
#        requirements.other+=['RegExp("VO-atlas-dq2clients",other.GlueHostApplicationSoftwareRunTimeEnvironment)']

# controlled relaxation for simple cases: one single input dataset, less than 200 subjobs. In this case, the subjobs can be submitted to the whole cloud.
        loosematch = "true"
        if job.splitter and job.splitter.numsubjobs > 200:
            loosematch = "false"
        if job.inputdata and (job.inputdata.cavern or job.inputdata.minbias):
            loosematch = "false"
# commented the next block out as stage-in.sh can now ensure that the local copy is downloaded on the first attempt. However, as a safety net, we maintain the veto on complex jobs with pileup and/or minbias, because they are heavyweight anyway and should not be run everywhere.
#        if app.dbrelease:
#            loosematch="false"
        if len(job.backend.requirements.sites) > 0:
            loosematch = "false"  # specified sites take precedence over cloud.

        userCloud = job.backend.requirements.cloud
        if userCloud == 'ALL':
            userCloud = ''  # not supporting the AthenaLCGRequirements catch-all
        # By default: job to data, strict: target outsite and nothing else.
        requirements.sites = outsite

        if loosematch == "true" and userCloud:
            logger.debug(
                "Your job qualifies for controlled relaxation of the current job-to-data policy. Now checking that requested cloud matches with input data"
            )

            from dq2.info.TiersOfATLAS import whichCloud, ToACache
            targetSites = whichCloud(outsite)
            cloud = ""
            for cloudID, sites in ToACache.dbcloud.iteritems():
                if sites == targetSites:
                    cloud = cloudID
            try:
                assert cloud == userCloud
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Requested cloud: %s did not match selected processing cloud: %s. Reverting to submission to site %s"
                    % (userCloud, cloud, outsite))

            requirements.cloud = cloud
            # looks like cloud has to be converted in a list of sites anyway, and this is not done in AtlasLCGRequirements.convert()...
            allsites = requirements.list_sites_cloud()
            try:
                assert len(allsites) > 0
            except:
                raise ApplicationConfigurationError(
                    None,
                    "Could not get any sites from the specified cloud: %s. You will have to specify a target site in job.backend.requirements.sites"
                    % cloud)
            # need to weed out unwanted sites from excluded list
            excludedSites = requirements.excluded_sites
            goodsites = []
            for checksite in allsites:
                selsite = True
                for site in excludedSites:
                    imax = site.find("_")
                    shortSite = site[:imax]
                    if shortSite in checksite:
                        #  print "site is excluded, skipping ", checksite
                        selsite = False
                        break
                if selsite and checksite not in goodsites:
                    goodsites.append(checksite)
#            print len(allsites),len(goodsites)
            if len(goodsites) > 0:
                allsites = goodsites
            job.backend.requirements.sites = allsites
            logger.debug(
                "Relaxing job to data policy to job to cloud. Selected cloud is %s"
                % cloud)

        logger.debug("master job submit?")

        if job.backend._name == "LCG" or job.backend._name == "Cronus" or job.backend._name == "Condor" or job.backend._name == "NG":
            return LCGJobConfig("", inputbox, [], outputbox, environment, [],
                                requirements)
        else:
            return StandardJobConfig("", inputbox, [], outputbox, environment)
Ejemplo n.º 15
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

        #       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []

        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name in [
                        'DQ2Dataset', 'AMIDataset', 'EventPicking'
                ]:
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None,
                            'No inputdata has been specified. Failure in job %s.%s. Dataset %s'
                            %
                            (job._getRoot().id, job.id, job.inputdata.dataset))
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError(
                                None,
                                "Cannot provide both tag_info and run as '%s'. Please use one or the other!"
                                % job.inputdata.type)

                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning(
                                "TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead."
                            )
                            job.inputdata.type = 'DQ2_COPY'

                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file][
                                    'refs']:
                                add_files.append(ref[1] + ':' + ref[0] + ':' +
                                                 ref[2])

                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                            'TAG_LOCAL', 'TAG_COPY'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if not job.inputdata.datatype in [
                            'DATA', 'MC', 'MuonCalibStream'
                    ]:
                        job.inputdata.datatype = 'MC'

            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [
                            line.strip() for line in pfnListFile
                        ]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name in [
                        'DQ2Dataset', 'AMIDataset', 'EventPicking'
                ]:
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                            'TAG_LOCAL', 'TAG_COPY'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if not job.inputdata.datatype in [
                            'DATA', 'MC', 'MuonCalibStream'
                    ]:
                        job.inputdata.datatype = 'MC'

                    input_guids, input_files = _splitlist(
                        job.inputdata.get_contents())

                    if job.inputdata.tagdataset:
                        input_tag_guids, input_tag_files = _splitlist(
                            job.inputdata.get_tag_contents())
                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(
                            job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

#       prepare outputdata

        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if type(job.outputdata.location) == str and isDQ2SRMSite(
                            job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)

                    #if job.backend.requirements._name == 'AtlasLCGRequirements':
                    #    if job.backend.requirements.cloud:
                    #        if whichCloud(output_location) != job.backend.requirements.cloud:
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' %(job.outputdata.location, job.backend.requirements.cloud )
                    #            raise ApplicationConfigurationError(None, printout)
                    #    if job.backend.requirements.sites:
                    #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s'%(job.outputdata.location, job.backend.requirements.sites)
                    #            raise ApplicationConfigurationError(None,printout )

                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job._getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning(
                            'No default output location specified in the configuration.'
                        )
            if job.outputdata.location:
                job.outputdata.location = output_location
                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)
            if job.outputdata:
                # Remove trailing number if job is copied

                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat, output_location):
                    output_location = re.sub(pat, '', output_location)
                    output_location = os.path.join(output_location, jid)
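                    # e.g. (illustrative) 'base/5.0' + jid '7.1' -> 'base/5.0/7.1' -> 'base' -> 'base/7.1'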

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id == 0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname,
                                                       output_location)

            else:
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                            job.outputdata.location = output_location
                        else:
                            job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (
                not job.outputdata or
            (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
            raise ApplicationConfigurationError(
                None,
                'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.'
            )

#       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]
        if input_guids:
            _append_file_buffer(inputbox, 'input_guids', input_guids)
        if input_files:
            _append_file_buffer(inputbox, 'input_files', input_files)
        if add_files: _append_file_buffer(inputbox, 'add_files', add_files)
        if input_tag_guids:
            _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
        if input_tag_files:
            _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
        if input_esd_guids:
            _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
        if input_esd_files:
            _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append(
                        File(
                            os.path.join(
                                job.inputdata.tag_info[tag_file]['path'],
                                tag_file)))

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox, 'output_files',
                                job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment = jobmasterconfig.env.copy()
        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass

        if output_location and output_location.find(
                '/castor/cern.ch/grid/atlas/t0') >= 0:
            raise ApplicationConfigurationError(
                None,
                'You are trying to save the output to TIER0DISK - please use another area !'
            )
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2[
                'OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''

            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            first_tag_file = job.inputdata.tag_info.keys()[0]
            if job.inputdata.tag_info[first_tag_file][
                    'dataset'] != '' and job.inputdata.tag_info[
                        first_tag_file]['path'] == '':
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and (job.inputdata.type in [
                'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY'
        ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Work around for glite WMS spaced environement variable problem
        inputbox.append(
            FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(
                job._getRoot().splitter,
                'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0:
            environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
            environment['ATHENA_SKIP_EVENTS'] = str(
                job.application.skip_events)

        # pick event
        if job._getRoot().splitter and job._getRoot(
        ).inputdata and job._getRoot().inputdata._name == 'EventPicking':
            #Replace blank space
            environment['ATHENA_RUN_EVENTS'] = str(
                job.application.run_event).replace(' ', '')
            environment['ATHENA_FILTER_POLICY'] = str(
                job.inputdata.pick_filter_policy)

# append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox,
                                  environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config
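The handler above leans on an _append_file_buffer helper that is not included in this
snippet. Judging only from how it is called (a sandbox list, a name, and a list of
strings), a minimal sketch could look like the following; the real GangaAtlas helper
may differ in detail.

# Sketch only (assumption): pack a list of strings into a FileBuffer, one entry
# per line, and add it to the input sandbox, mirroring the calls made above.
def _append_file_buffer(inputbox, name, array):
    buffer = ''
    for line in array:
        buffer += '%s\n' % line
    inputbox.append(FileBuffer(name, buffer))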
Example No. 16
0
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == 'DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Check if all sites are in the same cloud
        if job.backend.requirements.sites:
            firstCloud = whichCloud(job.backend.requirements.sites[0])
            for site in job.backend.requirements.sites:
                cloud = whichCloud(site)
                if cloud != firstCloud:
                    printout = 'Job submission failed ! Sites specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                        job.backend.requirements.sites)
                    raise ApplicationConfigurationError(None, printout)

        #This next block would instruct ganga to use option_files that live in the
        #appropriate shared directory (the job will already have been prepared).
        #If is_prepared is True, then we've most likely submitted a job via GangaRobot
        #and we know what we're doing.
        #if app.is_prepared is not True:
        #    for position in xrange(len(app.option_file)):
        #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))
        # Expand Athena jobOptions
        if not app.atlas_exetype in ['EXE']:
            athena_options = ' '.join([
                os.path.basename(opt_file.name) for opt_file in app.option_file
            ])
            #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
            if app.options:
                athena_options = app.options + ' ' + athena_options

            inputbox = [File(opt_file.name) for opt_file in app.option_file]
        else:
            athena_options = ' '.join([
                os.path.basename(opt_file.name) for opt_file in app.option_file
            ])
            inputbox = []

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh')))

        if job.inputdata and job.inputdata._name == "AMIDataset" and job.inputdata.goodRunListXML.name != '':
            inputbox.append(File(job.inputdata.goodRunListXML.name))

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox, 'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox, 'ganga-stagein.py')

        if app.user_area.name:
            #we will now use the user_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_user_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.user_area.name))
                inputbox.append(File(tmp_user_name))
            else:
                inputbox.append(File(app.user_area.name))

        #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
        if app.group_area.name and str(app.group_area.name).find('http') < 0:
            #we will now use the group_area that's stored in the users shared directory
            if app.is_prepared is not True:
                tmp_group_name = os.path.join(
                    os.path.join(shared_path, app.is_prepared.name),
                    os.path.basename(app.group_area.name))
                inputbox.append(File(tmp_group_name))
            else:
                inputbox.append(File(app.group_area.name))

        if app.user_setupfile.name:
            inputbox.append(File(app.user_setupfile.name))

        # CN: added TNTJobSplitter clause

        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) or (job._getRoot().splitter
               and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get',
                          'dq2info.tar.gz')
            if job.inputdata and job.inputdata.type == 'LFC' and not (
                    job._getRoot().splitter
                    and job._getRoot().splitter._name == 'TNTJobSplitter'):
                _append_files(inputbox, 'dq2_get_old')

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py',
                          'dq2info.tar.gz')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            #if not job.outputdata.location:
            #    raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
            if not 'ganga-stage-in-out-dq2.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
            _append_files(inputbox, 'ganga-joboption-parse.py')
            if not 'dq2info.tar.gz' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'dq2info.tar.gz')

        #       add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        if not 'dq2tracerreport.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'dq2tracerreport.py')
        if not 'db_dq2localid.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'db_dq2localid.py')
        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'getstats.py')

        if str(app.atlas_release).find('12.') >= 0:
            _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so')
        else:
            _append_files(inputbox, 'libdcap.so')

        if job.inputsandbox: inputbox += job.inputsandbox

        #       prepare environment

        if not app.atlas_release:
            raise ApplicationConfigurationError(
                None,
                'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explicitly.'
            )

        environment = {
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': athena_options,
            'ATHENA_USERSETUPFILE': athena_usersetupfile,
            'ATLAS_PROJECT': app.atlas_project,
            'ATLAS_EXETYPE': app.atlas_exetype,
            'GANGA_VERSION': configSystem['GANGA_VERSION']
        }

        environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

        if app.atlas_environment:
            for var in app.atlas_environment:
                try:
                    match = re.match("^(\w+)=(.*)", var)
                    environment[match.group(1)] = match.group(2)
                except:
                    logger.warning(
                        'Athena.atlas_environment variable not correctly configured: %s',
                        var)

        if app.atlas_production and app.atlas_release.find(
                '12.') >= 0 and app.atlas_project != 'AtlasPoint1':
            temp_atlas_production = re.sub('\.', '_', app.atlas_production)
            prod_url = config[
                'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz'
            logger.info('Using Production cache from: %s', prod_url)
            environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                     or app.atlas_release.find('12.') <= 0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production

        if app.user_area.name:
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
        if app.group_area.name:
            if str(app.group_area.name).find('http') >= 0:
                environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(
                    app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.backend.requirements._name == 'AtlasLCGRequirements':
            requirements = AtlasLCGRequirements()
        elif job.backend.requirements._name == 'AtlasCREAMRequirements':
            requirements = AtlasCREAMRequirements()
        else:
            requirements = AtlasLCGRequirements()

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                environment['GANGA_LFC_HOST'] = job.inputdata.lfc

        if 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']

        if job.inputdata and (job.inputdata._name
                              in ['DQ2Dataset', 'AMIDataset', 'EventPicking']):
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                environment['DATASETTYPE'] = job.inputdata.type
                if job.inputdata.failover:
                    environment['DATASETFAILOVER'] = 1
                environment['DATASETDATATYPE'] = job.inputdata.datatype
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
                if job.inputdata.check_md5sum:
                    environment['GANGA_CHECKMD5SUM'] = 1

            else:
                raise ApplicationConfigurationError(
                    None,
                    'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
                )

            # Raise submission exception
            if (not job.backend.CE and not (job.backend.requirements._name in [
                    'AtlasLCGRequirements', 'AtlasCREAMRequirements'
            ] and job.backend.requirements.sites)
                    and not (job.splitter
                             and job.splitter._name == 'DQ2JobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'TNTJobSplitter')
                    and not (job.splitter
                             and job.splitter._name == 'AnaTaskSplitterJob')
                    and not (job.splitter
                             and job.splitter._name == 'ATLASTier3Splitter')):

                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )
            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

            # Add TAG datasetname
            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(
                    job.inputdata.tagdataset)

#       prepare job requirements
        requirementsSoftware = getLCGReleaseTag(app)

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error(
                'The athena release %s you are using is not recommended for distributed analysis !',
                requirementsSoftware[0])
            logger.error(
                'For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !'
            )
            requirements.software = requirementsSoftware
        else:
            requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find(
                'x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        #       add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ] and job.inputdata.type in [
                'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
        ] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            dq2client_version = None
            try:
                # override the default one if the dq2client_version is present
                # in the job backend's requirements object
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass
            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (
                    job.splitter and
                (job.splitter._name == 'DQ2JobSplitter'
                 or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
                )
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(
                    ':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except:
                logger.warning(
                    'Problems with the atlas_dbrelease configuration')

        # Fill AtlasLCGRequirements access mode
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning(
                "config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !"
            )
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(0)
            lines = fileHandle.read()
            inputbox.append(FileBuffer('access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'make_filestager_joption.py',
                              'dm_util.py', 'fs-copy.py')


#       jobscript

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')

        #       output sandbox
        outputbox = [
            'output_guids', 'output_location', 'output_data', 'stats.pickle'
        ]

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (
                job.inputdata and
            (job.inputdata._name
             in ['DQ2Dataset', 'AMIDataset', 'EventPicking'])
                and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']

        if job.outputsandbox: outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment,
                            [], requirements)
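For orientation, a job exercising this master_prepare might be put together from the
Ganga prompt roughly as below; the option file, release and dataset names are purely
illustrative and not taken from this snippet.

# Illustrative Ganga session (all concrete values are made up):
j = Job()
j.application = Athena()
j.application.option_file = ['AnalysisSkeleton_topOptions.py']
j.application.atlas_release = '17.2.7'
j.application.prepare()
j.inputdata = DQ2Dataset()
j.inputdata.dataset = 'data12_8TeV.periodA.physics_Muons.ExampleDataset/'
j.outputdata = DQ2OutputDataset()
j.outputdata.outputdata = ['AnalysisSkeleton.aan.root']
j.splitter = DQ2JobSplitter()
j.backend = LCG()
j.submit()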
Example No. 17
0
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug('TagPrepareLCGRTHandler master_prepare called: %s',
                     job.id)

        self.username = gridProxy.identity(safe=True)

        # prepare input sandbox
        if app.atlas_release == '':
            logger.warning(
                'No Athena release specified - defaulting to 15.6.9')
            app.atlas_release = '15.6.9'

        logger.warning(
            "Copying grid proxy to input sandbox for transfer to WN...")

        inputbox = [(File(os.path.join(__athdirectory__,
                                       'athena-utility.sh'))),
                    (File(os.path.join(__directory__, 'get_tag_info.py'))),
                    (File(os.path.join(__directory__, 'get_tag_info2.py'))),
                    (File(os.path.join(__directory__, 'template.root'))),
                    (File(gridProxy.location()))]

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in [
                'FILE_STAGER'
        ]:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')
            #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

        if job.inputsandbox: inputbox += job.inputsandbox

        #       prepare environment

        try:
            atlas_software = config['ATLAS_SOFTWARE']
        except ConfigError:
            raise ConfigError(
                'No default location of ATLAS_SOFTWARE specified in the configuration.'
            )

        # prepare environment
        environment = {
            'MAXNUMREFS': str(app.max_num_refs),
            'STREAM_REF': app.stream_ref,
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': '',
            'ATLAS_SOFTWARE': atlas_software,
            'ATHENA_USERSETUPFILE': '',
            'ATLAS_PROJECT': '',
            'ATLAS_EXETYPE': 'ATHENA',
            'GANGA_GLITE_UI': getConfig('LCG')['GLITE_SETUP'],
            'DQ2_SETUP': getConfig('defaults_DQ2SandboxCache')['setup'],
            'GANGA_VERSION': configSystem['GANGA_VERSION'],
            'PROXY_NAME': os.path.basename(gridProxy.location()),
            'GANGA_OUTPUT_PATH': job.outputdir
        }

        if app.lcg_prepare:
            environment['LCG_PREPARE'] = '1'


#       jobscript

        exe = os.path.join(__directory__, 'run-tagprepare-local.sh')

        #       output sandbox
        if app.lcg_prepare:
            outputbox = ['taginfo.pkl', 'subcoll.tar.gz']
        else:
            outputbox = ['taginfo.pkl']

        if job.outputsandbox: outputbox += job.outputsandbox

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
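This handler copies the user's grid proxy into the input sandbox and records its
basename in PROXY_NAME. How the worker node picks it up is not shown here; a minimal
sketch of that step (an assumption, not taken from run-tagprepare-local.sh) could be:

# Worker-node side sketch (assumption): point X509_USER_PROXY at the proxy file
# that was shipped in the sandbox under the name recorded in PROXY_NAME.
import os

def setup_shipped_proxy():
    proxy_name = os.environ.get('PROXY_NAME', '')
    if proxy_name and os.path.exists(proxy_name):
        os.environ['X509_USER_PROXY'] = os.path.abspath(proxy_name)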
Example No. 18
0
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):

        logger.debug('RAT::LCGRTHandler prepare ...')
        from Ganga.Lib.LCG import LCGJobConfig

        #create the backend wrapper script
        job = app.getJobObject()

        # Check the current worker node permissions
        grid_config = RATUtil.GridConfig.get_instance()
        for ce in grid_config.get_excluded_worker_nodes():
            job.backend.requirements.excludedCEs += '%s ' % ce

        #Check whether we're looking for a non-default sw dir
        if app.softwareEnvironment is None:
            # The relative path for CVMFS, SNOPLUS_CVMFS_DIR will be set at the backend
            # Note the extra \ to escape the dollar in the initial python wrapper
            app.softwareEnvironment = '\$SNOPLUS_CVMFS_DIR/sw/%s/env_rat-%s.sh' % (
                app.ratVersion, app.ratVersion)

        #we need to know the name of the file to run
        macroFile = None
        prodFile = None
        if app.ratMacro != '':
            macroFile = os.path.basename(app.ratMacro)
        else:
            prodFile = os.path.basename(app.prodScript)

        foutList = '['
        finList = '['
        for var in app.outputFiles:
            foutList += '%s,' % var
        for var in app.inputFiles:
            finList += '%s,' % var
        if len(foutList) != 1:
            foutList = '%s]' % foutList[:-1]  # remove final comma, add close bracket
        if len(finList) != 1:
            finList = '%s]' % finList[:-1]  # remove final comma, add close bracket

        args = ''
        args += '-g lcg '
        args += '-v %s ' % (app.ratVersion)
        args += '-e %s ' % (app.softwareEnvironment)
        args += '-d %s ' % (app.outputDir)
        args += '-o %s ' % (foutList)
        args += '-x %s ' % (app.inputDir)
        args += '-i %s ' % (finList)
        if app.ratMacro != '':
            args += '-m %s ' % (macroFile)
        else:
            args += '-k -m %s ' % (prodFile)
        if app.useDB:
            args += '--dbuser %s ' % (app.config['rat_db_user'])
            args += '--dbpassword %s ' % (app.config['rat_db_pswd'])
            args += '--dbname %s ' % (app.config['rat_db_name'])
            args += '--dbprotocol %s ' % (app.config['rat_db_protocol'])
            args += '--dburl %s ' % (app.config['rat_db_url'])
        if app.discardOutput:
            args += '--nostore '

        wrapperArgs = ['-a', '"%s"' % (args)]
        wrapperArgs += ['ratProdRunner.py', 'lcg']

        app._getParent().inputsandbox.append('%s/ratProdRunner.py' %
                                             _app_directory)

        return LCGJobConfig(File('%s/sillyPythonWrapper.py' % _app_directory),
                            inputbox=app._getParent().inputsandbox,
                            outputbox=app._getParent().outputsandbox,
                            args=wrapperArgs)
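To make the argument plumbing concrete: with an illustrative configuration
(ratVersion '6.18.9', one input file and one output file), the wrapper submitted above
would end up being invoked on the worker node roughly as sketched below; all values
are made up and the exact quoting is left to LCGJobConfig.

# Purely illustrative reconstruction of the submitted command line:
#   python sillyPythonWrapper.py \
#       -a "-g lcg -v 6.18.9 -e \$SNOPLUS_CVMFS_DIR/sw/6.18.9/env_rat-6.18.9.sh \
#           -d out/dir -o [out.root] -x in/dir -i [in.root] -m macro.mac " \
#       ratProdRunner.py lcg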
Example No. 19
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("TagPrepareLocalRTHandler prepare called, %s", job.id)

        # prepare inputdata
        input_files = []
        input_guids = []

        if job.inputdata:

            # check for subjobs
            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'DQ2Dataset':
                    raise ApplicationConfigurationError(
                        None, 'Cannot use DQ2Dataset with a local job')
            else:
                if job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'DQ2Dataset':
                    raise ApplicationConfigurationError(
                        None, 'Cannot use DQ2Dataset with a local job')

        if job.outputdata:
            raise ApplicationConfigurationError(
                None, 'No outputdata required for TagPrepare job.')

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        # prepare inputsandbox
        inputbox = [File(os.path.join(__athdirectory__, 'athena-utility.sh'))]
        if input_files:
            _append_file_buffer(inputbox, 'input_files', input_files)

        exe = os.path.join(__directory__, 'run-tagprepare-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + ' '.join(job.application.args)
                else:
                    job.application.options = ' '.join(job.application.args)
        except AttributeError:
            pass

        output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        # Work around for glite WMS spaced environement variable problem
        inputbox.append(
            FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

        # append a property for monitoring to the jobconfig of subjobs
        lcg_config = StandardJobConfig(File(exe), inputbox, [], outputbox,
                                       environment)
        return lcg_config
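Both this handler and the LCG one above write ATHENA_OPTIONS into a small
athena_options file as a work-around for the glite WMS problem with spaces in
environment variables. A sketch of how a worker-side script could restore it
(an assumption; the actual run scripts are not part of this snippet):

# Sketch (assumption): re-read the shipped athena_options file on the worker node.
import os

if os.path.exists('athena_options'):
    os.environ['ATHENA_OPTIONS'] = open('athena_options').read().strip()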
Example No. 20
0
def _append_files(inputbox, *names):

    for name in names:
        inputbox.append(File(os.path.join(__directory__, name)))
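For reference, this is the helper invoked throughout the handlers above; a typical
call site, taken from the LCG handler earlier in this page, looks like:

# Stage the DQ2 helper scripts from the handler's own directory into the sandbox.
_append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2info.tar.gz')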
Example No. 21
0
    def configure(self, masterappconfig):
        '''Configure method, called once per job.
        '''
        logger.debug('RAT::RATUser configure ...')

        job = self._getParent()
        masterjob = job._getParent()

        #Critical options:
        # - ratMacro 
        # - outputDirectory
        # - ratBaseVersion
        #If these aren't defined, don't let the user submit the job
        #Note, the ratMacro can be defined in the subjob...
        if self.ratMacro is not None:
            job.inputsandbox.append(File(self.ratMacro))
        else:
            logger.error('Rat macro not defined')
            raise Exception
        if self.ratBaseVersion == '':
            logger.error('Error: must give a rat base (fixed release) version number')
            raise Exception
        if not self.outputFile:
            logger.error('No output file defined!')  # the output file must be set on the command line
            raise Exception
        if RATUtil.check_command(self.ratMacro, ['/rat/procset', 'file']):
            logger.error('Output file should only be defined via the command line and not in the macro!')  # checks whether the output file is also set in the macro
            raise Exception
        if self.outputFile:
            if not RATUtil.check_command(self.ratMacro, ['/rat/proclast', 'outroot']) and not RATUtil.check_command(self.ratMacro, ['/rat/proclast', 'outntuple']) and not RATUtil.check_command(self.ratMacro, ['/rat/proclast', 'outsoc']):
                logger.error('Have specified an output file, but no root, ntuple or soc processor present in macro')  # an output processor must be present regardless of where the output file is defined
                raise Exception
        if not self.nEvents and not self.tRun:
            logger.error('Need to specify either the number of events or the duration of the run!')  # one of the two must be set on the command line
            raise Exception
        if self.nEvents and self.tRun:
            logger.error('Cannot specify both the number of events and the duration of the run!')
            raise Exception
        if not RATUtil.check_command(self.ratMacro, ['/rat/run/start', '']):
            logger.error('/rat/run/start must be present in the macro, without an event count; the number of events should only be defined via the command line!')
            raise Exception
        if self.inputFile:
            if RATUtil.checkText(self.ratMacro,['inroot/read']):
                logger.error('Cannot specify inputFile in Ganga job if "/rat/inroot/read" line is present in macro')
                raise Exception
        if self.useDB:
            if not config['rat_db_pswd']:
                logger.error('Need a password in order to contact the ratdb database')
                raise Exception

        #Always run rat with a log called rat.log
        job.outputsandbox.append('rat.log')
        job.outputsandbox.append('return_card.js')

        if self.ratVersion is not None:
            #download the code locally
            #only uses the main SNO+ rat branch for now
            #need to add pkl object to inform which branch we have and add others when required
            self.zipFileName = RATUtil.make_rat_snapshot(
                self.ratFork, self.ratVersion, self.versionUpdate,
                os.path.expanduser(config['cacheDir']))
            job.inputsandbox.append(File(self.zipFileName))

        #all args have to be str/file - force rat base version to be a string
        self.ratBaseVersion = str(self.ratBaseVersion)

        return (None, None)
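The checks above constrain what the user's RAT macro may contain. An illustrative
fragment that would pass them, with the output file name and event count deliberately
left to the Ganga command line (the exact macro syntax is an assumption):

# Illustrative RAT macro fragment, shown here as comments:
#   /rat/proclast outroot      <- an output processor is declared, no file name given
#   /rat/run/start             <- run is started, no event count given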
Example No. 22
0
    def master_prepare(self, app, appconfig):
        """Prepare the master job"""

        job = app._getParent()  # Returns job or subjob object

        logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id)

        if job._getRoot().subjobs:
            jobid = "%d" % (job._getRoot().id)
        else:
            jobid = "%d" % job.id

        # Generate output dataset name
        if job.outputdata:
            if job.outputdata._name == 'DQ2OutputDataset':
                dq2_datasetname = job.outputdata.datasetname
                dq2_isGroupDS = job.outputdata.isGroupDS
                dq2_groupname = job.outputdata.groupname
            else:
                dq2_datasetname = ''
                dq2_isGroupDS = False
                dq2_groupname = ''
            self.output_datasetname, self.output_lfn = dq2outputdatasetname(
                dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

        # Expand Athena jobOptions
        if not app.option_file and not app.command_line:
            raise ConfigError(
                "j.application.option_file='' - No Athena jobOptions files specified."
            )

        athena_options = ''
        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
        ]
        if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:

            for option_file in app.option_file:
                athena_options += ' ' + os.path.basename(option_file.name)
                inputbox += [File(option_file.name)]

            athena_options += ' %s ' % app.options

        else:
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options = app.options + ' ' + athena_options
                inputbox += [File(option_file.name)]

            if app.command_line:
                athena_options = app.command_line

        athena_usersetupfile = os.path.basename(app.user_setupfile.name)

        #       prepare input sandbox

        if app.user_setupfile.name: inputbox += [File(app.user_setupfile.name)]
        #CN: added extra test for TNTJobSplitter
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset', 'ATLASTier3Dataset'
        ] or (job._getRoot().splitter
              and job._getRoot().splitter._name == 'TNTJobSplitter'):
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
            _append_files(inputbox, 'dq2_get')
            _append_files(inputbox, 'dq2info.tar.gz')
            _append_files(inputbox, 'libdcap.so')

        if job.inputdata and job.inputdata._name == 'ATLASDataset':
            if job.inputdata.lfc:
                _append_files(inputbox, 'ganga-stagein-lfc.py')
            else:
                _append_files(inputbox, 'ganga-stagein.py')

        ## insert more scripts to inputsandbox for FileStager
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['FILE_STAGER']:
            _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                          'fs-copy.py')

        if not 'getstats.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'getstats.py')

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            if not job.outputdata.location:
                raise ApplicationConfigurationError(
                    None,
                    'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !'
                )
            if not File(
                    os.path.join(os.path.dirname(__file__),
                                 'ganga-stage-in-out-dq2.py')) in inputbox:
                _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
                _append_files(inputbox, 'dq2info.tar.gz')
                _append_files(inputbox, 'libdcap.so')
            _append_files(inputbox, 'ganga-joboption-parse.py')

        if job.inputsandbox:
            for file in job.inputsandbox:
                inputbox += [file]
        if app.user_area.name:
            if app.is_prepared is True:
                inputbox += [File(app.user_area.name)]
            else:
                inputbox += [
                    File(
                        os.path.join(
                            os.path.join(shared_path, app.is_prepared.name),
                            os.path.basename(app.user_area.name)))
                ]
        if app.group_area.name and string.find(app.group_area.name,
                                               "http") < 0:
            if app.is_prepared is True:
                inputbox += [File(app.group_area.name)]
            else:
                inputbox += [
                    File(
                        os.path.join(
                            os.path.join(shared_path, app.is_prepared.name),
                            os.path.basename(app.group_area.name)))
                ]


#       prepare environment

        try:
            atlas_software = config['ATLAS_SOFTWARE']
        except ConfigError:
            raise ConfigError(
                'No default location of ATLAS_SOFTWARE specified in the configuration.'
            )

        if app.atlas_release == '' and app.atlas_project != "AthAnalysisBase":
            raise ApplicationConfigurationError(
                None,
                'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.'
            )

        environment = {
            'ATLAS_RELEASE': app.atlas_release,
            'ATHENA_OPTIONS': athena_options,
            'ATLAS_SOFTWARE': atlas_software,
            'ATHENA_USERSETUPFILE': athena_usersetupfile,
            'ATLAS_PROJECT': app.atlas_project,
            'ATLAS_EXETYPE': app.atlas_exetype,
            'GANGA_VERSION': configSystem['GANGA_VERSION'],
            'DQ2_SETUP_SCRIPT': configDQ2['setupScript']
        }

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        cmtconfig = app.atlas_cmtconfig
        if cmtconfig.find('x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        environment['ATLAS_CMTCONFIG'] = app.atlas_cmtconfig
        environment['DCACHE_RA_BUFFER'] = str(config['DCACHE_RA_BUFFER'])

        if app.atlas_environment:
            for var in app.atlas_environment:
                vars = var.split('=')
                if len(vars) == 2:
                    environment[vars[0]] = vars[1]

        if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                     or app.atlas_release.find('12.') <= 0):
            environment['ATLAS_PRODUCTION'] = app.atlas_production

        if app.user_area.name:
            environment['USER_AREA'] = os.path.basename(app.user_area.name)
        if app.group_area.name:
            if string.find(app.group_area.name, "http") >= 0:
                environment['GROUP_AREA_REMOTE'] = "%s" % (app.group_area.name)
            else:
                environment['GROUP_AREA'] = os.path.basename(
                    app.group_area.name)

        if app.max_events:
            if (app.max_events != -999) and (app.max_events > -2):
                environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if job.inputdata.type not in ['LOCAL']:

                try:
                    environment['X509CERTDIR'] = os.environ['X509_CERT_DIR']
                except KeyError:
                    environment['X509CERTDIR'] = ''

                try:
                    proxy = os.environ['X509_USER_PROXY']
                except KeyError:
                    proxy = '/tmp/x509up_u%s' % os.getuid()

                REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy)
                environment['REMOTE_PROXY'] = REMOTE_PROXY

                try:
                    environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
                except:
                    pass

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                datasetname = job.inputdata.dataset
                environment['DATASETNAME'] = ':'.join(datasetname)
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations())
                environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
                environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                    'DQ2_URL_SERVER_SSL']
                #environment['DATASETTYPE']=job.inputdata.type
                # At present, DQ2 download is the only thing that works
                environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
                if job.inputdata.accessprotocol:
                    environment[
                        'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol

                try:
                    environment['X509CERTDIR'] = os.environ['X509_CERT_DIR']
                except KeyError:
                    environment['X509CERTDIR'] = ''

                try:
                    proxy = os.environ['X509_USER_PROXY']
                except KeyError:
                    proxy = '/tmp/x509up_u%s' % os.getuid()

                REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy)
                environment['REMOTE_PROXY'] = REMOTE_PROXY
                try:
                    environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
                except:
                    pass

            else:
                raise ConfigError(
                    "j.inputdata.dataset='' - DQ2 dataset name needs to be specified."
                )

            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(
                    job.inputdata.tagdataset)

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            try:
                environment['X509CERTDIR'] = os.environ['X509_CERT_DIR']
            except KeyError:
                environment['X509CERTDIR'] = ''
            try:
                proxy = os.environ['X509_USER_PROXY']
            except KeyError:
                proxy = '/tmp/x509up_u%s' % os.getuid()

            REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy)
            environment['REMOTE_PROXY'] = REMOTE_PROXY
            try:
                environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
            except:
                pass

        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
            elif job.backend.extraopts.find('site=zn') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2[
                    'DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')

        #       output sandbox
        outputbox = []
        outputGUIDs = 'output_guids'
        outputLOCATION = 'output_location'
        outputDATA = 'output_data'
        outputbox.append(outputGUIDs)
        outputbox.append(outputLOCATION)
        outputbox.append(outputDATA)
        outputbox.append('stats.pickle')
        if (job.outputsandbox):
            for file in job.outputsandbox:
                outputbox += [file]

        ## retrieve the FileStager log
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['FILE_STAGER']:
            outputbox += ['FileStager.out', 'FileStager.err']

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
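The X509_CERT_DIR / X509_USER_PROXY / REMOTE_PROXY block is repeated three times in
this handler. A small helper capturing the pattern (a sketch, not part of the original
code) would be:

# Sketch of a helper collecting the proxy-related environment used above.
import os
import socket

def _proxy_environment():
    env = {'X509CERTDIR': os.environ.get('X509_CERT_DIR', '')}
    proxy = os.environ.get('X509_USER_PROXY', '/tmp/x509up_u%s' % os.getuid())
    env['REMOTE_PROXY'] = '%s:%s' % (socket.getfqdn(), proxy)
    return env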
Example No. 23
0
    def master_prepare(self, app, appmasterconfig):

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler master_prepare called for %s',
                     job.getFQID('.'))
        usertag = configDQ2['usertag']
        #usertag='user09'
        nickname = getNickname(allowMissingNickname=True)
        self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (
            usertag, nickname, commands.getoutput('hostname').split('.')[0],
            int(time.time()), job.id)
        #        self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity())
        sources = 'sources.%s.tar.gz' % commands.getoutput(
            'uuidgen 2> /dev/null')
        self.library = '%s.lib.tgz' % self.libDataset

        # check DBRelease
        # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1:
        #   raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'")

        #       unpack library
        logger.debug('Creating source tarball ...')
        tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
        os.mkdir(tmpdir)

        inputbox = []
        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [File(app.transform_archive)]
        if app.evgen_job_option:
            self.evgen_job_option = app.evgen_job_option
            if os.path.exists(app.evgen_job_option):
                # locally modified job option file to add to the input sand box
                inputbox += [File(app.evgen_job_option)]
                self.evgen_job_option = app.evgen_job_option.split("/")[-1]

#       add input sandbox files
        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [file]
#        add option files
        for extFile in job.backend.extOutFile:
            try:
                shutil.copy(extFile, tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(extFile, tmpdir)
#       fill the archive
        for opt_file in inputbox:
            try:
                shutil.copy(opt_file.name, tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(opt_file.name, tmpdir)
#       now tar it up again

        inpw = job.getInputWorkspace()
        rc, output = commands.getstatusoutput('tar czf %s -C %s .' %
                                              (inpw.getPath(sources), tmpdir))
        if rc:
            logger.error('Packing sources failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError(None,
                                                'Packing sources failed.')

        shutil.rmtree(tmpdir)

        #       upload sources

        logger.debug('Uploading source tarball ...')
        try:
            cwd = os.getcwd()
            os.chdir(inpw.getPath())
            rc, output = Client.putFile(sources)
            if output != 'True':
                logger.error('Uploading sources %s failed. Status = %d',
                             sources, rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Uploading archive failed')
        finally:
            os.chdir(cwd)

        # Use Panda's brokerage
##         if job.inputdata and len(app.sites)>0:
##             # update cloud, use inputdata's
##             from dq2.info.TiersOfATLAS import whichCloud,ToACache
##             inclouds=[]
##             for site in app.sites:
##                 cloudSite=whichCloud(app.sites[0])
##                 if cloudSite not in inclouds:
##                     inclouds.append(cloudSite)
##             # now converting inclouds content into proper brokering stuff.
##             outclouds=[]
##             for cloudSite in inclouds:
##                 for cloudID, eachCloud in ToACache.dbcloud.iteritems():
##                     if cloudSite==eachCloud:
##                         cloud=cloudID
##                         outclouds.append(cloud)
##                         break

##             print outclouds
##             # finally, matching with user's wishes
##             if len(outclouds)>0:
##                 if not job.backend.requirements.cloud: # no user wish, update
##                     job.backend.requirements.cloud=outclouds[0]
##                 else:
##                     try:
##                         assert job.backend.requirements.cloud in outclouds
##                     except:
##                         raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))

        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage

        runPandaBrokerage(job)

        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(
                None, 'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except:
            raise ApplicationConfigurationError(
                None,
                "Could not extract output dataset location from job.backend.site value: %s. Aborting"
                % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                dset = string.replace(app.outputpaths[outtype], "/", ".")
                dset = dset[1:]
                # dataset registration must be done only once.
                print "registering output dataset %s at %s" % (dset,
                                                               outDsLocation)
                try:
                    Client.addDataset(dset, False, location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except:
                    raise ApplicationConfigurationError(
                        None,
                        "Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print "registering output dataset %s at %s" % (self.libDataset,
                                                           outDsLocation)

            try:
                Client.addDataset(self.libDataset,
                                  False,
                                  location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except:
                raise ApplicationConfigurationError(
                    None, "Fail to create output dataset %s. Aborting" %
                    self.libDataset)

        ###
        cacheVer = "-AtlasProduction_" + str(app.prod_release)

        logger.debug("master job submit?")
        self.outsite = job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite = app.se_name

        #       create build job
        jspec = JobSpec()
        jspec.jobDefinitionID = job.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage = 'AnalysisTransforms' + cacheVer  #+nightVer
        jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB  # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare())
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE = job.backend.site
        jspec.prodSourceLabel = 'panda'
        jspec.assignedPriority = 2000
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        #        jspec.jobParameters     = self.args not known yet
        jspec.jobParameters = '-o %s' % (self.library)
        if app.userarea:
            print app.userarea
            jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters += ' -i %s' % (sources)
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

        matchURL = re.search('(http.*://[^/]+)/', Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        fout = FileSpec()
        fout.lfn = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)
        #print "MASTER JOB DETAILS:",jspec.jobParameters

        return jspec
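To illustrate the naming convention used for the build job's library dataset: with
usertag 'user09', nickname 'jdoe', submission host 'lxplus401', timestamp 1300000000
and job id 42 (all made-up values), the format strings above expand to:

# Illustration of the libDataset / library naming (values made up):
#   self.libDataset = 'user09.jdoe.ganga.lxplus401_1300000000.lib._000042'
#   self.library    = 'user09.jdoe.ganga.lxplus401_1300000000.lib._000042.lib.tgz'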