Example #1
 def __init__(self, enforceEnter=False, verbose=False, restoreDB=False):
     # verbose
     self.verbose = verbose
     # restore database
     self.restoreDB = restoreDB
     # initialize database
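     # NB: 'initialzieDB' (sic) matches the helper name defined in pandatools.PdbUtils (see also Example #6)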
     PdbUtils.initialzieDB(self.verbose, self.restoreDB)
     # check proxy
     self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
         '', enforceEnter, self.verbose)
Example #2
 def finish(self,JobID,soft=False):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
             self.gridPassPhrase,
             False,
             self.verbose,
             useCache=True)
     # force update just in case
     self.status(JobID,True)
     # get jobset
     jobList = self.getJobIDsWithSetID(JobID)
     if jobList == None:
         # no jobset found for this ID; treat it as a plain JobID
         jobList = [JobID]
     else:
         tmpMsg = "ID=%s is composed of JobID=" % JobID
         for tmpJobID in jobList:
             tmpMsg += '%s,' % tmpJobID
         tmpMsg = tmpMsg[:-1]
         tmpLog.info(tmpMsg)
     for tmpJobID in jobList:    
         # get job info from local repository
         job = self.getJobInfo(tmpJobID)
         if job == None:
             tmpLog.warning("JobID=%s not found in local repository. Synchronization may be needed" % tmpJobID)            
             continue
         # skip frozen job
         if job.dbStatus == 'frozen':
             tmpLog.info('All subJobs in JobID=%s already finished/failed' % tmpJobID)
             continue
         # finish JEDI task
         tmpLog.info('Sending finishTask command ...')
         status,output = Client.finishTask(job.jediTaskID,soft,self.verbose)
         # communication error
         if status != 0:
             tmpLog.error(output)
             tmpLog.error("Failed to finish JobID=%s" % tmpJobID)
             return False
         tmpStat,tmpDiag = output
         if not tmpStat:
             tmpLog.error(tmpDiag)
             tmpLog.error("Failed to finish JobID=%s" % tmpJobID)
             return False
         tmpLog.info(tmpDiag)
     # done
     tmpLog.info('Done. TaskID=%s will be finished soon' % job.jediTaskID)
     return True
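A minimal usage sketch for the method above. Hedged: it assumes the method lives on pbook's PBookCore class (whose constructor is shown in Examples #1 and #6), that the panda-client pandatools package is importable, and that the JobID value and the reading of the soft flag are illustrative rather than authoritative.

# hypothetical interactive use of finish()
from pandatools import PBookCore

pbook = PBookCore.PBookCore()
# soft=True requests a "soft" finish: let subjobs that are already running
# complete instead of stopping everything immediately
pbook.finish(1234, soft=True)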
Example #3
 def kill(self, JobID):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
         self.gridPassPhrase, False, self.verbose)
     # get jobset
     jobList = self.getJobIDsWithSetID(JobID)
     if jobList == None:
         jobList = [JobID]
     else:
         tmpMsg = "JobsetID=%s is composed of JobID=" % JobID
         for tmpJobID in jobList:
             tmpMsg += '%s,' % tmpJobID
         tmpMsg = tmpMsg[:-1]
         tmpLog.info(tmpMsg)
     for tmpJobID in jobList:
         # get job info from local repository
         job = self.getJobInfo(tmpJobID)
         if job == None:
             tmpLog.warning(
                 "JobID=%s not found in local repository. Synchronization may be needed"
                 % tmpJobID)
             continue
         # skip frozen job
         if job.dbStatus == 'frozen':
             tmpLog.info('All subJobs in JobID=%s already finished/failed' %
                         tmpJobID)
             continue
         # get PandaID list
         killJobs = job.PandaID.split(',')
         # kill
         tmpLog.info('Sending kill command ...')
         status, output = Client.killJobs(killJobs, self.verbose)
         if status != 0:
             tmpLog.error(output)
             tmpLog.error("Failed to kill JobID=%s" % tmpJobID)
             return False
         # update database
         job.commandToPilot = 'tobekilled'
         # update DB
         try:
             PdbUtils.updateJobDB(job, self.verbose)
         except:
             tmpLog.error("Failed to update local repository for JobID=%s" %
                          tmpJobID)
             return False
         # done
         tmpLog.info('Done. JobID=%s will be killed in 30min' % tmpJobID)
     return True
Example #4
 def __init__(self):
     super(PandaRequirements,self).__init__()
     from pandatools import PsubUtils
     import sys
     sys.stdout = open('/dev/null','w')
     sys.stderr = open('/dev/null','w')
     self.cloud = PsubUtils.getCloudUsingFQAN(None,False)
     sys.stdout = sys.__stdout__
     sys.stderr = sys.__stderr__
     if not self.cloud:
         import random
         from pandatools import Client
         clouds = Client.PandaClouds.keys()
         clouds.remove('OSG')
         self.cloud = random.choice(clouds)
Example #5
 def __init__(self):
     super(PandaRequirements, self).__init__()
     from pandatools import PsubUtils
     import sys
     sys.stdout = open('/dev/null', 'w')
     sys.stderr = open('/dev/null', 'w')
     self.cloud = PsubUtils.getCloudUsingFQAN(None, False)
     sys.stdout = sys.__stdout__
     sys.stderr = sys.__stderr__
     if not self.cloud:
         import random
         from pandatools import Client
         clouds = Client.PandaClouds.keys()
         clouds.remove('OSG')
         self.cloud = random.choice(clouds)
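The sys.stdout/sys.stderr swap in Examples #4 and #5 silences the chatty getCloudUsingFQAN call, but it leaks the two /dev/null file objects and does not restore the streams if the call raises. Below is a sketch of the same idea written as a context manager; this is an alternative pattern with a hypothetical helper name (silenced), not what the original code does.

import contextlib
import os
import sys

@contextlib.contextmanager
def silenced():
    # temporarily point stdout/stderr at /dev/null and restore them on exit,
    # even when the wrapped call raises
    devnull = open(os.devnull, 'w')
    old_out, old_err = sys.stdout, sys.stderr
    sys.stdout = sys.stderr = devnull
    try:
        yield
    finally:
        sys.stdout, sys.stderr = old_out, old_err
        devnull.close()

# usage with the call from the examples above:
# with silenced():
#     self.cloud = PsubUtils.getCloudUsingFQAN(None, False)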
Example #6
 def __init__(self,enforceEnter=False,verbose=False,restoreDB=False):
     # verbose
     self.verbose = verbose
     # restore database
     self.restoreDB = restoreDB
     # initialize database
     PdbUtils.initialzieDB(self.verbose,self.restoreDB)
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
             '',
             enforceEnter,
             self.verbose,
             useCache=True)
     # map between jobset and jediTaskID
     self.jobsetTaskMap = {}
Example #7
 def status(self, JobID, forceUpdate=False):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
         self.gridPassPhrase, False, self.verbose)
     # get job info from local repository
     job = self.getJobInfo(JobID)
     if job == None:
         # not found
         return None
     # update if needed
     if job.dbStatus != 'frozen' or forceUpdate:
         tmpLog.info("Getting status for JobID=%s ..." % JobID)
         # get status from Panda server
         status, pandaIDstatus = Client.getPandIDsWithJobID(
             JobID, verbose=self.verbose)
         if status != 0:
             tmpLog.error("Failed to get status for JobID=%s" % JobID)
             return None
         # get one job to set computingSite which may have changed due to rebrokerage
         pandaJob = None
         if pandaIDstatus != {}:
             tmpPandaIDs = pandaIDstatus.keys()
             tmpPandaIDs.sort()
             status, tmpPandaJobs = Client.getFullJobStatus(
                 tmpPandaIDs[:1], verbose=self.verbose)
             if status != 0:
                 tmpLog.error("Failed to get PandaJobs for %s" % JobID)
                 return None
             pandaJob = tmpPandaJobs[0]
         # convert to local job spec
         job = PdbUtils.convertPtoD([],
                                    pandaIDstatus,
                                    job,
                                    pandaJobForSiteID=pandaJob)
         # update DB
         try:
             PdbUtils.updateJobDB(job, self.verbose)
         except:
             tmpLog.error("Failed to update local repository for JobID=%s" %
                          JobID)
             return None
         tmpLog.info("Updated JobID=%s" % JobID)
     # return
     return job
Example #8
 def debug(self,pandaID,modeOn):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
         self.gridPassPhrase,
         False,
         self.verbose,
         useCache=True)
     # rebrokerage
     status,output = Client.setDebugMode(pandaID,modeOn,self.verbose)
     if status != 0:
         tmpLog.error(output)
         tmpLog.error("Failed to set debug mode for %s" % pandaID)
         return
     # done
     tmpLog.info(output)
     return
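A quick usage sketch, under the same assumptions as the finish() sketch after Example #2 (a PBookCore-style object named pbook; the PandaID is illustrative):

# switch pilot debug mode on for a single PandaID, then back off
pbook.debug(5678901234, True)
pbook.debug(5678901234, False)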
Example #9
    def __init__(self):
        super(JediRequirements,self).__init__()
        from pandatools import PsubUtils
        import sys
        sys.stdout = open('/dev/null','w')
        sys.stderr = open('/dev/null','w')

        # cache the call to getCloudUsingFQAN as otherwise it can be slow
        if JediRequirements._defcloud == '':
            JediRequirements._defcloud = PsubUtils.getCloudUsingFQAN(None,False)
        self.cloud = self._defcloud

        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
        if not self.cloud:
            import random
            from pandatools import Client
            clouds = Client.PandaClouds.keys()
            clouds.remove('OSG')
            self.cloud = random.choice(clouds)
Example #10
    def __init__(self):
        super(JediRequirements, self).__init__()
        from pandatools import PsubUtils
        import sys
        sys.stdout = open('/dev/null', 'w')
        sys.stderr = open('/dev/null', 'w')

        # cache the call to getCloudUsingFQAN as otherwise it can be slow
        if JediRequirements._defcloud == '':
            JediRequirements._defcloud = PsubUtils.getCloudUsingFQAN(
                None, False)
        self.cloud = self._defcloud

        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__
        if not self.cloud:
            import random
            from pandatools import Client
            clouds = Client.PandaClouds.keys()
            clouds.remove('OSG')
            self.cloud = random.choice(clouds)
Example #11
 def rebrokerage(self,JobsetID,cloud):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
             self.gridPassPhrase,    
             False,
             self.verbose,
             useCache=True)
     # get jobset
     jobList = self.getJobIDsWithSetID(JobsetID)
     if jobList == None:
         jobList = [JobsetID]
     else:
         tmpMsg = "JobsetID=%s is composed of JobID=" % JobsetID
         for tmpJobID in jobList:
             tmpMsg += '%s,' % tmpJobID
         tmpMsg = tmpMsg[:-1]
         tmpLog.info(tmpMsg)
     for JobID in jobList:    
         # get job info using status
         job = self.status(JobID)
         if job == None:
             # not found
             continue
         # skip frozen job
         if job.dbStatus == 'frozen':
             tmpLog.info('All subJobs in JobID=%s already finished/failed' % JobID)
             continue
         # rebrokerage
         tmpLog.info('Sending rebrokerage request ...')
         status,output = Client.runReBrokerage(JobID,job.libDS,cloud,self.verbose)
         if status != 0:
             tmpLog.error(output)
             tmpLog.error("Failed to reassign JobID=%s" % JobID)
             return
         # done
         tmpLog.info('Done for %s' % JobID)
     return
Example #12
    def rebrokerage(self, JobsetID, cloud):
        # get logger
        tmpLog = PLogger.getPandaLogger()
        # check proxy
        self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
            self.gridPassPhrase, False, self.verbose)
        # get jobset
        jobList = self.getJobIDsWithSetID(JobsetID)
        if jobList == None:
            jobList = [JobsetID]
        else:
            tmpMsg = "JobsetID=%s is composed of JobID=" % JobsetID
            for tmpJobID in jobList:
                tmpMsg += '%s,' % tmpJobID
            tmpMsg = tmpMsg[:-1]
            tmpLog.info(tmpMsg)
        for JobID in jobList:
            # get job info using status
            job = self.status(JobID)
            if job == None:
                # not found
                continue
            # skip frozen job
            if job.dbStatus == 'frozen':
                tmpLog.info('All subJobs in JobID=%s already finished/failed' %
                            JobID)
                continue
            # rebrokerage
            tmpLog.info('Sending rebrokerage request ...')
            status, output = Client.runReBrokerage(JobID, job.libDS, cloud,
                                                   self.verbose)
            if status != 0:
                tmpLog.error(output)
                tmpLog.error("Failed to reassign JobID=%s" % JobID)
                return
            # done
            tmpLog.info('Done for %s' % JobID)
        return
Example #13
    def master_prepare(self,app,appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from pandatools import MiscUtils
        from pandatools import AthenaUtils
        from pandatools import PsubUtils

        # create a random number for this submission to allow multiple use of containers
        self.rndSubNum = random.randint(1111,9999)

        job = app._getParent()
        logger.debug('AthenaJediRTHandler master_prepare called for %s', job.getFQID('.')) 

        if app.useRootCoreNoBuild:
            logger.info('Athena.useRootCoreNoBuild is True, setting Panda.nobuild=True.')
            job.backend.nobuild = True

        if job.backend.bexec and job.backend.nobuild:
            raise ApplicationConfigurationError("Contradicting options: job.backend.bexec and job.backend.nobuild are both enabled.")

        if job.backend.requirements.rootver != '' and job.backend.nobuild:
            raise ApplicationConfigurationError("Contradicting options: job.backend.requirements.rootver given and job.backend.nobuild are enabled.")
        
        # Switch on compilation flag if bexec is set or libds is empty
        if job.backend.bexec != '' or not job.backend.nobuild:
            app.athena_compile = True
            for sj in job.subjobs:
                sj.application.athena_compile = True
            logger.info('"job.backend.nobuild=False" or "job.backend.bexec" is set - Panda build job is enabled.')

        if job.backend.nobuild:
            app.athena_compile = False
            for sj in job.subjobs:
                sj.application.athena_compile = False
            logger.info('"job.backend.nobuild=True" or "--nobuild" chosen - Panda build job is switched off.')

        # check for auto datri
        if job.outputdata.location != '':
            if not PsubUtils.checkDestSE(job.outputdata.location,job.outputdata.datasetname,False):
                raise ApplicationConfigurationError("Problems with outputdata.location setting '%s'" % job.outputdata.location)

        # validate application
        if not app.atlas_release and not job.backend.requirements.rootver and not app.atlas_exetype in [ 'EXE' ]:
            raise ApplicationConfigurationError("application.atlas_release is not set. Did you run application.prepare()")

        self.dbrelease = app.atlas_dbrelease
        if self.dbrelease != '' and self.dbrelease != 'LATEST' and self.dbrelease.find(':') == -1:
            raise ApplicationConfigurationError("ERROR : invalid argument for DB Release. Must be 'LATEST' or 'DatasetName:FileName'")

        self.runConfig = AthenaUtils.ConfigAttr(app.atlas_run_config)
        for k in self.runConfig.keys():
            self.runConfig[k]=AthenaUtils.ConfigAttr(self.runConfig[k])
        if not app.atlas_run_dir:
            raise ApplicationConfigurationError("application.atlas_run_dir is not set. Did you run application.prepare()")
 
        self.rundirectory = app.atlas_run_dir
        self.cacheVer = ''
        if app.atlas_project and app.atlas_production:
            self.cacheVer = "-" + app.atlas_project + "_" + app.atlas_production

        # handle different atlas_exetypes
        self.job_options = ''
        if app.atlas_exetype == 'TRF':
            self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError("job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            #raise ApplicationConfigurationError("Sorry TRF on Panda backend not yet supported")

            if app.options:
                self.job_options += ' %s ' % app.options
                
        elif app.atlas_exetype == 'ATHENA':
            
            if len(app.atlas_environment) > 0 and app.atlas_environment[0].find('DBRELEASE_OVERRIDE')==-1:
                logger.warning("Passing of environment variables to Athena using Panda not supported. Ignoring atlas_environment setting.")
                
            if job.outputdata.outputdata:
                raise ApplicationConfigurationError("job.outputdata.outputdata must be empty if atlas_exetype='ATHENA' and Panda backend is used (outputs are auto-detected)")
            if app.options:
                if app.options.startswith('-c'):
                    self.job_options += ' %s ' % app.options
                else:
                    self.job_options += ' -c %s ' % app.options

                logger.warning('The value of j.application.options has been prepended with " -c " ')
                logger.warning('Please make sure to use proper quotes for the values of j.application.options !')

            self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

            # check for TAG compression
            if 'subcoll.tar.gz' in app.append_to_user_area:
                self.job_options = ' uncompress.py ' + self.job_options
                
        elif app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError("job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

            # sort out environment variables
            env_str = ""
            if len(app.atlas_environment) > 0:
                for env_var in app.atlas_environment:
                    env_str += "export %s ; " % env_var
            else: 
                env_str = ""

            # below fixes issue with runGen -- job_options are executed by os.system when dbrelease is used, and by the shell otherwise
            ## - REMOVED FIX DUE TO CHANGE IN PILOT - MWS 8/11/11
            if job.backend.requirements.usecommainputtxt:
                input_str = '/bin/echo %IN > input.txt; cat input.txt; '
            else:
                input_str = '/bin/echo %IN | sed \'s/,/\\\n/g\' > input.txt; cat input.txt; '
            if app.atlas_exetype == 'PYARA':
                self.job_options = env_str + input_str + ' python ' + self.job_options
            elif app.atlas_exetype == 'ARES':
                self.job_options = env_str + input_str + ' athena.py ' + self.job_options
            elif app.atlas_exetype == 'ROOT':
                self.job_options = env_str + input_str + ' root -b -q ' + self.job_options
            elif app.atlas_exetype == 'EXE':
                self.job_options = env_str + input_str + self.job_options

            if app.options:
                self.job_options += ' %s ' % app.options

        if self.job_options == '':
            raise ApplicationConfigurationError("No Job Options found!")
        logger.info('Running job options: %s'%self.job_options)

        # validate dbrelease
        if self.dbrelease != "LATEST":
            self.dbrFiles,self.dbrDsList = getDBDatasets(self.job_options,'',self.dbrelease)

        # handle the output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        # validate the output dataset name (and make it a container)
        job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname+='/'

        # add extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)
        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # use the shared area if possible
        tmp_user_area_name = app.user_area.name
        if app.is_prepared is not True:
            from Ganga.Utility.files import expandfilename
            shared_path = os.path.join(expandfilename(getConfig('Configuration')['gangadir']),'shared',getConfig('Configuration')['user'])
            tmp_user_area_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))


        # Add inputsandbox to user_area
        if job.inputsandbox:
            logger.warning("Submitting Panda job with inputsandbox. This may slow the submission slightly.")

            if tmp_user_area_name:
                inpw = os.path.dirname(tmp_user_area_name)
                self.inputsandbox = os.path.join(inpw, 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null'))
            else:
                inpw = job.getInputWorkspace()
                self.inputsandbox = inpw.getPath('sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null'))

            if tmp_user_area_name:
                rc, output = commands.getstatusoutput('cp %s %s.gz' % (tmp_user_area_name, self.inputsandbox))
                if rc:
                    logger.error('Copying user_area failed with status %d',rc)
                    logger.error(output)
                    raise ApplicationConfigurationError('Packing inputsandbox failed.')
                rc, output = commands.getstatusoutput('gunzip %s.gz' % (self.inputsandbox))
                if rc:
                    logger.error('Unzipping user_area failed with status %d',rc)
                    logger.error(output)
                    raise ApplicationConfigurationError('Packing inputsandbox failed.')

            for fname in [os.path.abspath(f.name) for f in job.inputsandbox]:
                fname = fname.rstrip(os.sep)
                path = os.path.dirname(fname)
                fn = os.path.basename(fname)

                #app.atlas_run_dir
                # get Athena versions
                rc, out = AthenaUtils.getAthenaVer()
                # failed
                if not rc:
                    #raise ApplicationConfigurationError('CMT could not parse correct environment ! \n Did you start/setup ganga in the run/ or cmt/ subdirectory of your athena analysis package ?')
                    logger.warning("CMT could not parse correct environment for inputsandbox - will use the atlas_run_dir as default")
                    
                    # as we don't have to be in the run dir now, create a copy of the run_dir directory structure and use that
                    input_dir = os.path.dirname(self.inputsandbox)
                    run_path = "%s/sbx_tree/%s" % (input_dir, app.atlas_run_dir)
                    rc, output = commands.getstatusoutput("mkdir -p %s" % run_path)
                    if not rc:
                        # copy this sandbox file
                        rc, output = commands.getstatusoutput("cp %s %s" % (fname, run_path))
                        if not rc:
                            path = os.path.join(input_dir, 'sbx_tree')
                            fn = os.path.join(app.atlas_run_dir, fn)
                        else:
                            raise ApplicationConfigurationError("Couldn't copy file %s to recreate run_dir for input sandbox" % fname)
                    else:
                        raise ApplicationConfigurationError("Couldn't create directory structure to match run_dir %s for input sandbox" % run_path)

                else:
                    userarea = out['workArea']

                    # strip the path from the filename if present in the userarea
                    ua = os.path.abspath(userarea)
                    if ua in path:
                        fn = fname[len(ua)+1:]
                        path = ua

                rc, output = commands.getstatusoutput('tar -h -r -f %s -C %s %s' % (self.inputsandbox, path, fn))
                if rc:
                    logger.error('Packing inputsandbox failed with status %d',rc)
                    logger.error(output)
                    raise ApplicationConfigurationError('Packing inputsandbox failed.')

            # remove sandbox tree if created
            if "sbx_tree" in os.listdir(os.path.dirname(self.inputsandbox)):                
                rc, output = commands.getstatusoutput("rm -r %s/sbx_tree" % os.path.dirname(self.inputsandbox))
                if rc:
                    raise ApplicationConfigurationError("Couldn't remove directory structure used for input sandbox")
                
            rc, output = commands.getstatusoutput('gzip %s' % (self.inputsandbox))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            self.inputsandbox += ".gz"
        else:
            self.inputsandbox = tmp_user_area_name

        # job name
        jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

        # make task
        taskParamMap = {}
        # Enforce that outputdataset name ends with / for container
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname = job.outputdata.datasetname + '/'

        taskParamMap['taskName'] = job.outputdata.datasetname

        taskParamMap['uniqueTaskName'] = True
        taskParamMap['vo'] = 'atlas'
        taskParamMap['architecture'] = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
        if app.atlas_release:
            taskParamMap['transUses'] = 'Atlas-%s' % app.atlas_release
        else:
            taskParamMap['transUses'] = ''
        taskParamMap['transHome'] = 'AnalysisTransforms'+self.cacheVer#+nightVer

        configSys = getConfig('System')
        gangaver = configSys['GANGA_VERSION'].lower()
        if not gangaver:
            gangaver = "ganga"

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['processingType'] = '{0}-jedi-athena'.format(gangaver)
        else:
            taskParamMap['processingType'] = '{0}-jedi-run'.format(gangaver)

        #if options.eventPickEvtList != '':
        #    taskParamMap['processingType'] += '-evp'
        taskParamMap['prodSourceLabel'] = 'user'
        if job.backend.site != 'AUTO':
            taskParamMap['cloud'] = Client.PandaSites[job.backend.site]['cloud']
            taskParamMap['site'] = job.backend.site
        elif job.backend.requirements.cloud != None and not job.backend.requirements.anyCloud:
            taskParamMap['cloud'] = job.backend.requirements.cloud
        if job.backend.requirements.excluded_sites != []:
            taskParamMap['excludedSite'] = expandExcludedSiteList( job )

        # if only a single site specifed, don't set includedSite
        #if job.backend.site != 'AUTO':
        #    taskParamMap['includedSite'] = job.backend.site
        #taskParamMap['cliParams'] = fullExecString
        if job.backend.requirements.noEmail:
            taskParamMap['noEmail'] = True
        if job.backend.requirements.skipScout:
            taskParamMap['skipScout'] = True
        if not app.atlas_exetype in ["ATHENA", "TRF"]: 
            taskParamMap['nMaxFilesPerJob'] = job.backend.requirements.maxNFilesPerJob
        if job.backend.requirements.disableAutoRetry:
            taskParamMap['disableAutoRetry'] = 1
        # source URL
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL)
        if matchURL != None:
            taskParamMap['sourceURL'] = matchURL.group(1)

        # dataset names
        outDatasetName = job.outputdata.datasetname
        logDatasetName = re.sub('/$','.log/',job.outputdata.datasetname)
        # log
        taskParamMap['log'] = {'dataset': logDatasetName,
                               'container': logDatasetName,
                               'type':'template',
                               'param_type':'log',
                               'value':'{0}.${{SN}}.log.tgz'.format(logDatasetName[:-1])
                               }
        # job parameters
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] = [
                {'type':'constant',
                 'value': ' --sourceURL ${SURL}',
                 },
                ]
        else:
            taskParamMap['jobParameters'] = [
                {'type':'constant',
                 'value': '-j "" --sourceURL ${SURL}',
                 },
                ]

        taskParamMap['jobParameters'] += [
            {'type':'constant',
             'value': '-r {0}'.format(self.rundirectory),
             },
            ]

        # Add the --trf option to jobParameters if required
        if app.atlas_exetype == "TRF":
            taskParamMap['jobParameters'] += [{'type': 'constant', 'value': '--trf'}]

        # output
        # output files
        outMap = {}
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            outMap, tmpParamList = AthenaUtils.convertConfToOutput(self.runConfig, self.extOutFile, job.outputdata.datasetname, destination=job.outputdata.location)
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-o "%s" ' % outMap
                 },
                ]
            taskParamMap['jobParameters'] += tmpParamList 

        else: 
            if job.outputdata.outputdata:
                for tmpLFN in job.outputdata.outputdata:
                    if len(job.outputdata.datasetname.split('.')) > 2:
                        lfn = '{0}.{1}'.format(*job.outputdata.datasetname.split('.')[:2])
                    else:
                        lfn = job.outputdata.datasetname[:-1]
                    lfn += '.$JOBSETID._${{SN/P}}.{0}'.format(tmpLFN)
                    dataset = '{0}_{1}/'.format(job.outputdata.datasetname[:-1],tmpLFN)
                    taskParamMap['jobParameters'] += MiscUtils.makeJediJobParam(lfn,dataset,'output',hidden=True, destination=job.outputdata.location)
                    outMap[tmpLFN] = lfn

                taskParamMap['jobParameters'] += [ 
                    {'type':'constant',
                     'value': '-o "{0}"'.format(str(outMap)),
                     },
                    ]

        if app.atlas_exetype in ["ATHENA"]:
            # jobO parameter
            tmpJobO = self.job_options
            # replace full-path jobOs
            for tmpFullName,tmpLocalName in AthenaUtils.fullPathJobOs.iteritems():
                tmpJobO = re.sub(tmpFullName,tmpLocalName,tmpJobO)
            # modify one-liner for G4 random seeds
            if self.runConfig.other.G4RandomSeeds > 0:
                if app.options != '':
                    tmpJobO = re.sub('-c "%s" ' % app.options,
                                     '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' \
                                         % app.options,tmpJobO)
                else:
                    tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" '
                dictItem = {'type':'template',
                            'param_type':'number',
                            'value':'${RNDMSEED}',
                            'hidden':True,
                            'offset':self.runConfig.other.G4RandomSeeds,
                            }
                taskParamMap['jobParameters'] += [dictItem]
        elif app.atlas_exetype in ["TRF"]:
            # replace parameters for TRF
            tmpJobO = self.job_options
            # output : basenames are in outMap['IROOT'] through extOutFile
            tmpOutMap = []
            for tmpName,tmpLFN in outMap['IROOT']:
                tmpJobO = tmpJobO.replace('%OUT.' + tmpName,tmpName)
            # replace DBR
            tmpJobO = re.sub('%DB=[^ \'\";]+','${DBR}',tmpJobO)

        if app.atlas_exetype in ["TRF"]:
            taskParamMap['useLocalIO'] = 1

        # build
        if job.backend.nobuild:
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-a {0}'.format(os.path.basename(self.inputsandbox)),
                 },
                ]
        else:
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-l ${LIB}',
                 },
                ]

        #
        # input
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.backend.requirements.nFilesPerJob > 0 and job.inputdata.number_of_files == 0 and job.backend.requirements.split > 0:
                job.inputdata.number_of_files = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.number_of_files != 0:
            taskParamMap['nFiles'] = job.inputdata.number_of_files
        elif job.backend.requirements.nFilesPerJob > 0 and job.backend.requirements.split > 0:
            # pathena does this for some reason even if there are no input files
            taskParamMap['nFiles'] = job.backend.requirements.nFilesPerJob * job.backend.requirements.split
        if job.backend.requirements.nFilesPerJob > 0:    
            taskParamMap['nFilesPerJob'] = job.backend.requirements.nFilesPerJob
            
        if job.backend.requirements.nEventsPerFile > 0:    
            taskParamMap['nEventsPerFile'] = job.backend.requirements.nEventsPerFile

        if not job.backend.requirements.nGBPerJob in [ 0,'MAX']:
            try:
                if job.backend.requirements.nGBPerJob != 'MAX':
                    job.backend.requirements.nGBPerJob = int(job.backend.requirements.nGBPerJob)
            except:
                logger.error("nGBPerJob must be an integer or MAX")
            # check negative                                                                                                                                                         
            if job.backend.requirements.nGBPerJob <= 0:
                logger.error("nGBPerJob must be positive")

            # don't set MAX since it is the default on the server side
            if not job.backend.requirements.nGBPerJob in [-1,'MAX']: 
                taskParamMap['nGBPerJob'] = job.backend.requirements.nGBPerJob

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            inputMap = {}
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {'type':'template',
                           'param_type':'input',
                           'value':'-i "${IN/T}"',
                           'dataset': ','.join(job.inputdata.dataset),
                           'expand':True,
                           'exclude':'\.log\.tgz(\.\d+)*$',
                           }
                #if options.inputType != '':
                #    tmpDict['include'] = options.inputType
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
                inputMap['IN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1
                taskParamMap['jobParameters'] += [
                    {'type':'constant',
                     'value': '-i "[]"',
                     },
                    ]
        else:
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {'type':'template',
                           'param_type':'input',
                           'value':'-i "${IN/T}"',
                           'dataset': ','.join(job.inputdata.dataset),
                           'expand':True,
                           'exclude':'\.log\.tgz(\.\d+)*$',
                           }
               #if options.nSkipFiles != 0:
               #    tmpDict['offset'] = options.nSkipFiles
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1

        # param for DBR     
        if self.dbrelease != '':
            dbrDS = self.dbrelease.split(':')[0]
            # change LATEST to DBR_LATEST
            if dbrDS == 'LATEST':
                dbrDS = 'DBR_LATEST'
            dictItem = {'type':'template',
                        'param_type':'input',
                        'value':'--dbrFile=${DBR}',
                        'dataset':dbrDS,
                            }
            taskParamMap['jobParameters'] += [dictItem]
        # no expansion
        #if options.notExpandDBR:
        #dictItem = {'type':'constant',
        #            'value':'--noExpandDBR',
        #            }
        #taskParamMap['jobParameters'] += [dictItem]

        # secondary FIXME disabled
        self.secondaryDSs = {}
        if self.secondaryDSs != {}:
            inMap = {}
            streamNames = []
            for tmpDsName,tmpMap in self.secondaryDSs.iteritems():
                # make template item
                streamName = tmpMap['streamName']
                dictItem = MiscUtils.makeJediJobParam('${'+streamName+'}',tmpDsName,'input',hidden=True,
                                                      expand=True,include=tmpMap['pattern'],offset=tmpMap['nSkip'],
                                                      nFilesPerJob=tmpMap['nFiles'])
                taskParamMap['jobParameters'] += dictItem
                inMap[streamName] = 'tmp_'+streamName 
                streamNames.append(streamName)
            # make constant item
            strInMap = str(inMap)
            # set placeholders
            for streamName in streamNames:
                strInMap = strInMap.replace("'tmp_"+streamName+"'",'${'+streamName+'/T}')
            dictItem = {'type':'constant',
                        'value':'--inMap "%s"' % strInMap,
                        }
            taskParamMap['jobParameters'] += [dictItem]

        # misc
        jobParameters = ''
        # use Athena packages
        if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA','ROOT','EXE'] and app.useAthenaPackages):
            jobParameters += "--useAthenaPackages "
            
        # use RootCore
        if app.useRootCore or app.useRootCoreNoBuild:
            jobParameters += "--useRootCore "
            
        # use mana
        if app.useMana:
            jobParameters += "--useMana "
            if app.atlas_release != "":
                jobParameters += "--manaVer %s " % app.atlas_release
        # root
        if app.atlas_exetype in ['PYARA','ROOT','EXE'] and job.backend.requirements.rootver != '':
            rootver = re.sub('/','.', job.backend.requirements.rootver)
            jobParameters += "--rootVer %s " % rootver

        # write input to txt
        #if options.writeInputToTxt != '':
        #    jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt
        # debug parameters
        #if options.queueData != '':
        #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
        # JEM
        #if options.enableJEM:
        #    jobParameters += "--enable-jem "
        #    if options.configJEM != '':
        #        jobParameters += "--jem-config %s " % options.configJEM

        # set task param
        if jobParameters != '':
            taskParamMap['jobParameters'] += [ 
                {'type':'constant',
                 'value': jobParameters,
                 },
                ]

        # force stage-in
        if job.backend.accessmode == "LocalIO":
            taskParamMap['useLocalIO'] = 1

        # set jobO parameter
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '-j "',
                 'padding':False,
                 },
                ]
            taskParamMap['jobParameters'] += PsubUtils.convertParamStrToJediParam(tmpJobO,inputMap,job.outputdata.datasetname[:-1], True,False)
            taskParamMap['jobParameters'] += [
                {'type':'constant',
                 'value': '"',
                 },
                ]

        else:
            taskParamMap['jobParameters'] += [ {'type':'constant',
                                                'value': '-p "{0}"'.format(urllib.quote(self.job_options)),
                                                },
                                               ]

        # build step
        if not job.backend.nobuild:
            jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} '

            if job.backend.bexec != '':
                jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)

            if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA','ROOT','EXE'] and app.useAthenaPackages):
                # use Athena packages
                jobParameters += "--useAthenaPackages "
            # use RootCore
            if app.useRootCore or app.useRootCoreNoBuild:
                jobParameters += "--useRootCore "

            # run directory
            if app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:
                jobParameters += '-r {0} '.format(self.rundirectory)
                
            # no compile
            #if options.noCompile:
            #    jobParameters += "--noCompile "
            # use mana
            if app.useMana:
                jobParameters += "--useMana "
                if app.atlas_release != "":
                    jobParameters += "--manaVer %s " % app.atlas_release

            # root
            if app.atlas_exetype in ['PYARA','ROOT','EXE'] and job.backend.requirements.rootver != '':
                rootver = re.sub('/','.', job.backend.requirements.rootver)
                jobParameters += "--rootVer %s " % rootver
                
            # cmt config
            if app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']:
                if not app.atlas_cmtconfig in ['','NULL',None]:
                    jobParameters += " --cmtConfig %s " % app.atlas_cmtconfig
                                            
            
            #cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
            #if cmtConfig:
            #    jobParameters += "--cmtConfig %s " % cmtConfig
            # debug parameters
            #if options.queueData != '':
            #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
            # set task param
            taskParamMap['buildSpec'] = {
                'prodSourceLabel':'panda',
                'archiveName':os.path.basename(self.inputsandbox),
                'jobParameters':jobParameters,
                }


        # enable merging
        if job.backend.requirements.enableMerge:
            jobParameters = '-r {0} '.format(self.rundirectory)
            if 'exec' in job.backend.requirements.configMerge and job.backend.requirements.configMerge['exec'] != '':
                jobParameters += '-j "{0}" '.format(job.backend.requirements.configMerge['exec'])
            if not job.backend.nobuild:
                jobParameters += '-l ${LIB} '
            else:
                jobParameters += '-a {0} '.format(os.path.basename(self.inputsandbox))
                jobParameters += "--sourceURL ${SURL} "
            jobParameters += '${TRN_OUTPUT:OUTPUT} ${TRN_LOG:LOG}'
            taskParamMap['mergeSpec'] = {}
            taskParamMap['mergeSpec']['useLocalIO'] = 1
            taskParamMap['mergeSpec']['jobParameters'] = jobParameters
            taskParamMap['mergeOutput'] = True    
            
        # Selected by Jedi
        #if not app.atlas_exetype in ['PYARA','ROOT','EXE']:
        #    taskParamMap['transPath'] = 'http://atlpan.web.cern.ch/atlpan/runAthena-00-00-12'

        logger.debug(taskParamMap)

        # upload sources
        if self.inputsandbox and not job.backend.libds:
            uploadSources(os.path.dirname(self.inputsandbox),os.path.basename(self.inputsandbox))

            if not self.inputsandbox == tmp_user_area_name:
                logger.info('Removing source tarball %s ...' % self.inputsandbox )
                os.remove(self.inputsandbox)

        return taskParamMap
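master_prepare() ultimately returns the taskParamMap dictionary that is submitted to JEDI. For orientation, a stripped-down illustration of its shape follows; every value is an invented placeholder (dataset names, release, architecture), not output of the code above.

# illustrative only: the core fields the handler above always populates
taskParamMap = {
    'taskName': 'user.someone.ganga.test/',
    'uniqueTaskName': True,
    'vo': 'atlas',
    'architecture': 'x86_64-slc6-gcc47-opt',
    'transUses': 'Atlas-17.2.7',
    'transHome': 'AnalysisTransforms',
    'processingType': 'ganga-jedi-run',
    'prodSourceLabel': 'user',
    'log': {'dataset': 'user.someone.ganga.test.log/',
            'container': 'user.someone.ganga.test.log/',
            'type': 'template',
            'param_type': 'log',
            'value': 'user.someone.ganga.test.log.${SN}.log.tgz'},
    'jobParameters': [
        {'type': 'constant', 'value': '-j "" --sourceURL ${SURL}'},
        {'type': 'constant', 'value': '-r .'},
    ],
}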
Example #14
        path = '.'

    if os.path.exists(path) and os.path.isdir(path) and 'pandatools' in os.listdir(path) \
           and os.path.exists('%s/pandatools/__init__.py' % path):
        # make symlink for module name
        os.symlink('%s/pandatools' % path, '%s/taskbuffer' % tmpDir)
        break
sys.path = [tmpDir] + sys.path

# total time
TotalCPUTime = 0
TotalRealTime = 0
TotalJobs = 0

tmpLog = PLogger.getPandaLogger()
gridPassPhrase, vomsFQAN = PsubUtils.checkGridProxy("", False,
                                                    prog_options.verbose)
bookConf = BookConfig.getConfig()

if prog_options.verbose > 0:
    verbose2 = prog_options.verbose - 1
else:
    verbose2 = 0

for strid in prog_args:
    print "Retrieving information for Jobset:", strid
    status, pandaIDstatus = Client.getPandIDsWithJobID(int(strid))
    if prog_options.verbose:
        print status, pandaIDstatus

    if status != 0:
        print "Skip..."
Example #15
 def retry(self,JobsetID,newSite=False,newOpts={},noSubmit=False,ignoreDuplication=False,useJobsetID=False,retryBuild=False,reproduceFiles=[],unsetRetryID=False):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
         self.gridPassPhrase,
         False,
         self.verbose,
         useCache=True)
     # force update just in case
     self.status(JobsetID,True)
     # rebind to a fresh empty dict so the mutable default argument is never modified
     if newOpts == {}:
         newOpts = {}
     # get jobset
     newJobsetID = -1
     jobList = self.getJobIDsWithSetID(JobsetID)
     if jobList == None:
         # works only for jobsetID
         if useJobsetID:
             return
         # works with jobID   
         isJobset = False
         jobList = [JobsetID]
     else:
         isJobset = True
         tmpMsg = "ID=%s is composed of JobID=" % JobsetID
         for tmpJobID in jobList:
             tmpMsg += '%s,' % tmpJobID
         tmpMsg = tmpMsg[:-1]
         tmpLog.info(tmpMsg)
     for JobID in jobList:    
         # get job info from local repository
         localJob = self.getJobInfo(JobID)
         if localJob == None:
             tmpLog.warning("JobID=%s not found in local repository. Synchronization may be needed" % JobID)            
             return None
         # for JEDI
         if localJob.isJEDI():
             status,out = Client.retryTask(
                     localJob.jediTaskID,
                     verbose=self.verbose,
                     properErrorCode=True,
                     newParams=newOpts)
             if status != 0:
                 tmpLog.error(status)
                 tmpLog.error(out)
                 tmpLog.error("Failed to retry TaskID=%s" % localJob.jediTaskID)
                 return False
             tmpStat,tmpDiag = out
             if (not tmpStat in [0,True] and newOpts == {}) or (newOpts != {} and tmpStat != 3):
                 tmpLog.error(tmpDiag)
                 tmpLog.error("Failed to retry TaskID=%s" % localJob.jediTaskID)
                 return False
             tmpLog.info(tmpDiag)
             continue
         # skip running job
         if localJob.dbStatus != 'frozen':
             tmpLog.info('Retry failed subjobs in running jobId=%s' % JobID)
             status,out = Client.retryFailedJobsInActive(JobID,verbose=self.verbose)
             if status != 0:
                 tmpLog.error(status)
                 tmpLog.error(out)
                 tmpLog.error("Failed to retry JobID=%s" % JobID)
             else:
                 job = self.status(JobID)
             if isJobset:
                 continue
             else:
                 return
         # skip already retried
         if localJob.retryID != '0':
             if isJobset:
                 tmpLog.info('Skip JobID=%s since already retried by JobID=%s JobsetID=%s' % \
                             (JobID,localJob.retryID,localJob.retryJobsetID))
                 continue
             else:
                 tmpLog.warning('This job was already retried by JobID=%s' % localJob.retryID)
                 return
         # check status of buildJob
         if not retryBuild and not localJob.buildStatus in ['','finished']:
             tmpMsgStr = 'Cannot retry since status of buildJob %s is %s (!= finished). ' \
                         % (localJob.PandaID.split(',')[0],localJob.buildStatus)
             tmpMsgStr += 'Please execute %s with the same input/output datasets (or containers). ' % localJob.jobType
             tmpMsgStr += 'It will run only on failed/cancelled/unused input files '
             tmpMsgStr += 'and append output files to the output dataset container. '
             tmpMsgStr += 'Or you may set retryBuild=True in pbook.retry() '                
             tmpLog.warning(tmpMsgStr)
             if isJobset:
                 continue
             else:
                 return
         # check opts for newSite
         if newSite or newOpts != {}:
             if not localJob.outDS.endswith('/') and not newOpts.has_key('outDS') and not newOpts.has_key('--outDS'):
                 tmpLog.warning('You need to specify --outDS in newOpts to retry at new site unless container is used as output')
                 return
         # get list of failed jobs
         pandaIDs  = localJob.PandaID.split(',')
         statusList= localJob.jobStatus.split(',')
         jobList = []
         for idx in range(len(pandaIDs)):
             # check status unless reproduce files
             if reproduceFiles == [] and not statusList[idx] in ['failed','cancelled']:
                 continue
             jobList.append(pandaIDs[idx])
         # no failed job
         if jobList == []:
             if isJobset:
                 tmpLog.info('Skip JobID=%s since no failed jobs' % JobID)                    
                 continue
             else:
                 tmpLog.info('No failed jobs to be retried for JobID=%s' % JobID)
                 return
         # get full job spec
         tmpLog.info("Retrying JobID=%s ..." % JobID)
         tmpLog.info("Getting job info")
         idxJL  = 0
         nQuery = 500
         pandaJobs = []
         while idxJL < len(jobList):
             # avoid burst query
             tmpLog.info(" %5s/%s" % (idxJL,len(jobList)))                
             status,oTmp = Client.getFullJobStatus(
                     jobList[idxJL:idxJL+nQuery],
                     verbose=self.verbose)
             if status != 0:
                 tmpLog.error(status)
                 tmpLog.error(oTmp)
                 tmpLog.error("Cannot get job info from Panda server")
                 return
             pandaJobs += oTmp
             idxJL += nQuery
             time.sleep(1)
         tmpLog.info(" %5s/%s" % (len(jobList),len(jobList)))
         # get PandaIDs to reproduce files
         if reproduceFiles != []:
             # change wildcard to .* for regexp
             reproduceFilePatt = []
             for tmpReproduceFile in reproduceFiles:
                 if '*' in tmpReproduceFile:
                     tmpReproduceFile = tmpReproduceFile.replace('*','.*')
                 reproduceFilePatt.append(tmpReproduceFile)
             # get list of jobs which produced interesting files    
             tmpJobList = []
             tmpPandaJobs = []
             for tmpPandaJob in pandaJobs:
                 # check names
                 tmpMatchFlag = False
                 for tmpFile in tmpPandaJob.Files:
                     if tmpFile.type == 'output' and tmpFile.status == 'ready':
                         for tmpReproduceFile in reproduceFilePatt:
                             # normal matching
                             if tmpReproduceFile == tmpFile.lfn:
                                 tmpMatchFlag = True
                                 break
                             # wild card
                             if '*' in tmpReproduceFile and \
                                re.search('^'+tmpReproduceFile,tmpFile.lfn) != None:
                                 tmpMatchFlag = True
                                 break
                         if tmpMatchFlag:
                             break
                 # append
                 if tmpMatchFlag:
                     tmpJobList.append(tmpPandaJob.PandaID)
                     tmpPandaJobs.append(tmpPandaJob)
             # use new list
             jobList = tmpJobList
             pandaJobs = tmpPandaJobs
             if jobList == []:
                 tmpLog.info("No jobs to reproduce files : Jobs in JobID=%s didn't produce lost files" % JobID)
                 continue
         # jobdefID
         newJobdefID = PsubUtils.readJobDefID()
         # reset some parameters
         retryJobs    = []
         retrySite    = None
         retryElement = None
         retryDestSE  = None
         outDsName    = None
         shadowList   = []
         oldLibDS     = None
         newLibDS     = None
         newLibTgz    = None
         rebroMap     = {}
         for idx in range(len(jobList)):
             job = pandaJobs[idx]
             # skip expired
             if job == None:
                 tmpLog.warning("Could not retry jobs older than 30 days : JobID=%s (PandaID=%s) expired" \
                                 % (JobID,jobList[idx]))
                 return
             # skip jobs reassigned by rebrokerage
             if (job.jobStatus == 'cancelled' and job.taskBufferErrorCode in [105,'105']) or \
                    (job.jobStatus == 'failed' and job.taskBufferErrorCode in [106,'106']):
                 # extract JobIDs of reassigned jobs
                 tmpM = re.search('JobsetID=(\d+) JobID=(\d+)',job.taskBufferErrorDiag)
                 if tmpM != None:
                     tmpRebKey = (tmpM.group(1),tmpM.group(2))
                     if not rebroMap.has_key(tmpRebKey):
                         rebroMap[tmpRebKey] = 0
                     # count # of reassigned jobs
                     rebroMap[tmpRebKey] += 1
                 continue
             # get shadow list
             if (not ignoreDuplication) and outDsName == None and job.prodSourceLabel == 'user':
                 # look for dataset for log since it doesn't have suffix even when --individualOutDS is used
                 for tmpFile in job.Files:
                     if tmpFile.type == 'log':
                         outDsName = tmpFile.dataset
                         break
                 # output dataset was not found    
                 if outDsName == None:
                     tmpLog.error("Could not get output dataset name for JobID=%s (PandaID=%s)" \
                                  % (JobID,job.PandaID))
                     return
                 # get files in shadow
                 if outDsName.endswith('/'):
                     shadowList = Client.getFilesInShadowDataset(
                             outDsName,
                             Client.suffixShadow,
                             self.verbose)
                 else:
                     # disable duplication check mainly for old overlay jobs since non-signal files are wrongly skipped
                     #shadowList = Client.getFilesInShadowDatasetOld(outDsName,Client.suffixShadow,self.verbose)
                     pass
             # unify sitename
             if retrySite == None:
                 retrySite    = job.computingSite
                 retryElement = job.computingElement
                 retryDestSE  = job.destinationSE
             # reset
             job.jobStatus           = None
             job.commandToPilot      = None
             job.startTime           = None
             job.endTime             = None
             job.attemptNr           = 1+job.attemptNr
             for attr in job._attributes:
                 if attr.endswith('ErrorCode') or attr.endswith('ErrorDiag'):
                     setattr(job,attr,None)
             job.transExitCode       = None
             job.computingSite       = retrySite
             job.computingElement    = retryElement
             job.destinationSE       = retryDestSE
             job.dispatchDBlock      = None
             if not unsetRetryID:
                 job.jobExecutionID  = JobID
             job.jobDefinitionID     = newJobdefID
             job.parentID            = job.PandaID
              if not job.jobsetID in ['NULL',None,-1]:
                 if not unsetRetryID:
                     job.sourceSite  = job.jobsetID
                 job.jobsetID        = newJobsetID
             skipInputList = []
             numUsedFiles = 0
             # loop over all files    
             for file in job.Files:
                 file.rowID = None
                 if file.type == 'input':
                     # protection against wrong sync which doesn't update buildStatus correctly
                     if not retryBuild and file.lfn.endswith('.lib.tgz') and file.GUID == 'NULL':
                         tmpLog.warning('GUID for %s is unknown. Cannot retry when corresponding buildJob failed' \
                                        % file.lfn)
                         return
                     if not retryBuild or not file.lfn.endswith('.lib.tgz'):
                         file.status = 'ready'
                     # set new lib dataset    
                     if retryBuild and file.lfn.endswith('.lib.tgz'):
                         if newLibTgz != None:
                             file.lfn            = newLibTgz
                             file.dataset        = newLibDS
                             file.dispatchDBlock = newLibDS
                     # check with shadow for non lib.tgz/DBR 
                     tmpDbrMatch = re.search('^DBRelease-.*\.tar\.gz$',file.lfn)
                     if tmpDbrMatch == None and not file.lfn.endswith('.lib.tgz'):
                         if file.lfn in shadowList:
                             skipInputList.append(file)
                         else:
                             numUsedFiles += 1
                 elif file.type in ('output','log'):
                     file.destinationSE = retryDestSE
                     file.destinationDBlock = re.sub('_sub\d+$','',file.destinationDBlock)
                     # add retry num
                     if file.dataset.endswith('/') or job.prodSourceLabel == 'panda':
                         oldOutDsName = file.destinationDBlock
                         retryDsPatt = '_r'
                         if reproduceFiles != []:
                             retryDsPatt = '_rp'
                         retryMatch = re.search(retryDsPatt+'(\d+)$',file.destinationDBlock)
                         if retryMatch == None:
                             file.destinationDBlock += (retryDsPatt+'1')
                         else:
                             tmpDestinationDBlock = re.sub(retryDsPatt+'(\d+)$','',file.destinationDBlock)
                             file.destinationDBlock = tmpDestinationDBlock + retryDsPatt + '%d' % (1+int(retryMatch.group(1)))
                         if job.processingType == 'usermerge':
                             job.jobParameters = job.jobParameters.replace(' %s ' % oldOutDsName,
                                                                           ' %s ' % file.destinationDBlock)
                         # use new dataset name for buildXYZ
                         if job.prodSourceLabel == 'panda':
                             if file.lfn.endswith('.lib.tgz'):
                                 # get new libDS and lib.tgz names
                                 oldLibDS  = file.dataset
                                 file.dataset = file.destinationDBlock
                                 newLibDS = file.dataset
                                 file.lfn = re.sub(oldLibDS,newLibDS,file.lfn)
                                 newLibTgz = file.lfn
                             else:
                                 file.dataset = file.destinationDBlock                                    
                     # add attempt nr
                     oldName  = file.lfn
                     if job.prodSourceLabel == 'panda' and file.lfn.endswith('.lib.tgz'):
                         continue
                     else:
                         # append attempt number at the tail 
                         file.lfn = re.sub("\.\d+$","",file.lfn)
                         file.lfn = "%s.%d" % (file.lfn,job.attemptNr)
                     newName  = file.lfn
                     # modify jobParameters
                     job.jobParameters = re.sub("'%s'" % oldName ,"'%s'" % newName,
                                                job.jobParameters)
                     # look for output in trf
                      oldGenericName = re.sub('\.\d+$','',oldName)
                      match = re.search(oldGenericName+'(\.\d+)*(%20|")',job.jobParameters)
                     if match != None:
                         job.jobParameters = job.jobParameters.replace(match.group(0),newName+match.group(2))
             # change lib.tgz name
             if retryBuild and newLibDS != None:
                 job.jobParameters = re.sub(oldLibDS,newLibDS,job.jobParameters)
                 # change destinationDBlock
                 if job.prodSourceLabel == 'panda':
                     job.destinationDBlock = newLibDS
             # all files are used by others
             if numUsedFiles == 0 and skipInputList != []:
                 continue
             # remove skipped files
             strSkipped = ''
             for tmpFile in skipInputList:
                 strSkipped += '%s,' % tmpFile.lfn
                 job.Files.remove(tmpFile)
             strSkipped = strSkipped[:-1]
             # modify jobpar
             if strSkipped != '':
                 optionToSkipFiles = '--skipInputByRetry'
                 if not optionToSkipFiles in job.jobParameters:
                     # just append
                     job.jobParameters += "%s=%s " % (optionToSkipFiles,strSkipped)
                 else:
                     # extract already skipped files
                      tmpMatch = re.search("(%s=[^ ]+)" % optionToSkipFiles,job.jobParameters)
                     if tmpMatch == None:
                         tmpLog.error("Failed to extract arg of %s for PandaID=%s" \
                                      % (optionToSkipFiles,job.PandaID))
                         return
                      # append the newly skipped files to the existing argument
                      job.jobParameters = re.sub(tmpMatch.group(1),"%s,%s" % (tmpMatch.group(1),strSkipped),
                                                job.jobParameters)
             if self.verbose:
                 tmpLog.debug(job.jobParameters)
             # append
             retryJobs.append(job)
          # info on rebrokerage
         if rebroMap != {}:
             for tmpRebKey,tmpRebNumJobs in rebroMap.iteritems():
                 tmpRebSetID,tmpRebJobID = tmpRebKey
                 tmpLog.info('Skip %s jobs since JobID=%s JobsetID=%s already reassigned them to another site' % \
                             (tmpRebNumJobs,tmpRebJobID,tmpRebSetID))
             if retryJobs == []:
                 tmpLog.info("No more jobs to be retried for JobID=%s" % JobID)
                 if isJobset:
                     continue
                 else:
                     return
         # all input files were or are being used by other jobs
         if retryJobs == []:
             tmpLog.info('All input files were or are being used by other jobs for the same output. No jobs to be retried. If you need to ignore duplication check (e.g., using the same EVNT file for multiple simulation subjobs), set ignoreDuplication=True. i.e. retry(123,ignoreDuplication=True)')
             if isJobset:
                 continue
             else:
                 return
         # check voms role
         if not retryJobs[0].workingGroup in ['NULL',None,'']:
             # VOMS role was used 
             if not "--workingGroup" in job.metadata:
                 # extract voms roles from metadata
                 match =  re.search("--voms( |=)[ \"]*([^ \"]+)",job.metadata)
                 if match != None:
                     vomsRoles = match.group(2)
                 else:
                     vomsRoles = "atlas:/atlas/%s/Role=production" % retryJobs[0].workingGroup
             # regenerate proxy with VOMS roles
             try:
                 tmpLog.info("Checking proxy role to resubmit %s jobs" % retryJobs[0].workingGroup)
                 self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
                         self.gridPassPhrase,
                         False,
                         self.verbose,vomsRoles,
                         useCache=True)
             except:
                 tmpLog.error("Failed to generate a proxy with %s" % vomsRoles)
                 return
         # check runtime env for new site submission
         if (newSite or newOpts != {}):
             if retryJobs[0].processingType == 'pathena' or '--useAthenaPackages' in retryJobs[0].metadata:
                 from pandatools import AthenaUtils
                 stA,retA = AthenaUtils.getAthenaVer()
                 if not stA:
                     tmpLog.error("Failed to get Athena rel/cache version in current runtime env")
                     return
                 athenaVer = retA['athenaVer']
                 cacheVer  = retA['cacheVer']
                 nightVer  = retA['nightVer']
                 wrongSetup = False
                 if retryJobs[0].AtlasRelease != 'Atlas-%s' % athenaVer:
                     wrongSetup = True
                     errMsg =  "Current Athena version Atlas-%s is inconsitent with the previous submission %s. " % (athenaVer,retryJobs[0].AtlasRelease)
                 elif retryJobs[0].homepackage != 'AnalysisTransforms'+cacheVer+nightVer:
                     wrongSetup = True                        
                     errMsg =  "Current cache version %s is inconsitent with the previous submission. " % cacheVer.replace('-','').replace('_','-')
                 if wrongSetup:    
                      errMsg += 'You need to have the same runtime env as before since all job specs need to be re-created to send jobs to a new site. '
                      errMsg += 'Please set up Athena correctly and restart pbook'
                     tmpLog.error(errMsg)
                     return
         # test mode
         if noSubmit:
             continue
         # invoke pathena/prun to send job to new site
         if (newSite or newOpts != {}) and retryJobs[0].processingType != 'usermerge':
             # set parent jobID and jobsetID
             newOpts['provenanceID'] = retryJobs[0].jobExecutionID
             newOpts['panda_parentJobsetID'] = retryJobs[0].sourceSite
             tmpLog.info("Constructing job spec again to be sent to another site ...")
             comStat= PsubUtils.execWithModifiedParams(retryJobs,newOpts,self.verbose,newSite)
             if comStat == 0:
                 # update database
                 time.sleep(2)
                 self.sync()
             else:
                 tmpLog.error("Failed to submit jobs to Panda server")                
             return
         # register datasets
         tmpOutDsLocation = Client.PandaSites[retryJobs[-1].computingSite]['ddm']
         addedDataset = []
         shadowDSname = None
         for tmpFile in retryJobs[-1].Files:
             if tmpFile.type in ['output','log'] and tmpFile.dataset.endswith('/'):
                 # add shadow
                 """
                 removed shadow
                 if shadowDSname == None and tmpFile.type == 'log':
                     shadowDSname = "%s%s" % (tmpFile.destinationDBlock,Client.suffixShadow)
                     Client.addDataset(shadowDSname,self.verbose)
                 """    
                 # add datasets    
                 if not tmpFile.destinationDBlock in addedDataset:
                     # create dataset
                     Client.addDataset(
                             tmpFile.destinationDBlock,
                             self.verbose,
                             location=tmpOutDsLocation,
                             dsCheck=False)
                     # add to container
                     Client.addDatasetsToContainer(
                             tmpFile.dataset,
                             [tmpFile.destinationDBlock],
                             self.verbose)
                     # append
                     addedDataset.append(tmpFile.destinationDBlock)
         # register libDS
         if retryBuild and newLibDS != None:
             Client.addDataset(
                     newLibDS,
                     self.verbose,
                     location=tmpOutDsLocation,
                     dsCheck=False)
         # submit
         tmpLog.info("Submitting job ...")            
         status,out = Client.submitJobs(retryJobs,verbose=self.verbose)
         if out == None or status != 0:
             tmpLog.error(status)
             tmpLog.error(out)
             tmpLog.error("Failed to submit jobs to Panda server")
             return
         # update database
         pandaIDstatus = {}
         newJobID = None
         for items in out:
             # get newJobID
             if newJobID == None:
                 newJobID = items[1]
             # check PandaID
             PandaID = items[0]
             if PandaID == 'NULL':
                 tmpLog.error("Panda server returned wrong IDs. It may have a temporary problem")
                 return
             # set newJobsetID
             if newJobsetID in [None,-1]:
                 newJobsetID = items[2]['jobsetID']
              # dummy status
             pandaIDstatus[PandaID] = ('defined','NULL')
         # set retry ID
         if not unsetRetryID:
             localJob.retryID = newJobID
             if not newJobsetID in [None,-1,'NULL']:
                 localJob.retryJobsetID = newJobsetID
             try:
                 PdbUtils.updateJobDB(localJob,self.verbose)
             except:
                 tmpLog.error("Failed to set retryID for JobID=%s" % JobID)
                 return
          # set new parameters
         newLocalJob = PdbUtils.convertPtoD(retryJobs,pandaIDstatus)
         newLocalJob.JobID = newJobID
         if not newJobsetID in [None,-1,'NULL']:
             newLocalJob.groupID = newJobsetID
         newLocalJob.creationTime = datetime.datetime.utcnow()
         # insert to DB
         try:
             PdbUtils.insertJobDB(newLocalJob,self.verbose)
         except:
             tmpLog.error("Failed to insert JobID=%s to local repository" % newJobID)
             return
         # write new jobdefID
         PsubUtils.writeJobDefID(newJobID)
         # done
         tmpMsg = 'Done. New JobID=%s' % newJobID
         if not newJobsetID in [None,-1,'NULL']:
             tmpMsg += " JobsetID=%s" % newJobsetID
         tmpLog.info(tmpMsg)
Example #16
0
                    jsonExecStr += ' --{0}'.format(k)
                else:
                    if isinstance(v, types.StringType):
                        jsonExecStr += " --{0}='{1}'".format(k, v)
                    else:
                        jsonExecStr += " --{0}={1}".format(k, v)
            else:
                tmpLog.warning('ignore unknown option {0} in {1}'.format(
                    k, options.loadJson))

if options.version:
    print("Version: %s" % PandaToolsPkgInfo.release_version)
    sys.exit(0)

# check grid-proxy
PsubUtils.check_proxy(options.verbose, options.vomsRoles)

# check options
#non_null_opts = ['outDS', 'evaluationContainer', 'evaluationExec', 'steeringContainer', 'steeringExec']
non_null_opts = [
    'outDS', 'evaluationContainer', 'evaluationExec', 'steeringExec'
]
for opt_name in non_null_opts:
    if getattr(options, opt_name) is None:
        tmpLog.error('--{0} is not specified'.format(opt_name))
        sys.exit(1)

if not options.outDS.endswith('/'):
    options.outDS += '/'

if options.maxEvaluationJobs is None:
Example #17
0
    def master_prepare(self, app, appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from pandatools import MiscUtils
        from pandatools import AthenaUtils
        from pandatools import PsubUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from pandatools import PandaToolsPkgInfo

        # create a random number for this submission to allow multiple use of containers
        self.rndSubNum = random.randint(1111, 9999)

        job = app._getParent()
        logger.debug('AthenaJediRTHandler master_prepare called for %s',
                     job.getFQID('.'))

        if app.useRootCoreNoBuild:
            logger.info(
                'Athena.useRootCoreNoBuild is True, setting Panda.nobuild=True.'
            )
            job.backend.nobuild = True

        if job.backend.bexec and job.backend.nobuild:
            raise ApplicationConfigurationError(
                None,
                "Contradicting options: job.backend.bexec and job.backend.nobuild are both enabled."
            )

        if job.backend.requirements.rootver != '' and job.backend.nobuild:
            raise ApplicationConfigurationError(
                None,
                "Contradicting options: job.backend.requirements.rootver given and job.backend.nobuild are enabled."
            )

        # Switch on compilation flag if bexec is set or libds is empty
        if job.backend.bexec != '' or not job.backend.nobuild:
            app.athena_compile = True
            for sj in job.subjobs:
                sj.application.athena_compile = True
            logger.info(
                '"job.backend.nobuild=False" or "job.backend.bexec" is set - Panda build job is enabled.'
            )

        if job.backend.nobuild:
            app.athena_compile = False
            for sj in job.subjobs:
                sj.application.athena_compile = False
            logger.info(
                '"job.backend.nobuild=True" or "--nobuild" chosen - Panda build job is switched off.'
            )

        # check for auto datri
        if job.outputdata.location != '':
            if not PsubUtils.checkDestSE(job.outputdata.location,
                                         job.outputdata.datasetname, False):
                raise ApplicationConfigurationError(
                    None, "Problems with outputdata.location setting '%s'" %
                    job.outputdata.location)

        # validate application
        if not app.atlas_release and not job.backend.requirements.rootver and not app.atlas_exetype in [
                'EXE'
        ]:
            raise ApplicationConfigurationError(
                None,
                "application.atlas_release is not set. Did you run application.prepare()"
            )

        self.dbrelease = app.atlas_dbrelease
        if self.dbrelease != '' and self.dbrelease != 'LATEST' and self.dbrelease.find(
                ':') == -1:
            raise ApplicationConfigurationError(
                None,
                "ERROR : invalid argument for DB Release. Must be 'LATEST' or 'DatasetName:FileName'"
            )

        self.runConfig = AthenaUtils.ConfigAttr(app.atlas_run_config)
        for k in self.runConfig.keys():
            self.runConfig[k] = AthenaUtils.ConfigAttr(self.runConfig[k])
        if not app.atlas_run_dir:
            raise ApplicationConfigurationError(
                None,
                "application.atlas_run_dir is not set. Did you run application.prepare()"
            )

        self.rundirectory = app.atlas_run_dir
        self.cacheVer = ''
        if app.atlas_project and app.atlas_production:
            self.cacheVer = "-" + app.atlas_project + "_" + app.atlas_production

        # handle different atlas_exetypes
        self.job_options = ''
        if app.atlas_exetype == 'TRF':
            self.job_options += ' '.join(
                [os.path.basename(fopt.name) for fopt in app.option_file])

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError(None,"job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            #raise ApplicationConfigurationError(None,"Sorry TRF on Panda backend not yet supported")

            if app.options:
                self.job_options += ' %s ' % app.options

        elif app.atlas_exetype == 'ATHENA':

            if len(app.atlas_environment) > 0 and app.atlas_environment[
                    0].find('DBRELEASE_OVERRIDE') == -1:
                logger.warning(
                    "Passing of environment variables to Athena using Panda not supported. Ignoring atlas_environment setting."
                )

            if job.outputdata.outputdata:
                raise ApplicationConfigurationError(
                    None,
                    "job.outputdata.outputdata must be empty if atlas_exetype='ATHENA' and Panda backend is used (outputs are auto-detected)"
                )
            if app.options:
                if app.options.startswith('-c'):
                    self.job_options += ' %s ' % app.options
                else:
                    self.job_options += ' -c %s ' % app.options

                logger.warning(
                    'The value of j.application.options has been prepended with " -c " '
                )
                logger.warning(
                    'Please make sure to use proper quotes for the values of j.application.options !'
                )

            self.job_options += ' '.join(
                [os.path.basename(fopt.name) for fopt in app.option_file])

            # check for TAG compression
            if 'subcoll.tar.gz' in app.append_to_user_area:
                self.job_options = ' uncompress.py ' + self.job_options

        elif app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:

            #if not job.outputdata.outputdata:
            #    raise ApplicationConfigurationError(None,"job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
            self.job_options += ' '.join(
                [os.path.basename(fopt.name) for fopt in app.option_file])

            # sort out environment variables
            env_str = ""
            if len(app.atlas_environment) > 0:
                for env_var in app.atlas_environment:
                    env_str += "export %s ; " % env_var
            else:
                env_str = ""

            # below fixes issue with runGen -- job_options are executed by os.system when dbrelease is used, and by the shell otherwise
            ## - REMOVED FIX DUE TO CHANGE IN PILOT - MWS 8/11/11
            if job.backend.requirements.usecommainputtxt:
                input_str = '/bin/echo %IN > input.txt; cat input.txt; '
            else:
                input_str = '/bin/echo %IN | sed \'s/,/\\\n/g\' > input.txt; cat input.txt; '
            if app.atlas_exetype == 'PYARA':
                self.job_options = env_str + input_str + ' python ' + self.job_options
            elif app.atlas_exetype == 'ARES':
                self.job_options = env_str + input_str + ' athena.py ' + self.job_options
            elif app.atlas_exetype == 'ROOT':
                self.job_options = env_str + input_str + ' root -b -q ' + self.job_options
            elif app.atlas_exetype == 'EXE':
                self.job_options = env_str + input_str + self.job_options

            if app.options:
                self.job_options += ' %s ' % app.options

        if self.job_options == '':
            raise ApplicationConfigurationError(None, "No Job Options found!")
        logger.info('Running job options: %s' % self.job_options)

        # validate dbrelease
        if self.dbrelease != "LATEST":
            self.dbrFiles, self.dbrDsList = getDBDatasets(
                self.job_options, '', self.dbrelease)

        # handle the output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError(
                    None, 'Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        # validate the output dataset name (and make it a container)
        job.outputdata.datasetname, outlfn = dq2outputdatasetname(
            job.outputdata.datasetname, job.id, job.outputdata.isGroupDS,
            job.outputdata.groupname)
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname += '/'

        # add extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)
        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # use the shared area if possible
        tmp_user_area_name = app.user_area.name
        if app.is_prepared is not True:
            from Ganga.Utility.files import expandfilename
            shared_path = os.path.join(
                expandfilename(getConfig('Configuration')['gangadir']),
                'shared',
                getConfig('Configuration')['user'])
            tmp_user_area_name = os.path.join(
                os.path.join(shared_path, app.is_prepared.name),
                os.path.basename(app.user_area.name))

        # Add inputsandbox to user_area
        if job.inputsandbox:
            logger.warning(
                "Submitting Panda job with inputsandbox. This may slow the submission slightly."
            )

            if tmp_user_area_name:
                inpw = os.path.dirname(tmp_user_area_name)
                self.inputsandbox = os.path.join(
                    inpw, 'sources.%s.tar' %
                    commands.getoutput('uuidgen 2> /dev/null'))
            else:
                inpw = job.getInputWorkspace()
                self.inputsandbox = inpw.getPath(
                    'sources.%s.tar' %
                    commands.getoutput('uuidgen 2> /dev/null'))

            if tmp_user_area_name:
                rc, output = commands.getstatusoutput(
                    'cp %s %s.gz' % (tmp_user_area_name, self.inputsandbox))
                if rc:
                    logger.error('Copying user_area failed with status %d', rc)
                    logger.error(output)
                    raise ApplicationConfigurationError(
                        None, 'Packing inputsandbox failed.')
                rc, output = commands.getstatusoutput('gunzip %s.gz' %
                                                      (self.inputsandbox))
                if rc:
                    logger.error('Unzipping user_area failed with status %d',
                                 rc)
                    logger.error(output)
                    raise ApplicationConfigurationError(
                        None, 'Packing inputsandbox failed.')

            for fname in [os.path.abspath(f.name) for f in job.inputsandbox]:
                fname = fname.rstrip(os.sep)
                path = os.path.dirname(fname)
                fn = os.path.basename(fname)

                #app.atlas_run_dir
                # get Athena versions
                rc, out = AthenaUtils.getAthenaVer()
                # failed
                if not rc:
                    #raise ApplicationConfigurationError(None, 'CMT could not parse correct environment ! \n Did you start/setup ganga in the run/ or cmt/ subdirectory of your athena analysis package ?')
                    logger.warning(
                        "CMT could not parse correct environment for inputsandbox - will use the atlas_run_dir as default"
                    )

                    # as we don't have to be in the run dir now, create a copy of the run_dir directory structure and use that
                    input_dir = os.path.dirname(self.inputsandbox)
                    run_path = "%s/sbx_tree/%s" % (input_dir,
                                                   app.atlas_run_dir)
                    rc, output = commands.getstatusoutput("mkdir -p %s" %
                                                          run_path)
                    if not rc:
                        # copy this sandbox file
                        rc, output = commands.getstatusoutput(
                            "cp %s %s" % (fname, run_path))
                        if not rc:
                            path = os.path.join(input_dir, 'sbx_tree')
                            fn = os.path.join(app.atlas_run_dir, fn)
                        else:
                            raise ApplicationConfigurationError(
                                None,
                                "Couldn't copy file %s to recreate run_dir for input sandbox"
                                % fname)
                    else:
                        raise ApplicationConfigurationError(
                            None,
                            "Couldn't create directory structure to match run_dir %s for input sandbox"
                            % run_path)

                else:
                    userarea = out['workArea']

                    # strip the path from the filename if present in the userarea
                    ua = os.path.abspath(userarea)
                    if ua in path:
                        fn = fname[len(ua) + 1:]
                        path = ua

                rc, output = commands.getstatusoutput(
                    'tar -h -r -f %s -C %s %s' % (self.inputsandbox, path, fn))
                if rc:
                    logger.error('Packing inputsandbox failed with status %d',
                                 rc)
                    logger.error(output)
                    raise ApplicationConfigurationError(
                        None, 'Packing inputsandbox failed.')

            # remove sandbox tree if created
            if "sbx_tree" in os.listdir(os.path.dirname(self.inputsandbox)):
                rc, output = commands.getstatusoutput(
                    "rm -r %s/sbx_tree" % os.path.dirname(self.inputsandbox))
                if rc:
                    raise ApplicationConfigurationError(
                        None,
                        "Couldn't remove directory structure used for input sandbox"
                    )

            rc, output = commands.getstatusoutput('gzip %s' %
                                                  (self.inputsandbox))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
            self.inputsandbox += ".gz"
        else:
            self.inputsandbox = tmp_user_area_name

        # job name
        jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

        # make task
        taskParamMap = {}
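        # taskParamMap collects the JEDI task parameters; this handler builds it here and
        # returns it so the Panda backend can submit the task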
        # Enforce that outputdataset name ends with / for container
        if not job.outputdata.datasetname.endswith('/'):
            job.outputdata.datasetname = job.outputdata.datasetname + '/'

        taskParamMap['taskName'] = job.outputdata.datasetname

        taskParamMap['uniqueTaskName'] = True
        taskParamMap['vo'] = 'atlas'
        taskParamMap['architecture'] = AthenaUtils.getCmtConfig(
            athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
        if app.atlas_release:
            taskParamMap['transUses'] = 'Atlas-%s' % app.atlas_release
        else:
            taskParamMap['transUses'] = ''
        taskParamMap[
            'transHome'] = 'AnalysisTransforms' + self.cacheVer  #+nightVer

        configSys = getConfig('System')
        gangaver = configSys['GANGA_VERSION'].lower()
        if not gangaver:
            gangaver = "ganga"

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['processingType'] = '{0}-jedi-athena'.format(gangaver)
        else:
            taskParamMap['processingType'] = '{0}-jedi-run'.format(gangaver)

        #if options.eventPickEvtList != '':
        #    taskParamMap['processingType'] += '-evp'
        taskParamMap['prodSourceLabel'] = 'user'
        if job.backend.site != 'AUTO':
            taskParamMap['cloud'] = Client.PandaSites[
                job.backend.site]['cloud']
            taskParamMap['site'] = job.backend.site
        elif job.backend.requirements.cloud != None and not job.backend.requirements.anyCloud:
            taskParamMap['cloud'] = job.backend.requirements.cloud
        if job.backend.requirements.excluded_sites != []:
            taskParamMap['excludedSite'] = expandExcludedSiteList(job)

        # if only a single site is specified, don't set includedSite
        #if job.backend.site != 'AUTO':
        #    taskParamMap['includedSite'] = job.backend.site
        #taskParamMap['cliParams'] = fullExecString
        if job.backend.requirements.noEmail:
            taskParamMap['noEmail'] = True
        if job.backend.requirements.skipScout:
            taskParamMap['skipScout'] = True
        if not app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap[
                'nMaxFilesPerJob'] = job.backend.requirements.maxNFilesPerJob
        if job.backend.requirements.disableAutoRetry:
            taskParamMap['disableAutoRetry'] = 1
        # source URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
        if matchURL != None:
            taskParamMap['sourceURL'] = matchURL.group(1)

        # dataset names
        outDatasetName = job.outputdata.datasetname
        logDatasetName = re.sub('/$', '.log/', job.outputdata.datasetname)
        # log
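        # ${SN} in the value template is the serial-number placeholder substituted per
        # sub-job on the server side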
        taskParamMap['log'] = {
            'dataset': logDatasetName,
            'container': logDatasetName,
            'type': 'template',
            'param_type': 'log',
            'value': '{0}.${{SN}}.log.tgz'.format(logDatasetName[:-1])
        }
        # job parameters
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] = [
                {
                    'type': 'constant',
                    'value': ' --sourceURL ${SURL}',
                },
            ]
        else:
            taskParamMap['jobParameters'] = [
                {
                    'type': 'constant',
                    'value': '-j "" --sourceURL ${SURL}',
                },
            ]

        taskParamMap['jobParameters'] += [
            {
                'type': 'constant',
                'value': '-r {0}'.format(self.rundirectory),
            },
        ]

        # output
        # output files
        outMap = {}
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            outMap, tmpParamList = AthenaUtils.convertConfToOutput(
                self.runConfig,
                self.extOutFile,
                job.outputdata.datasetname,
                destination=job.outputdata.location)
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-o "%s" ' % outMap
                },
            ]
            taskParamMap['jobParameters'] += tmpParamList

        else:
            if job.outputdata.outputdata:
                for tmpLFN in job.outputdata.outputdata:
                    if len(job.outputdata.datasetname.split('.')) > 2:
                        lfn = '{0}.{1}'.format(
                            *job.outputdata.datasetname.split('.')[:2])
                    else:
                        lfn = job.outputdata.datasetname[:-1]
                    lfn += '.$JOBSETID._${{SN/P}}.{0}'.format(tmpLFN)
                    dataset = '{0}_{1}/'.format(
                        job.outputdata.datasetname[:-1], tmpLFN)
                    taskParamMap[
                        'jobParameters'] += MiscUtils.makeJediJobParam(
                            lfn,
                            dataset,
                            'output',
                            hidden=True,
                            destination=job.outputdata.location)
                    outMap[tmpLFN] = lfn

                taskParamMap['jobParameters'] += [
                    {
                        'type': 'constant',
                        'value': '-o "{0}"'.format(str(outMap)),
                    },
                ]

        if app.atlas_exetype in ["ATHENA"]:
            # jobO parameter
            tmpJobO = self.job_options
            # replace full-path jobOs
            for tmpFullName, tmpLocalName in AthenaUtils.fullPathJobOs.iteritems(
            ):
                tmpJobO = re.sub(tmpFullName, tmpLocalName, tmpJobO)
            # modify one-liner for G4 random seeds
            if self.runConfig.other.G4RandomSeeds > 0:
                if app.options != '':
                    tmpJobO = re.sub('-c "%s" ' % app.options,
                                     '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' \
                                         % app.options,tmpJobO)
                else:
                    tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" '
                dictItem = {
                    'type': 'template',
                    'param_type': 'number',
                    'value': '${RNDMSEED}',
                    'hidden': True,
                    'offset': self.runConfig.other.G4RandomSeeds,
                }
                taskParamMap['jobParameters'] += [dictItem]
        elif app.atlas_exetype in ["TRF"]:
            # replace parameters for TRF
            tmpJobO = self.job_options
            # output : basenames are in outMap['IROOT'] through extOutFile
            tmpOutMap = []
            for tmpName, tmpLFN in outMap['IROOT']:
                tmpJobO = tmpJobO.replace('%OUT.' + tmpName, tmpName)
            # replace DBR
            tmpJobO = re.sub('%DB=[^ \'\";]+', '${DBR}', tmpJobO)

        if app.atlas_exetype in ["TRF"]:
            taskParamMap['useLocalIO'] = 1

        # build
        if job.backend.nobuild:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value':
                    '-a {0}'.format(os.path.basename(self.inputsandbox)),
                },
            ]
        else:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-l ${LIB}',
                },
            ]

        #
        # input
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.backend.requirements.nFilesPerJob > 0 and job.inputdata.number_of_files == 0 and job.backend.requirements.split > 0:
                job.inputdata.number_of_files = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.number_of_files != 0:
            taskParamMap['nFiles'] = job.inputdata.number_of_files
        elif job.backend.requirements.nFilesPerJob > 0 and job.backend.requirements.split > 0:
            # pathena does this for some reason even if there are no input files
            taskParamMap[
                'nFiles'] = job.backend.requirements.nFilesPerJob * job.backend.requirements.split
        if job.backend.requirements.nFilesPerJob > 0:
            taskParamMap[
                'nFilesPerJob'] = job.backend.requirements.nFilesPerJob

        if job.backend.requirements.nEventsPerFile > 0:
            taskParamMap[
                'nEventsPerFile'] = job.backend.requirements.nEventsPerFile

        if not job.backend.requirements.nGBPerJob in [0, 'MAX']:
            try:
                if job.backend.requirements.nGBPerJob != 'MAX':
                    job.backend.requirements.nGBPerJob = int(
                        job.backend.requirements.nGBPerJob)
            except:
                logger.error("nGBPerJob must be an integer or MAX")
            # check negative
            if job.backend.requirements.nGBPerJob <= 0:
                logger.error("nGBPerJob must be positive")

            # don't set MAX since it is the default on the server side
            if not job.backend.requirements.nGBPerJob in [-1, 'MAX']:
                taskParamMap['nGBPerJob'] = job.backend.requirements.nGBPerJob

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            inputMap = {}
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {
                    'type': 'template',
                    'param_type': 'input',
                    'value': '-i "${IN/T}"',
                    'dataset': ','.join(job.inputdata.dataset),
                    'expand': True,
                    'exclude': '\.log\.tgz(\.\d+)*$',
                }
                #if options.inputType != '':
                #    tmpDict['include'] = options.inputType
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
                inputMap['IN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1
                taskParamMap['jobParameters'] += [
                    {
                        'type': 'constant',
                        'value': '-i "[]"',
                    },
                ]
        else:
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {
                    'type': 'template',
                    'param_type': 'input',
                    'value': '-i "${IN/T}"',
                    'dataset': ','.join(job.inputdata.dataset),
                    'expand': True,
                    'exclude': '\.log\.tgz(\.\d+)*$',
                }
                #if options.nSkipFiles != 0:
                #    tmpDict['offset'] = options.nSkipFiles
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1

        # param for DBR
        if self.dbrelease != '':
            dbrDS = self.dbrelease.split(':')[0]
            # change LATEST to DBR_LATEST
            if dbrDS == 'LATEST':
                dbrDS = 'DBR_LATEST'
            dictItem = {
                'type': 'template',
                'param_type': 'input',
                'value': '--dbrFile=${DBR}',
                'dataset': dbrDS,
            }
            taskParamMap['jobParameters'] += [dictItem]
        # no expansion
        #if options.notExpandDBR:
        #dictItem = {'type':'constant',
        #            'value':'--noExpandDBR',
        #            }
        #taskParamMap['jobParameters'] += [dictItem]

        # secondary FIXME disabled
        self.secondaryDSs = {}
        if self.secondaryDSs != {}:
            inMap = {}
            streamNames = []
            for tmpDsName, tmpMap in self.secondaryDSs.iteritems():
                # make template item
                streamName = tmpMap['streamName']
                dictItem = MiscUtils.makeJediJobParam(
                    '${' + streamName + '}',
                    tmpDsName,
                    'input',
                    hidden=True,
                    expand=True,
                    include=tmpMap['pattern'],
                    offset=tmpMap['nSkip'],
                    nFilesPerJob=tmpMap['nFiles'])
                taskParamMap['jobParameters'] += dictItem
                inMap[streamName] = 'tmp_' + streamName
                streamNames.append(streamName)
            # make constant item
            strInMap = str(inMap)
            # set placeholders
            for streamName in streamNames:
                strInMap = strInMap.replace("'tmp_" + streamName + "'",
                                            '${' + streamName + '/T}')
            dictItem = {
                'type': 'constant',
                'value': '--inMap "%s"' % strInMap,
            }
            taskParamMap['jobParameters'] += [dictItem]

        # misc
        jobParameters = ''
        # use Athena packages
        if app.atlas_exetype == 'ARES' or (app.atlas_exetype
                                           in ['PYARA', 'ROOT', 'EXE']
                                           and app.useAthenaPackages):
            jobParameters += "--useAthenaPackages "

        # use RootCore
        if app.useRootCore or app.useRootCoreNoBuild:
            jobParameters += "--useRootCore "

        # use mana
        if app.useMana:
            jobParameters += "--useMana "
            if app.atlas_release != "":
                jobParameters += "--manaVer %s " % app.atlas_release
        # root
        if app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'
                                 ] and job.backend.requirements.rootver != '':
            rootver = re.sub('/', '.', job.backend.requirements.rootver)
            jobParameters += "--rootVer %s " % rootver

        # write input to txt
        #if options.writeInputToTxt != '':
        #    jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt
        # debug parameters
        #if options.queueData != '':
        #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
        # JEM
        #if options.enableJEM:
        #    jobParameters += "--enable-jem "
        #    if options.configJEM != '':
        #        jobParameters += "--jem-config %s " % options.configJEM

        # set task param
        if jobParameters != '':
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': jobParameters,
                },
            ]

        # force stage-in
        if job.backend.accessmode == "LocalIO":
            taskParamMap['useLocalIO'] = 1

        # set jobO parameter
        if app.atlas_exetype in ["ATHENA", "TRF"]:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-j "',
                    'padding': False,
                },
            ]
            taskParamMap[
                'jobParameters'] += PsubUtils.convertParamStrToJediParam(
                    tmpJobO, inputMap, job.outputdata.datasetname[:-1], True,
                    False)
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '"',
                },
            ]

        else:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-p "{0}"'.format(urllib.quote(self.job_options)),
                },
            ]

        # build step
        if not job.backend.nobuild:
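            # when builds are enabled the run jobs take their code from ${LIB} (see the
            # '-l ${LIB}' parameter above); buildSpec below carries the parameters of the
            # separate build step that creates that library from the uploaded sources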
            jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} '

            if job.backend.bexec != '':
                jobParameters += ' --bexec "%s" ' % urllib.quote(
                    job.backend.bexec)

            if app.atlas_exetype == 'ARES' or (app.atlas_exetype
                                               in ['PYARA', 'ROOT', 'EXE']
                                               and app.useAthenaPackages):
                # use Athena packages
                jobParameters += "--useAthenaPackages "
            # use RootCore
            if app.useRootCore or app.useRootCoreNoBuild:
                jobParameters += "--useRootCore "

            # run directory
            if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
                jobParameters += '-r {0} '.format(self.rundirectory)

            # no compile
            #if options.noCompile:
            #    jobParameters += "--noCompile "
            # use mana
            if app.useMana:
                jobParameters += "--useMana "
                if app.atlas_release != "":
                    jobParameters += "--manaVer %s " % app.atlas_release

            # root
            if app.atlas_exetype in [
                    'PYARA', 'ROOT', 'EXE'
            ] and job.backend.requirements.rootver != '':
                rootver = re.sub('/', '.', job.backend.requirements.rootver)
                jobParameters += "--rootVer %s " % rootver

            # cmt config
            if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
                if not app.atlas_cmtconfig in ['', 'NULL', None]:
                    jobParameters += " --cmtConfig %s " % app.atlas_cmtconfig

            #cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
            #if cmtConfig:
            #    jobParameters += "--cmtConfig %s " % cmtConfig
            # debug parameters
            #if options.queueData != '':
            #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
            # set task param
            taskParamMap['buildSpec'] = {
                'prodSourceLabel': 'panda',
                'archiveName': os.path.basename(self.inputsandbox),
                'jobParameters': jobParameters,
            }

        # enable merging
        if job.backend.requirements.enableMerge:
            jobParameters = '-r {0} '.format(self.rundirectory)
            if 'exec' in job.backend.requirements.configMerge and job.backend.requirements.configMerge[
                    'exec'] != '':
                jobParameters += '-j "{0}" '.format(
                    job.backend.requirements.configMerge['exec'])
            if not job.backend.nobuild:
                jobParameters += '-l ${LIB} '
            else:
                jobParameters += '-a {0} '.format(
                    os.path.basename(self.inputsandbox))
                jobParameters += "--sourceURL ${SURL} "
            jobParameters += '${TRN_OUTPUT:OUTPUT} ${TRN_LOG:LOG}'
            taskParamMap['mergeSpec'] = {}
            taskParamMap['mergeSpec']['useLocalIO'] = 1
            taskParamMap['mergeSpec']['jobParameters'] = jobParameters
            taskParamMap['mergeOutput'] = True

        # Selected by Jedi
        #if not app.atlas_exetype in ['PYARA','ROOT','EXE']:
        #    taskParamMap['transPath'] = 'http://atlpan.web.cern.ch/atlpan/runAthena-00-00-12'

        logger.debug(taskParamMap)

        # upload sources
        if self.inputsandbox and not job.backend.libds:
            uploadSources(os.path.dirname(self.inputsandbox),
                          os.path.basename(self.inputsandbox))

            if not self.inputsandbox == tmp_user_area_name:
                logger.info('Removing source tarball %s ...' %
                            self.inputsandbox)
                os.remove(self.inputsandbox)

        return taskParamMap
Example #18
0
	def checkGridProxy(self):
		self.gridPass, self.gridVoms = PsubUtils.checkGridProxy(higgs_hacker_conf.GRID_PSWD, False, False)
Example #19
0
 def sync(self):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     tmpLog.info("Synchronizing local repository ...")
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
             self.gridPassPhrase,
             False,
             self.verbose,
             useCache=True)
     # get nickname
     nickName = PsubUtils.getNickname()
     # set Rucio accounting
     PsubUtils.setRucioAccount(nickName,'pbook',True)
     # get JobIDs in local repository
     localJobIDs = PdbUtils.getListOfJobIDs()
     # get recent JobIDs from panda server
     syncTimeRaw = datetime.datetime.utcnow()
     syncTime = syncTimeRaw.strftime('%Y-%m-%d %H:%M:%S')
     # set sync time for the first attempt
     bookConf = BookConfig.getConfig()
     if self.restoreDB:
         # reset last_synctime to restore database 
         bookConf.last_synctime = ''
          # disable
          self.restoreDB = False
          tmpLog.info("It may take several minutes to restore local repository ...")
     if bookConf.last_synctime == '':
         bookConf.last_synctime = datetime.datetime.utcnow()-datetime.timedelta(days=180)
         bookConf.last_synctime = bookConf.last_synctime.strftime('%Y-%m-%d %H:%M:%S')
     maxTaskID = None
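      # page through tasks modified since the last sync: maxTaskID is passed back as the
      # minTaskID paging cursor until the server returns an empty batch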
     while True:
         status, jediTaskDicts = Client.getJobIDsJediTasksInTimeRange(bookConf.last_synctime,
                                                                      minTaskID=maxTaskID,
                                                                      verbose=self.verbose)
         if status != 0:
             tmpLog.error("Failed to get tasks from panda server")
             return
         if len(jediTaskDicts) == 0:
             break
         tmpLog.info("Got %s tasks to be updated" % len(jediTaskDicts))
         # insert if missing
         for remoteJobID in jediTaskDicts.keys():
             taskID = jediTaskDicts[remoteJobID]['jediTaskID']
             # get max
             if maxTaskID is None or taskID > maxTaskID:
                 maxTaskID = taskID
             # check local status
             job = None
             if remoteJobID in localJobIDs:
                 # get job info from local repository
                 job = PdbUtils.readJobDB(remoteJobID, self.verbose)
                 # skip if frozen
                 if job.dbStatus == 'frozen':
                     continue
             tmpLog.info("Updating taskID=%s ..." % taskID)
             # convert JEDI task
             localJob = PdbUtils.convertJTtoD(jediTaskDicts[remoteJobID],job)
             # update database
             if not remoteJobID in localJobIDs:
                 # insert to DB
                 try:
                     PdbUtils.insertJobDB(localJob,self.verbose)
                 except:
                     tmpLog.error("Failed to insert taskID=%s to local repository" % taskID)
                     return
             else:
                 # update
                 try:
                     PdbUtils.updateJobDB(localJob,self.verbose,syncTimeRaw)
                 except:
                     tmpLog.error("Failed to update local repository for taskID=%s" % taskID)
                     return
     # update sync time
     bookConf = BookConfig.getConfig()
     bookConf.last_synctime = syncTime
     BookConfig.updateConfig(bookConf)
     self.updateTaskJobsetMap()
     tmpLog.info("Synchronization Completed")
Example #20
0
    def sync(self):
        # get logger
        tmpLog = PLogger.getPandaLogger()
        tmpLog.info("Synchronizing local repository ...")
        # check proxy
        self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
            self.gridPassPhrase, False, self.verbose)
        # get JobIDs in local repository
        localJobIDs = PdbUtils.getListOfJobIDs()
        # get recent JobIDs from panda server
        syncTime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        # set sync time for the first attempt
        bookConf = BookConfig.getConfig()
        if self.restoreDB:
            # reset last_synctime to restore database
            bookConf.last_synctime = ''
            # disable
            self.restoreDB = False
            tmpLog.info(
                "It may take several minutes to restore local repository ...")
        if bookConf.last_synctime == '':
            bookConf.last_synctime = datetime.datetime.utcnow(
            ) - datetime.timedelta(days=180)
            bookConf.last_synctime = bookConf.last_synctime.strftime(
                '%Y-%m-%d %H:%M:%S')
        status, remoteJobIDs = Client.getJobIDsInTimeRange(
            bookConf.last_synctime, verbose=self.verbose)
        if status != 0:
            tmpLog.error("Failed to get JobIDs from panda server")
            return
        tmpLog.info("Got %s jobs to be updated" % len(remoteJobIDs))
        # insert if missing
        for remoteJobID in remoteJobIDs:
            # check local status
            job = None
            if remoteJobID in localJobIDs:
                # get job info from local repository
                job = PdbUtils.readJobDB(remoteJobID, self.verbose)
                # skip if frozen
                if job.dbStatus == 'frozen':
                    continue
            tmpLog.info("Updating JobID=%s ..." % remoteJobID)
            # get PandaIDs
            status, pandaIDstatus = Client.getPandIDsWithJobID(
                remoteJobID, verbose=self.verbose)
            if status != 0:
                tmpLog.error("Failed to get PandaIDs for %s" % remoteJobID)
                return
            pandaIDs = pandaIDstatus.keys()
            pandaIDs.sort()
            # get full JobSpec
            pandaJobs = []
            pandaFileInfo = {}
            pandaJobForSiteID = None
            if job == None:
                tmpIDs = [pandaIDs[0], pandaIDs[-1]]
                status, pandaJobs = Client.getFullJobStatus(
                    tmpIDs, verbose=self.verbose)
                if status != 0:
                    tmpLog.error("Failed to get PandaJobs for %s" %
                                 remoteJobID)
                    return
                # get slimmed file info
                status, pandaFileInfo = Client.getSlimmedFileInfoPandaIDs(
                    pandaIDs, verbose=self.verbose)
                if status != 0:
                    tmpLog.error("Failed to get file info  for %s" %
                                 remoteJobID)
                    return
            else:
                # get one job to set computingSite which may have changed due to rebrokerage
                status, tmpPandaJobs = Client.getFullJobStatus(
                    [pandaIDs[0]], verbose=self.verbose)
                if status != 0:
                    tmpLog.error("Failed to get PandaJobs for %s" %
                                 remoteJobID)
                    return
                pandaJobForSiteID = tmpPandaJobs[0]
            # convert to local job spec
            localJob = PdbUtils.convertPtoD(pandaJobs, pandaIDstatus, job,
                                            pandaFileInfo, pandaJobForSiteID)
            # update database
            if not remoteJobID in localJobIDs:
                # insert to DB
                try:
                    PdbUtils.insertJobDB(localJob, self.verbose)
                except:
                    tmpLog.error(
                        "Failed to insert JobID=%s to local repository" %
                        remoteJobID)
                    return
                # set retryID
                if not localJob.provenanceID in [0, '0']:
                    try:
                        PdbUtils.setRetryID(localJob, self.verbose)
                    except:
                        tmpLog.error(
                            "Failed to set retryID for JobID=%s in local repository"
                            % remoteJobID)
                        return
            else:
                # update
                try:
                    PdbUtils.updateJobDB(localJob, self.verbose)
                except:
                    tmpLog.error(
                        "Failed to update local repository for JobID=%s" %
                        remoteJobID)
                    return
        # update sync time
        bookConf = BookConfig.getConfig()
        bookConf.last_synctime = syncTime
        BookConfig.updateConfig(bookConf)
        tmpLog.info("Synchronization Completed")
Example #21
0
 def kill(self,JobID,useJobsetID=False):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
             self.gridPassPhrase,
             False,
             self.verbose,
             useCache=True)
     # force update just in case
     self.status(JobID,True)
     # get jobset
     jobList = self.getJobIDsWithSetID(JobID)
     if jobList == None:
         # works only for jobsetID
         if useJobsetID:
             return
         # works with jobID
         jobList = [JobID]
     else:
         tmpMsg = "ID=%s is composed of JobID=" % JobID
         for tmpJobID in jobList:
             tmpMsg += '%s,' % tmpJobID
         tmpMsg = tmpMsg[:-1]
         tmpLog.info(tmpMsg)
     for tmpJobID in jobList:    
         # get job info from local repository
         job = self.getJobInfo(tmpJobID)
         if job == None:
             tmpLog.warning("JobID=%s not found in local repository. Synchronization may be needed" % tmpJobID)            
             continue
         # skip frozen job
         if job.dbStatus == 'frozen':
             tmpLog.info('All subJobs in JobID=%s already finished/failed' % tmpJobID)
             continue
         if not job.isJEDI():
             # get PandaID list
             killJobs = job.PandaID.split(',')
             # kill
             tmpLog.info('Sending kill command ...')
             status,output = Client.killJobs(killJobs,self.verbose)
             if status != 0:
                 tmpLog.error(output)
                 tmpLog.error("Failed to kill JobID=%s" % tmpJobID)
                 return False
             # update database
             job.commandToPilot = 'tobekilled'
             # update DB
             try:
                 PdbUtils.updateJobDB(job,self.verbose)
             except:
                 tmpLog.error("Failed to update local repository for JobID=%s" % tmpJobID)
                 return False
         else:
             # kill JEDI task
             tmpLog.info('Sending killTask command ...')
             status,output = Client.killTask(job.jediTaskID,self.verbose)
             # communication error
             if status != 0:
                 tmpLog.error(output)
                 tmpLog.error("Failed to kill JobID=%s" % tmpJobID)
                 return False
             tmpStat,tmpDiag = output
             if not tmpStat:
                 tmpLog.error(tmpDiag)
                 tmpLog.error("Failed to kill JobID=%s" % tmpJobID)
                 return False
             tmpLog.info(tmpDiag)
         # done
         if job.isJEDI():
             tmpLog.info('Done. TaskID=%s will be killed in 30min' % job.jediTaskID)
         else:
             tmpLog.info('Done. JobID=%s will be killed in 30min' % tmpJobID)
     return True
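
Client.killTask (like Client.finishTask) returns a two-level result in the code above: a non-zero status means the communication itself failed, while the output payload is a (success, diagnostics) pair. A sketch of that check factored into a helper; check_task_command() is illustrative and not part of pandatools:

def check_task_command(status, output, tmpLog, action, jobID):
    # communication error with the panda server
    if status != 0:
        tmpLog.error(output)
        tmpLog.error("Failed to %s JobID=%s" % (action, jobID))
        return False
    # server-side result: (success flag, diagnostic message)
    tmpStat, tmpDiag = output
    if not tmpStat:
        tmpLog.error(tmpDiag)
        tmpLog.error("Failed to %s JobID=%s" % (action, jobID))
        return False
    tmpLog.info(tmpDiag)
    return True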
Example #22
0
    def retry(self,
              JobsetID,
              newSite=False,
              newOpts={},
              noSubmit=False,
              ignoreDuplication=True):
        # get logger
        tmpLog = PLogger.getPandaLogger()
        # check proxy
        self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
            self.gridPassPhrase, False, self.verbose)
        # get jobset
        newJobsetID = -1
        jobList = self.getJobIDsWithSetID(JobsetID)
        if jobList == None:
            isJobset = False
            jobList = [JobsetID]
        else:
            isJobset = True
            tmpMsg = "JobsetID=%s is composed of JobID=" % JobsetID
            for tmpJobID in jobList:
                tmpMsg += '%s,' % tmpJobID
            tmpMsg = tmpMsg[:-1]
            tmpLog.info(tmpMsg)
        for JobID in jobList:
            # get job info from local repository
            localJob = self.getJobInfo(JobID)
            if localJob == None:
                tmpLog.warning(
                    "JobID=%s not found in local repository. Synchronization may be needed"
                    % JobID)
                return None
            # skip running job
            if localJob.dbStatus != 'frozen':
                tmpLog.warning('Cannot retry running jobs')
                if isJobset:
                    continue
                else:
                    return
            # skip already retried
            if localJob.retryID != '0':
                if isJobset:
                    tmpLog.info('Skip JobID=%s since already retried by JobID=%s JobsetID=%s' % \
                                (JobID,localJob.retryID,localJob.retryJobsetID))
                    continue
                else:
                    tmpLog.warning('This job was already retried by JobID=%s' %
                                   localJob.retryID)
                    return
            # check status of buildJob
            if not localJob.buildStatus in ['', 'finished']:
                tmpMsgStr = 'Cannot retry since status of buildJob %s is %s (!= finished). ' \
                            % (localJob.PandaID.split(',')[0],localJob.buildStatus)
                tmpMsgStr += 'Please execute %s with the same input/output datasets (or containers). ' % localJob.jobType
                tmpMsgStr += 'It will run only on failed/cancelled/unused input files '
                tmpMsgStr += 'and append output files to the output dataset container'
                tmpLog.warning(tmpMsgStr)
                if isJobset:
                    continue
                else:
                    return
            # check opts for newSite
            if newSite:
                if not localJob.outDS.endswith('/') and not newOpts.has_key(
                        'outDS') and not newOpts.has_key('--outDS'):
                    tmpLog.warning(
                        'You need to specify --outDS in newOpts to retry at new site unless container is used as output'
                    )
                    return
            # get list of failed jobs
            pandaIDs = localJob.PandaID.split(',')
            statusList = localJob.jobStatus.split(',')
            jobList = []
            for idx in range(len(pandaIDs)):
                # check status
                if not statusList[idx] in ['failed', 'cancelled']:
                    continue
                jobList.append(pandaIDs[idx])
            # no failed job
            if jobList == []:
                if isJobset:
                    tmpLog.info('Skip JobID=%s since no failed jobs' % JobID)
                    continue
                else:
                    tmpLog.info('No failed jobs to be retried for JobID=%s' %
                                JobID)
                    return
            # get full job spec
            tmpLog.info("Retrying JobID=%s ..." % JobID)
            tmpLog.info("Getting job info")
            idxJL = 0
            nQuery = 500
            pandaJobs = []
            while idxJL < len(jobList):
                # avoid burst query
                tmpLog.info(" %5s/%s" % (idxJL, len(jobList)))
                status, oTmp = Client.getFullJobStatus(jobList[idxJL:idxJL +
                                                               nQuery],
                                                       verbose=self.verbose)
                if status != 0:
                    tmpLog.error(status)
                    tmpLog.error(oTmp)
                    tmpLog.error("Cannot get job info from Panda server")
                    return
                pandaJobs += oTmp
                idxJL += nQuery
                time.sleep(1)
            tmpLog.info(" %5s/%s" % (len(jobList), len(jobList)))
            # jobdefID
            newJobdefID = PsubUtils.readJobDefID()
            # reset some parameters
            retryJobs = []
            retrySite = None
            retryElement = None
            retryDestSE = None
            outDsName = None
            shadowList = []
            for idx in range(len(jobList)):
                job = pandaJobs[idx]
                # skip expired
                if job == None:
                    tmpLog.warning("Could not retry jobs older than 30 days : JobID=%s (PandaID=%s) expired" \
                                   % (JobID,jobList[idx]))
                    return
                # get shadow list
                if (not ignoreDuplication
                    ) and outDsName == None and job.prodSourceLabel == 'user':
                    # look for dataset for log since it doesn't have suffix even when --individualOutDS is used
                    for tmpFile in job.Files:
                        if tmpFile.type == 'log':
                            outDsName = tmpFile.dataset
                            break
                    # output dataset was not found
                    if outDsName == None:
                        tmpLog.error("Could not get output dataset name for JobID=%s (PandaID=%s)" \
                                     % (JobID,job.PandaID))
                        return
                    # get files in shadow
                    if outDsName.endswith('/'):
                        shadowList = Client.getFilesInShadowDataset(
                            outDsName, Client.suffixShadow, self.verbose)
                    else:
                        # disable duplication check mainly for old overlay jobs since non-signal files are wrongly skipped
                        #shadowList = Client.getFilesInShadowDatasetOld(outDsName,Client.suffixShadow,self.verbose)
                        pass
                # unify sitename
                if retrySite == None:
                    retrySite = job.computingSite
                    retryElement = job.computingElement
                    retryDestSE = job.destinationSE
                # reset
                job.jobStatus = None
                job.commandToPilot = None
                job.startTime = None
                job.endTime = None
                job.attemptNr = 1 + job.attemptNr
                for attr in job._attributes:
                    if attr.endswith('ErrorCode') or attr.endswith(
                            'ErrorDiag'):
                        setattr(job, attr, None)
                job.transExitCode = None
                job.computingSite = retrySite
                job.computingElement = retryElement
                job.destinationSE = retryDestSE
                job.dispatchDBlock = None
                job.jobExecutionID = JobID
                job.jobDefinitionID = newJobdefID
                job.parentID = job.PandaID
                if not job.jobsetID in ['NULL', None, -1]:
                    job.sourceSite = job.jobsetID
                    job.jobsetID = newJobsetID
                skipInputList = []
                numUsedFiles = 0
                for file in job.Files:
                    file.rowID = None
                    if file.type == 'input':
                        # protection against wrong sync which doesn't update buildStatus correctly
                        if file.lfn.endswith(
                                '.lib.tgz') and file.GUID == 'NULL':
                            tmpLog.warning('GUID for %s is unknown. Cannot retry when corresponding buildJob failed' \
                                           % file.lfn)
                            return
                        file.status = 'ready'
                        # check with shadow for non lib.tgz/DBR
                        tmpDbrMatch = re.search('^DBRelease-.*\.tar\.gz$',
                                                file.lfn)
                        if tmpDbrMatch == None and not file.lfn.endswith(
                                '.lib.tgz'):
                            if file.lfn in shadowList:
                                skipInputList.append(file)
                            else:
                                numUsedFiles += 1
                    elif file.type in ('output', 'log'):
                        file.destinationSE = retryDestSE
                        file.destinationDBlock = re.sub(
                            '_sub\d+$', '', file.destinationDBlock)
                        # add retry num
                        if file.dataset.endswith('/'):
                            retryMatch = re.search('_r(\d+)$',
                                                   file.destinationDBlock)
                            if retryMatch == None:
                                file.destinationDBlock += '_r1'
                            else:
                                tmpDestinationDBlock = re.sub(
                                    '_r(\d+)$', '', file.destinationDBlock)
                                file.destinationDBlock = tmpDestinationDBlock + '_r%d' % (
                                    1 + int(retryMatch.group(1)))
                        # add attempt nr
                        oldName = file.lfn
                        file.lfn = re.sub("\.\d+$", "", file.lfn)
                        file.lfn = "%s.%d" % (file.lfn, job.attemptNr)
                        newName = file.lfn
                        # modify jobParameters
                        job.jobParameters = re.sub("'%s'" % oldName,
                                                   "'%s'" % newName,
                                                   job.jobParameters)
                        # look for output in trf
                        oldGenericName = re.sub('\.\d+$', '', oldName)
                        match = re.search(oldGenericName + '(\.\d+)*(%20|")',
                                          job.jobParameters)
                        if match != None:
                            job.jobParameters = job.jobParameters.replace(
                                match.group(0), newName + match.group(2))
                # all files are used by others
                if numUsedFiles == 0 and skipInputList != []:
                    continue
                # remove skipped files
                strSkipped = ''
                for tmpFile in skipInputList:
                    strSkipped += '%s,' % tmpFile.lfn
                    job.Files.remove(tmpFile)
                strSkipped = strSkipped[:-1]
                # modify jobpar
                if strSkipped != '':
                    optionToSkipFiles = '--skipInputByRetry'
                    if not optionToSkipFiles in job.jobParameters:
                        # just append
                        job.jobParameters += "%s=%s " % (optionToSkipFiles,
                                                         strSkipped)
                    else:
                        # extract already skipped files
                        tmpMatch = re.search("(%s=[^ ]+)", job.jobParameters)
                        if tmpMatch == None:
                            tmpLog.error("Failed to extract arg of %s for PandaID=%s" \
                                         % (optionToSkipFiles,job.PandaID))
                            return
                        # replace
                        job.jobParameters = re.sub(
                            tmpMatch.group(1),
                            "%s,%s" % (tmpMatch.group(1), strSkipped),
                            job.jobParameters)
                if self.verbose:
                    tmpLog.debug(job.jobParameters)
                # append
                retryJobs.append(job)
            # all input files were or are being used by other jobs
            if retryJobs == []:
                tmpLog.info(
                    'All input files were or are being used by other jobs for the same output. No jobs to be retried. If you need to ignore duplication check (e.g., using the same EVNT file for multiple simulation subjobs), set ignoreDuplication=True. i.e. retry(123,ignoreDuplication=True)'
                )
                return
            # check voms role
            if not retryJobs[0].workingGroup in ['NULL', None, '']:
                # VOMS role was used
                if not "--workingGroup" in job.metadata:
                    # extract voms roles from metadata
                    match = re.search("--voms( |=)[ \"]*([^ \"]+)",
                                      job.metadata)
                    if match != None:
                        vomsRoles = match.group(2)
                    else:
                        vomsRoles = "atlas:/atlas/%s/Role=production" % retryJobs[
                            0].workingGroup
                    # regenerate proxy with VOMS roles
                    try:
                        tmpLog.info("Checking proxy role to resubmit %s jobs" %
                                    retryJobs[0].workingGroup)
                        self.gridPassPhrase, self.vomsFQAN = PsubUtils.checkGridProxy(
                            self.gridPassPhrase, False, self.verbose,
                            vomsRoles)
                    except:
                        tmpLog.error("Failed to generate a proxy with %s" %
                                     vomsRoles)
                        return
            # check runtime env for new site submission
            if newSite:
                if retryJobs[
                        0].processingType == 'pathena' or '--useAthenaPackages' in retryJobs[
                            0].metadata:
                    from pandatools import AthenaUtils
                    stA, retA = AthenaUtils.getAthenaVer()
                    if not stA:
                        tmpLog.error(
                            "Failed to get Athena rel/cache version in current runtime env"
                        )
                        return
                    athenaVer = retA['athenaVer']
                    cacheVer = retA['cacheVer']
                    nightVer = retA['nightVer']
                    wrongSetup = False
                    if retryJobs[0].AtlasRelease != 'Atlas-%s' % athenaVer:
                        wrongSetup = True
                        errMsg = "Current Athena version Atlas-%s is inconsitent with the previous submission %s. " % (
                            athenaVer, retryJobs[0].AtlasRelease)
                    elif retryJobs[
                            0].homepackage != 'AnalysisTransforms' + cacheVer + nightVer:
                        wrongSetup = True
                        errMsg = "Current cache version %s is inconsitent with the previous submission. " % cacheVer.replace(
                            '-', '').replace('_', '-')
                    if wrongSetup:
                        errMsg += 'You need to have the same runtime env as before since all job spec need to be re-created to send jobs to a new site. '
                        errMsg += 'Please setup Athena correctly and restart pbook'
                        tmpLog.error(errMsg)
                        return
            # test mode
            if noSubmit:
                continue
            # invoke pathena/prun to send job to new site
            if newSite:
                tmpLog.info(
                    "Constrcuting job spec again to be sent to another site ..."
                )
                comStat = PsubUtils.execWithModifiedParams(
                    retryJobs, newOpts, self.verbose)
                if comStat == 0:
                    # update database
                    time.sleep(2)
                    self.sync()
                else:
                    tmpLog.error("Failed to submit jobs to Panda server")
                return
            # register datasets
            tmpOutDsLocation = Client.PandaSites[
                retryJobs[-1].computingSite]['ddm']
            addedDataset = []
            shadowDSname = None
            for tmpFile in retryJobs[-1].Files:
                if tmpFile.type in ['output', 'log'
                                    ] and tmpFile.dataset.endswith('/'):
                    # add shadow
                    if shadowDSname == None and tmpFile.type == 'log':
                        shadowDSname = "%s%s" % (tmpFile.destinationDBlock,
                                                 Client.suffixShadow)
                        Client.addDataset(shadowDSname, self.verbose)
                    # add datasets
                    if not tmpFile.destinationDBlock in addedDataset:
                        # create dataset
                        Client.addDataset(tmpFile.destinationDBlock,
                                          self.verbose,
                                          location=tmpOutDsLocation)
                        # add to container
                        Client.addDatasetsToContainer(
                            tmpFile.dataset, [tmpFile.destinationDBlock],
                            self.verbose)
                        # append
                        addedDataset.append(tmpFile.destinationDBlock)
            # submit
            tmpLog.info("Submitting job ...")
            status, out = Client.submitJobs(retryJobs, verbose=self.verbose)
            if out == None or status != 0:
                tmpLog.error(status)
                tmpLog.error(out)
                tmpLog.error("Failed to submit jobs to Panda server")
                return
            # update database
            pandaIDstatus = {}
            newJobID = None
            for items in out:
                # get newJobID
                if newJobID == None:
                    newJobID = items[1]
                # check PandaID
                PandaID = items[0]
                if PandaID == 'NULL':
                    tmpLog.error(
                        "Panda server returned wrong IDs. It may have a temporary problem"
                    )
                    return
                # set newJobsetID
                if newJobsetID in [None, -1]:
                    newJobsetID = items[2]['jobsetID']
                # dummy status
                pandaIDstatus[PandaID] = ('defined', 'NULL')
            # set retry ID
            localJob.retryID = newJobID
            if not newJobsetID in [None, -1, 'NULL']:
                localJob.retryJobsetID = newJobsetID
            try:
                PdbUtils.updateJobDB(localJob, self.verbose)
            except:
                tmpLog.error("Failed to set retryID for JobID=%s" % JobID)
                return
            # set new parameters
            newLocalJob = PdbUtils.convertPtoD(retryJobs, pandaIDstatus)
            newLocalJob.JobID = newJobID
            if not newJobsetID in [None, -1, 'NULL']:
                newLocalJob.groupID = newJobsetID
            newLocalJob.creationTime = datetime.datetime.utcnow()
            # insert to DB
            try:
                PdbUtils.insertJobDB(newLocalJob, self.verbose)
            except:
                tmpLog.error("Failed to insert JobID=%s to local repository" %
                             newJobID)
                return
            # write new jobdefID
            PsubUtils.writeJobDefID(newJobID)
            # done
            tmpMsg = 'Done. New JobID=%s' % newJobID
            if not newJobsetID in [None, -1, 'NULL']:
                tmpMsg += " JobsetID=%s" % newJobsetID
            tmpLog.info(tmpMsg)
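
Two of the renaming rules retry() applies to output files above, isolated as a standalone sketch: the _rN suffix on the destination block is bumped for container outputs, and the trailing attempt number on an LFN is replaced with the new attemptNr. The helper names are illustrative only:

import re

def bump_retry_suffix(destinationDBlock):
    # append _r1 on the first retry, otherwise increment the existing counter
    retryMatch = re.search(r'_r(\d+)$', destinationDBlock)
    if retryMatch is None:
        return destinationDBlock + '_r1'
    return re.sub(r'_r\d+$', '', destinationDBlock) + '_r%d' % (1 + int(retryMatch.group(1)))

def rename_for_attempt(lfn, attemptNr):
    # replace any trailing ".N" with the new attempt number
    return "%s.%d" % (re.sub(r'\.\d+$', '', lfn), attemptNr)

# bump_retry_suffix('user.x.myout_r2')   -> 'user.x.myout_r3'
# rename_for_attempt('myout.root.1', 2)  -> 'myout.root.2'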
Example #23
0
 def status(self,JobID,forceUpdate=False):
     # get logger
     tmpLog = PLogger.getPandaLogger()
     # check proxy
     self.gridPassPhrase,self.vomsFQAN = PsubUtils.checkGridProxy(
             self.gridPassPhrase,
             False,
             self.verbose,
             useCache=True)
     # get job info from local repository
     job = self.getJobInfo(JobID)
     if job == None:
         # not found
         return None
     # update if needed
     if job.dbStatus != 'frozen' or forceUpdate:
         if not job.isJEDI():
             tmpLog.info("Getting status for JobID=%s ..." % JobID)
             # get status from Panda server
             status,pandaIDstatus = Client.getPandIDsWithJobID(JobID,verbose=self.verbose)
             if status != 0:
                 tmpLog.error("Failed to get status for ID=%s" % JobID)
                 return None
             # get one job to set computingSite which may have changed due to rebrokerage
             pandaJob = None
             if pandaIDstatus != {}:
                 tmpPandaIDs = pandaIDstatus.keys()
                 tmpPandaIDs.sort()
                 status,tmpPandaJobs = Client.getFullJobStatus(
                         tmpPandaIDs[:1],
                         verbose=self.verbose)
                 if status != 0:
                     tmpLog.error("Failed to get PandaJobs for %s" % JobID)
                     return None
                 pandaJob = tmpPandaJobs[0]
             # convert to local job spec
             job = PdbUtils.convertPtoD([],pandaIDstatus,job,pandaJobForSiteID=pandaJob)
             # check merge job generation
             status = self.setMergeJobStatus(job,forceUpdate)
             if not status:
                 return None
         else:
             tmpLog.info("Getting status for TaskID=%s ..." % job.jediTaskID)
             # get JEDI task
             status,jediTaskDict = Client.getJediTaskDetails(
                     {'jediTaskID':job.jediTaskID},
                     False,
                     True,
                     verbose=self.verbose)
             if status != 0:
                 tmpLog.error("Failed to get task details for %s" % JobID)
                 return
             # convert JEDI task
             job = PdbUtils.convertJTtoD(jediTaskDict,job)
         # update DB
         try:
             PdbUtils.updateJobDB(job,self.verbose)
         except:
             tmpLog.error("Failed to update local repository for JobID=%s" % JobID)
             return None
         if not job.isJEDI():
             tmpLog.info("Updated JobID=%s" % JobID)                        
         else:
             tmpLog.info("Updated TaskID=%s ..." % job.jediTaskID)
     # return
     return job
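
The local job spec returned by status() carries dbStatus (and jediTaskID for JEDI tasks), which the other methods in this listing use to skip frozen jobs. A short illustrative follow-up, assuming pbook is an instance of the enclosing class and 1234 is a placeholder JobID:

job = pbook.status(1234, forceUpdate=True)
if job is not None and job.dbStatus == 'frozen':
    print('All subJobs in JobID=1234 already finished/failed')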