Exemple #1
0
 def uploadLog(self, id):
     """Upload the dumped message buffer and describe the outcome as a string.

     Returns a failure message when the client reports a non-zero status,
     an HTML anchor when the client output starts with 'http', and the raw
     client output in every other case.
     """
     dumped = self.dumpToString()
     status, output = Client.uploadLog(dumped, id)
     if status != 0:
         return "failed to upload log with {0}.".format(status)
     # anything that does not look like a URL is handed back untouched
     if not output.startswith('http'):
         return output
     return '<a href="{0}">log</a>'.format(output)
Exemple #2
0
 def uploadLog(self, id):
     """Send the dumped log text to the server and return a printable result.

     The result is an error message for a non-zero client status, an HTML
     link when the client output starts with 'http', or the client output
     itself otherwise.
     """
     logText = self.dumpToString()
     retCode, retOut = Client.uploadLog(logText, id)
     if retCode != 0:
         return "failed to upload log with {0}.".format(retCode)
     # wrap URLs into a clickable link, pass everything else through
     isUrl = retOut.startswith('http')
     return '<a href="{0}">log</a>'.format(retOut) if isUrl else retOut
Exemple #3
0
 def doCheck(self, taskSpecList):
     """Ask Panda where each task was assigned and build the destination map.

     Args:
         taskSpecList: iterable of task specs; the code reads ``jediTaskID``
             and ``vo`` and calls ``useWorldCloud()`` on each of them.

     Returns:
         A ``(status, retMap)`` tuple. ``status`` is ``self.SC_FAILED`` (with
         an empty map) when ``PandaClient.seeCloudTask`` reports a non-zero
         status, otherwise ``self.SC_SUCCEEDED``. ``retMap`` maps a jediTaskID
         either to the name returned by Panda (tasks not using the WORLD
         cloud) or to a dict with the keys ``'datasets'`` and ``'nucleus'``
         (tasks using the WORLD cloud).
     """
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doCheck')
     # return for failure
     retTmpError = self.SC_FAILED, {}
     # get list of jediTaskIDs
     taskIdList = []
     taskSpecMap = {}
     for taskSpec in taskSpecList:
         taskIdList.append(taskSpec.jediTaskID)
         taskSpecMap[taskSpec.jediTaskID] = taskSpec
     # check with panda
     tmpLog.debug('check with panda')
     tmpPandaStatus, cloudsInPanda = PandaClient.seeCloudTask(taskIdList)
     if tmpPandaStatus != 0:
         tmpLog.error('failed to see clouds')
         return retTmpError
     # make return map
     retMap = {}
     # items() works on both Python 2 and 3, unlike iteritems()
     for tmpTaskID, tmpCoreName in cloudsInPanda.items():
         tmpLog.debug('jediTaskID={0} -> {1}'.format(tmpTaskID, tmpCoreName))
         # nothing assigned yet
         if tmpCoreName in ['NULL', '', None]:
             continue
         taskSpec = taskSpecMap[tmpTaskID]
         if not taskSpec.useWorldCloud():
             retMap[tmpTaskID] = tmpCoreName
             continue
         # get destinations for WORLD cloud
         ddmIF = self.ddmIF.getInterface(taskSpec.vo)
         # get site
         siteSpec = self.siteMapper.getSite(tmpCoreName)
         # get nucleus
         nucleus = siteSpec.pandasite
         # get output/log datasets
         # NOTE(review): the returned status is not checked - confirm that
         # the dataset list is always iterable on failure
         tmpStat, tmpDatasetSpecs = self.taskBufferIF.getDatasetsWithJediTaskID_JEDI(tmpTaskID, ['output', 'log'])
         # get destinations
         retMap[tmpTaskID] = {'datasets': [], 'nucleus': nucleus}
         for datasetSpec in tmpDatasetSpecs:
             # skip distributed datasets
             if DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                 continue
             # get token
             token = ddmIF.convertTokenToEndpoint(siteSpec.ddm, datasetSpec.storageToken)
             # use default endpoint
             if token is None:
                 token = siteSpec.ddm
             # add original token
             if datasetSpec.storageToken not in ['', None]:
                 token += '/{0}'.format(datasetSpec.storageToken)
             retMap[tmpTaskID]['datasets'].append({'datasetID': datasetSpec.datasetID,
                                                   'token': 'dst:{0}'.format(token),
                                                   'destination': tmpCoreName})
     tmpLog.debug('ret {0}'.format(str(retMap)))
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED, retMap
 def doCheck(self, taskSpecList):
     """Look up the Panda assignment of every task and return the destinations.

     Args:
         taskSpecList: iterable of task specs; the code reads ``jediTaskID``
             and ``vo`` and calls ``useWorldCloud()`` on each of them.

     Returns:
         A ``(status, retMap)`` tuple. ``status`` is ``self.SC_FAILED`` (with
         an empty map) when ``PandaClient.seeCloudTask`` reports a non-zero
         status, otherwise ``self.SC_SUCCEEDED``. ``retMap`` maps a jediTaskID
         either to the name returned by Panda (tasks not using the WORLD
         cloud) or to a dict with the keys ``'datasets'`` and ``'nucleus'``
         (tasks using the WORLD cloud).
     """
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doCheck')
     # return for failure
     retTmpError = self.SC_FAILED, {}
     # get list of jediTaskIDs
     taskIdList = []
     taskSpecMap = {}
     for taskSpec in taskSpecList:
         taskIdList.append(taskSpec.jediTaskID)
         taskSpecMap[taskSpec.jediTaskID] = taskSpec
     # check with panda
     tmpLog.debug('check with panda')
     tmpPandaStatus, cloudsInPanda = PandaClient.seeCloudTask(taskIdList)
     if tmpPandaStatus != 0:
         tmpLog.error('failed to see clouds')
         return retTmpError
     # make return map
     retMap = {}
     # items() works on both Python 2 and 3, unlike iteritems()
     for tmpTaskID, tmpCoreName in cloudsInPanda.items():
         tmpLog.debug('jediTaskID={0} -> {1}'.format(tmpTaskID, tmpCoreName))
         # nothing assigned yet
         if tmpCoreName in ['NULL', '', None]:
             continue
         taskSpec = taskSpecMap[tmpTaskID]
         if not taskSpec.useWorldCloud():
             retMap[tmpTaskID] = tmpCoreName
             continue
         # get destinations for WORLD cloud
         ddmIF = self.ddmIF.getInterface(taskSpec.vo)
         # get site
         siteSpec = self.siteMapper.getSite(tmpCoreName)
         # get nucleus
         nucleus = siteSpec.pandasite
         # get output/log datasets
         # NOTE(review): the returned status is not checked - confirm that
         # the dataset list is always iterable on failure
         tmpStat, tmpDatasetSpecs = self.taskBufferIF.getDatasetsWithJediTaskID_JEDI(tmpTaskID, ['output', 'log'])
         # get destinations
         retMap[tmpTaskID] = {'datasets': [], 'nucleus': nucleus}
         for datasetSpec in tmpDatasetSpecs:
             # skip distributed datasets
             if DataServiceUtils.getDistributedDestination(datasetSpec.storageToken) is not None:
                 continue
             # get token
             token = ddmIF.convertTokenToEndpoint(siteSpec.ddm, datasetSpec.storageToken)
             # use default endpoint
             if token is None:
                 token = siteSpec.ddm
             # add original token
             if datasetSpec.storageToken not in ['', None]:
                 token += '/{0}'.format(datasetSpec.storageToken)
             retMap[tmpTaskID]['datasets'].append({'datasetID': datasetSpec.datasetID,
                                                   'token': 'dst:{0}'.format(token),
                                                   'destination': tmpCoreName})
     tmpLog.debug('ret {0}'.format(str(retMap)))
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED, retMap
Exemple #5
0
 def uploadLog(self):
     """Upload the logger's dumped text for this task and describe the outcome.

     Returns 'cannot find jediTaskID' when no task ID is set, a failure
     message for a non-zero client status, an HTML link when the client
     output starts with 'http', and the raw client output otherwise.
     """
     # nothing to attach the log to without a task ID
     if self.jediTaskID is None:
         return 'cannot find jediTaskID'
     dumped = self.logger.dumpToString()
     status, output = Client.uploadLog(dumped, self.jediTaskID)
     if status != 0:
         return "failed to upload log with {0}.".format(status)
     if not output.startswith('http'):
         return output
     return '<a href="{0}">log</a>'.format(output)
 def doCheck(self, taskSpecList):
     """Ask Panda where each task was assigned and build the destination map.

     Args:
         taskSpecList: iterable of task specs; the code reads ``jediTaskID``
             and ``vo`` and calls ``useWorldCloud()`` on each of them.

     Returns:
         A ``(status, retMap)`` tuple. ``status`` is ``self.SC_FAILED`` (with
         an empty map) when ``PandaClient.seeCloudTask`` reports a non-zero
         status, otherwise ``self.SC_SUCCEEDED``. ``retMap`` maps a jediTaskID
         either to the name returned by Panda (tasks not using the WORLD
         cloud) or to a list of dicts with the keys ``"datasetID"``,
         ``"token"`` and ``"destination"`` (tasks using the WORLD cloud).
     """
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug("start doCheck")
     # return for failure
     retTmpError = self.SC_FAILED, {}
     # get list of jediTaskIDs
     taskIdList = []
     taskSpecMap = {}
     for taskSpec in taskSpecList:
         taskIdList.append(taskSpec.jediTaskID)
         taskSpecMap[taskSpec.jediTaskID] = taskSpec
     # check with panda
     tmpLog.debug("check with panda")
     tmpPandaStatus, cloudsInPanda = PandaClient.seeCloudTask(taskIdList)
     if tmpPandaStatus != 0:
         tmpLog.error("failed to see clouds")
         return retTmpError
     # make return map
     retMap = {}
     # items() works on both Python 2 and 3, unlike iteritems()
     for tmpTaskID, tmpCoreName in cloudsInPanda.items():
         tmpLog.debug("jediTaskID={0} -> {1}".format(tmpTaskID, tmpCoreName))
         # nothing assigned yet
         if tmpCoreName in ["NULL", "", None]:
             continue
         taskSpec = taskSpecMap[tmpTaskID]
         if not taskSpec.useWorldCloud():
             retMap[tmpTaskID] = tmpCoreName
             continue
         # get destinations for WORLD cloud
         ddmIF = self.ddmIF.getInterface(taskSpec.vo)
         # get site
         siteSpec = self.siteMapper.getSite(tmpCoreName)
         # get output/log datasets
         # NOTE(review): the returned status is not checked - confirm that
         # the dataset list is always iterable on failure
         tmpStat, tmpDatasetSpecs = self.taskBufferIF.getDatasetsWithJediTaskID_JEDI(
             tmpTaskID, ["output", "log"]
         )
         # get destinations
         retMap[tmpTaskID] = []
         for datasetSpec in tmpDatasetSpecs:
             token = ddmIF.convertTokenToEndpoint(siteSpec.ddm, datasetSpec.storageToken)
             # use default endpoint
             if token is None:
                 token = siteSpec.ddm
             retMap[tmpTaskID].append(
                 {
                     "datasetID": datasetSpec.datasetID,
                     "token": "dst:{0}".format(token),
                     "destination": tmpCoreName,
                 }
             )
     tmpLog.debug("ret {0}".format(str(retMap)))
     # return
     tmpLog.debug("done")
     return self.SC_SUCCEEDED, retMap
Exemple #7
0
    def getStatus(self, expectedStates):
        """Query the status of all tracked jobs and verify their states.

        Asserts that the status retrieval returned 0 and that every returned
        job has a ``jobStatus`` contained in ``expectedStates``; returns the
        list of job infos obtained from the client.
        """
        pandaIDs = [entry['jobID'] for entry in self.__jobList]
        _logger.info("%s" % pandaIDs)

        retCode, jobInfoList = Client.getJobStatus(pandaIDs)
        _logger.info("%s" % jobInfoList)

        assert retCode == 0, "Retrieval of job state finished with status: %s" % retCode

        # every job must be in one of the states the caller expects
        for jobInfo in jobInfoList:
            assert jobInfo.jobStatus in expectedStates, \
                "Recently defined job was not in states %s (PandaID: %s jobStatus: %s)" % (
                    expectedStates, jobInfo.PandaID, jobInfo.jobStatus)

        return jobInfoList
Exemple #8
0
 def doCheck(self, taskSpecList):
     """Ask Panda which cloud each request was assigned to, keyed by jediTaskID.

     Args:
         taskSpecList: iterable of task specs; the code reads ``reqID`` and
             ``jediTaskID`` from each of them.

     Returns:
         A ``(status, retMap)`` tuple. ``status`` is ``self.SC_FAILED`` (with
         an empty map) when ``PandaClient.seeCloudTask`` reports a non-zero
         status, otherwise ``self.SC_SUCCEEDED``. ``retMap`` maps jediTaskID
         to the cloud reported by Panda; entries whose cloud is ``'NULL'``,
         ``''`` or ``None`` are left out.
     """
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doCheck')
     # return for failure
     retTmpError = self.SC_FAILED, {}
     # get list of reqIDs which are mapped to taskID in Panda
     reqIdTaskIdMap = {}
     for taskSpec in taskSpecList:
         if taskSpec.reqID is None:
             tmpLog.error('jediTaskID={0} has undefined reqID'.format(taskSpec.jediTaskID))
         elif taskSpec.reqID in reqIdTaskIdMap:
             # dict.has_key() is gone in Python 3; the first task seen keeps the reqID
             tmpLog.error('reqID={0} is dubplicated in jediTaskID={1},{2}'.format(taskSpec.reqID,
                                                                                  taskSpec.jediTaskID,
                                                                                  reqIdTaskIdMap[taskSpec.reqID]))
         else:
             reqIdTaskIdMap[taskSpec.reqID] = taskSpec.jediTaskID
             tmpLog.debug('jediTaskID={0} has reqID={1}'.format(taskSpec.jediTaskID, taskSpec.reqID))
     # check with panda
     tmpLog.debug('check with panda')
     # pass a real list: keys() is only a view on Python 3
     tmpPandaStatus, cloudsInPanda = PandaClient.seeCloudTask(list(reqIdTaskIdMap))
     if tmpPandaStatus != 0:
         tmpLog.error('failed to see clouds')
         return retTmpError
     # make return map
     retMap = {}
     # items() works on both Python 2 and 3, unlike iteritems()
     for tmpReqID, tmpCloud in cloudsInPanda.items():
         # nothing assigned yet
         if tmpCloud in ['NULL', '', None]:
             continue
         tmpLog.debug('reqID={0} jediTaskID={1} -> {2}'.format(tmpReqID, reqIdTaskIdMap[tmpReqID], tmpCloud))
         # The file availability check below is disabled:
         #   tmpSt = self.findMissingFiles(reqIdTaskIdMap[tmpReqID],tmpCloud)
         #   if tmpSt != self.SC_SUCCEEDED:
         #       tmpLog.error('failed to check file availability for jediTaskID={0}'.format(reqIdTaskIdMap[tmpReqID]))
         #       continue
         retMap[reqIdTaskIdMap[tmpReqID]] = tmpCloud
     tmpLog.debug('ret {0}'.format(str(retMap)))
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED, retMap
Exemple #9
0
    def generateJobs(self):
        """Define the configured number of jobs, submit them and record their IDs.

        Asserts that the submission returned status 0 and produced one result
        per job; the first element of each result is stored as the job's
        'jobID' and logged.
        """
        for index in range(self.__nJobs):
            newEntry = {'jobSpec': self.defineEvgen16Job(index), 'jobID': None}
            self.__jobList.append(newEntry)

        # Return from submitJobs: ret.append((job.PandaID,job.jobDefinitionID,{'jobsetID':job.jobsetID}))
        status, output = Client.submitJobs([entry['jobSpec'] for entry in self.__jobList])

        assert status == 0, "Submission of jobs finished with status: %s" % status

        assert len(self.__jobList) == len(output), \
            "Not all jobs seem to have been submitted properly"

        # results are matched to the submitted jobs by position
        for entry, ids in zip(self.__jobList, output):
            jobID = ids[0]
            entry['jobID'] = jobID
            _logger.info("Generated job PandaID = %s" % jobID)
def main(taskBuffer=None, exec_options=None, log_stream=None, args_list=None):
    """Find lost files of a dataset or task, reset them in JEDI and retry the task.

    Args:
        taskBuffer: task-buffer interface. When it is ``None`` the function
            imports the shared ``taskBuffer`` object and initialises it, and
            the ``--ds`` option becomes mandatory.
        exec_options: optional dict used only when ``taskBuffer`` is given.
            Keys matching parsed option names override those options; the
            keys ``'jediTaskID'`` and ``'userName'`` select the task and the
            user name passed to ``resetFileStatusInJEDI``.
        log_stream: optional object with ``info``/``error`` methods. Messages
            are printed when it is not given.
        args_list: optional list of arguments parsed instead of the process
            command line.

    Returns:
        A ``(status, message)`` tuple:
        ``(True, "No lost files. ...")`` when nothing is lost and ``--force``
        is not set, ``(True, "Done in the dry-run mode with ...")`` for dry
        runs, ``(True, msg)`` after the task was retried or its input was
        reloaded, ``(False, 'failed')`` when no file status was reset, or the
        pair returned by ``get_files_from_rucio`` when that lookup fails.
    """

    def report(msg):
        # informational messages go to the log stream, or stdout without one
        if log_stream:
            log_stream.info(msg)
        else:
            print(msg)

    # options
    parser = argparse.ArgumentParser()
    # the dataset name is mandatory only when no task buffer is handed in
    parser.add_argument('--ds', action='store', dest='ds', default=None, required=not taskBuffer,
                        help='dataset name')
    parser.add_argument('--files', action='store', dest='files', default=None,
                        help='comma-separated list of lost file names. The list is dedeuced if this option is omitted')
    parser.add_argument('--noChildRetry', action='store_const', const=True, dest='noChildRetry', default=False,
                        help='not retry child tasks')
    parser.add_argument('--resurrectDS', action='store_const', const=True, dest='resurrectDS', default=False,
                        help='resurrect output and log datasets if they were already deleted')
    parser.add_argument('--dryRun', action='store_const', const=True, dest='dryRun', default=False,
                        help='dry run')
    parser.add_argument('--force', action='store_const', const=True, dest='force', default=False,
                        help='force retry even if no lost files')
    parser.add_argument('--reproduceParent', action='store_const', const=True, dest='reproduceParent',
                        default=False, help='reproduce the input files from which the lost files were produced. '
                        'Typically useful to recover merged files when unmerged files were already deleted')
    # parse options
    if args_list:
        options = parser.parse_args(args_list)
    elif taskBuffer:
        # tolerate foreign arguments on the command line of the embedding process
        options, _ = parser.parse_known_args()
    else:
        options = parser.parse_args()

    # executed via command-line
    givenTaskID = None
    dn = None
    if taskBuffer is None:
        # instantiate TB
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
    else:
        # set options from dict
        if exec_options is None:
            exec_options = {}
        keys = set(vars(options).keys())
        for k in exec_options:
            if k in keys:
                setattr(options, k, exec_options[k])
        givenTaskID = exec_options.get('jediTaskID', givenTaskID)
        dn = exec_options.get('userName', dn)

    # Start from the given task ID so that the name is always bound, e.g. when
    # --force is used and no lost files are found; it is overwritten below by
    # the DB lookup or by resetFileStatusInJEDI
    jediTaskID = givenTaskID

    ds_files = {}
    if options.files is not None:
        ds_files[options.ds] = options.files.split(',')
    else:
        # look for lost files
        if not givenTaskID:
            # get files from rucio
            st, files_rucio = get_files_from_rucio(options.ds, log_stream)
            if st is not True:
                return st, files_rucio
            # get files from panda
            dsName = options.ds.split(':')[-1]
            _, fo = taskBuffer.querySQLS(
                'SELECT c.lfn FROM ATLAS_PANDA.JEDI_Datasets d,ATLAS_PANDA.JEDI_Dataset_Contents c '
                'WHERE c.jediTaskID=d.jediTaskID AND c.datasetID=d.datasetID AND '
                'd.type IN (:t1,:t2) AND c.status=:s AND d.datasetName=:name ',
                {':s': 'finished', ':t1': 'output', ':t2': 'log', ':name': dsName})
            for tmpLFN, in fo:
                if tmpLFN not in files_rucio:
                    ds_files.setdefault(options.ds, []).append(tmpLFN)
            # get taskID
            _, to = taskBuffer.querySQLS(
                'SELECT jediTaskID FROM ATLAS_PANDA.JEDI_Datasets '
                'WHERE datasetName=:datasetName AND type IN (:t1,:t2) ',
                {':t1': 'output', ':t2': 'log', ':datasetName': dsName})
            jediTaskID, = to[0]
        else:
            # get dataset names
            _, do = taskBuffer.querySQLS(
                'SELECT datasetName FROM ATLAS_PANDA.JEDI_Datasets '
                'WHERE jediTaskID=:jediTaskID AND type IN (:t1,:t2) ',
                {':t1': 'output', ':t2': 'log', ':jediTaskID': givenTaskID})
            # get files from rucio
            files_rucio = set()
            for tmpDS, in do:
                st, tmp_files_rucio = get_files_from_rucio(tmpDS, log_stream)
                if st is None:
                    return st, tmp_files_rucio
                # ignore unknown dataset
                if st:
                    files_rucio.update(tmp_files_rucio)
            # get files from panda
            _, fo = taskBuffer.querySQLS(
                'SELECT d.datasetName,c.lfn FROM ATLAS_PANDA.JEDI_Datasets d,ATLAS_PANDA.JEDI_Dataset_Contents c '
                'WHERE d.jediTaskID=:jediTaskID AND c.jediTaskID=d.jediTaskID AND c.datasetID=d.datasetID AND '
                'd.type IN (:t1,:t2) AND c.status=:s ',
                {':s': 'finished', ':t1': 'output', ':t2': 'log', ':jediTaskID': givenTaskID})
            for tmpDS, tmpLFN in fo:
                if tmpLFN not in files_rucio:
                    ds_files.setdefault(tmpDS, []).append(tmpLFN)
        for tmpDS, files in ds_files.items():
            report('{} has {} lost files -> {}'.format(tmpDS, len(files), ','.join(files)))

    # no lost files
    if not ds_files and not options.force:
        return True, "No lost files. Use --force to ignore this check"

    # reset file status
    s = False
    for tmpDS, files in ds_files.items():
        if dn:
            ts, jediTaskID, lostInputFiles = taskBuffer.resetFileStatusInJEDI(dn, False, tmpDS,
                                                                              files, options.reproduceParent,
                                                                              options.dryRun)
        else:
            ts, jediTaskID, lostInputFiles = taskBuffer.resetFileStatusInJEDI('', True, tmpDS,
                                                                              files, options.reproduceParent,
                                                                              options.dryRun)
        report('reset file status for {} in the DB: done with {} for jediTaskID={}'.format(tmpDS, ts, jediTaskID))
        s |= ts
        # recover parent
        if options.reproduceParent:
            # reproduce input: one recursive recovery per lost parent dataset
            for lostDS in lostInputFiles:
                com_args = ['--ds', lostDS, '--noChildRetry', '--resurrectDS']
                if options.dryRun:
                    com_args.append('--dryRun')
                com_args += ['--files', ','.join(lostInputFiles[lostDS])]
                main(taskBuffer=taskBuffer, log_stream=log_stream, args_list=com_args)

    # go ahead
    if options.dryRun:
        return True, 'Done in the dry-run mode with {}'.format(s)
    if not (s or options.force):
        msgStr = 'failed'
        if log_stream:
            log_stream.error(msgStr)
        else:
            print(msgStr)
        return False, msgStr

    if options.resurrectDS:
        _, so = taskBuffer.querySQLS(
            'SELECT datasetName FROM ATLAS_PANDA.JEDI_Datasets WHERE jediTaskID=:id AND type IN (:t1,:t2)',
            {':id': jediTaskID, ':t1': 'output', ':t2': 'log'})
        rc = RucioClient()
        for datasetName, in so:
            # up to three attempts: look the dataset up, resurrect it if missing
            for _ in range(3):
                try:
                    scope, name = rucioAPI.extract_scope(datasetName)
                    rc.get_did(scope, name)
                    break
                except DataIdentifierNotFound:
                    report('resurrect {0}'.format(datasetName))
                    rc.resurrect([{'scope': scope, 'name': name}])
                    try:
                        rc.set_metadata(scope, name, 'lifetime', None)
                    except Exception:
                        # best effort: the lifetime reset is optional
                        pass
    if not options.reproduceParent:
        msgStr = Client.retryTask(jediTaskID, noChildRetry=options.noChildRetry)[-1][-1]
    else:
        msgStr = Client.reloadInput(jediTaskID)[-1][-1]
    if log_stream:
        log_stream.info("Retried task with {}".format(msgStr))
        log_stream.info("Done")
    else:
        print("Retried task: done with {}".format(msgStr))
    return True, msgStr
Exemple #11
0
def main(argv=tuple(), tbuf=None, **kwargs):

    try:
        long
    except NameError:
        long = int

    tmpLog = LogWrapper(_logger, None)

    tmpLog.debug("===================== start =====================")

    # current minute
    currentMinute = datetime.datetime.utcnow().minute

    # instantiate TB
    if tbuf is None:
        from pandaserver.taskbuffer.TaskBuffer import taskBuffer
        taskBuffer.init(panda_config.dbhost,
                        panda_config.dbpasswd,
                        nDBConnection=1)
    else:
        taskBuffer = tbuf

    # instantiate sitemapper
    aSiteMapper = SiteMapper(taskBuffer)

    # delete
    tmpLog.debug("Del session")
    status, retSel = taskBuffer.querySQLS(
        "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {})
    if retSel is not None:
        try:
            maxID = retSel[0][0]
            tmpLog.debug("maxID : %s" % maxID)
            if maxID is not None:
                varMap = {}
                varMap[':maxID'] = maxID
                varMap[':jobStatus1'] = 'activated'
                varMap[':jobStatus2'] = 'waiting'
                varMap[':jobStatus3'] = 'failed'
                varMap[':jobStatus4'] = 'cancelled'
                status, retDel = taskBuffer.querySQLS(
                    "DELETE FROM ATLAS_PANDA.jobsDefined4 WHERE PandaID<:maxID AND jobStatus IN (:jobStatus1,:jobStatus2,:jobStatus3,:jobStatus4)",
                    varMap)
        except Exception:
            pass

    # count # of getJob/updateJob in dispatcher's log
    try:
        # don't update when logrotate is running
        timeNow = datetime.datetime.utcnow()
        logRotateTime = timeNow.replace(hour=3,
                                        minute=2,
                                        second=0,
                                        microsecond=0)
        if (timeNow > logRotateTime and (timeNow-logRotateTime) < datetime.timedelta(minutes=5)) or \
               (logRotateTime > timeNow and (logRotateTime-timeNow) < datetime.timedelta(minutes=5)):
            tmpLog.debug("skip pilotCounts session for logrotate")
        else:
            # log filename
            dispLogName = '%s/panda-PilotRequests.log' % panda_config.logdir
            # time limit
            timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
                hours=3)
            timeLimitS = datetime.datetime.utcnow() - datetime.timedelta(
                hours=1)
            # check if tgz is required
            com = 'head -1 %s' % dispLogName
            lostat, loout = commands_get_status_output(com)
            useLogTgz = True
            if lostat == 0:
                match = re.search('^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}',
                                  loout)
                if match is not None:
                    startTime = datetime.datetime(*time.strptime(
                        match.group(0), '%Y-%m-%d %H:%M:%S')[:6])
                    # current log contains all info
                    if startTime < timeLimit:
                        useLogTgz = False
            # log files
            dispLogNameList = [dispLogName]
            if useLogTgz:
                today = datetime.date.today()
                dispLogNameList.append('{0}-{1}.gz'.format(
                    dispLogName, today.strftime('%Y%m%d')))
            # delete tmp
            commands_get_status_output('rm -f %s.tmp-*' % dispLogName)
            # tmp name
            tmpLogName = '%s.tmp-%s' % (dispLogName, datetime.datetime.utcnow(
            ).strftime('%Y-%m-%d-%H-%M-%S'))
            # loop over all files
            pilotCounts = {}
            pilotCountsS = {}
            for tmpDispLogName in dispLogNameList:
                # expand or copy
                if tmpDispLogName.endswith('.gz'):
                    com = 'gunzip -c %s > %s' % (tmpDispLogName, tmpLogName)
                else:
                    com = 'cp %s %s' % (tmpDispLogName, tmpLogName)
                lostat, loout = commands_get_status_output(com)
                if lostat != 0:
                    errMsg = 'failed to expand/copy %s with : %s' % (
                        tmpDispLogName, loout)
                    raise RuntimeError(errMsg)
                # search string
                sStr = '^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*'
                sStr += 'method=(.+),site=(.+),node=(.+),type=(.+)'
                # read
                logFH = open(tmpLogName)
                for line in logFH:
                    # check format
                    match = re.search(sStr, line)
                    if match is not None:
                        # check timerange
                        timeStamp = datetime.datetime(*time.strptime(
                            match.group(1), '%Y-%m-%d %H:%M:%S')[:6])
                        if timeStamp < timeLimit:
                            continue
                        tmpMethod = match.group(2)
                        tmpSite = match.group(3)
                        tmpNode = match.group(4)
                        tmpType = match.group(5)

                        # protection against corrupted entries from pilot,
                        # e.g. pilot reading site json from cvmfs while it was being updated
                        if tmpSite not in aSiteMapper.siteSpecList:
                            continue
                        # sum
                        pilotCounts.setdefault(tmpSite, {})
                        pilotCounts[tmpSite].setdefault(tmpMethod, {})
                        pilotCounts[tmpSite][tmpMethod].setdefault(tmpNode, 0)
                        pilotCounts[tmpSite][tmpMethod][tmpNode] += 1
                        # short
                        if timeStamp > timeLimitS:
                            if tmpSite not in pilotCountsS:
                                pilotCountsS[tmpSite] = dict()
                            if tmpMethod not in pilotCountsS[tmpSite]:
                                pilotCountsS[tmpSite][tmpMethod] = dict()
                            if tmpNode not in pilotCountsS[tmpSite][tmpMethod]:
                                pilotCountsS[tmpSite][tmpMethod][tmpNode] = 0
                            pilotCountsS[tmpSite][tmpMethod][tmpNode] += 1
                # close
                logFH.close()
            # delete tmp
            commands_get_status_output('rm %s' % tmpLogName)
            # update
            hostID = panda_config.pserverhost.split('.')[0]
            tmpLog.debug("pilotCounts session")
            retPC = taskBuffer.updateSiteData(hostID, pilotCounts, interval=3)
            tmpLog.debug(retPC)
            retPC = taskBuffer.updateSiteData(hostID, pilotCountsS, interval=1)
            tmpLog.debug(retPC)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("updateJob/getJob : %s %s" % (errType, errValue))

    # nRunning
    tmpLog.debug("nRunning session")
    try:
        if (currentMinute / panda_config.nrun_interval
            ) % panda_config.nrun_hosts == panda_config.nrun_snum:
            retNR = taskBuffer.insertnRunningInSiteData()
            tmpLog.debug(retNR)
    except Exception:
        errType, errValue = sys.exc_info()[:2]
        tmpLog.error("nRunning : %s %s" % (errType, errValue))

    # session for co-jumbo jobs
    tmpLog.debug("co-jumbo session")
    try:
        ret = taskBuffer.getCoJumboJobsToBeFinished(30, 0, 1000)
        if ret is None:
            tmpLog.debug("failed to get co-jumbo jobs to finish")
        else:
            coJumboA, coJumboD, coJumboW, coJumboTokill = ret
            tmpLog.debug("finish {0} co-jumbo jobs in Active".format(
                len(coJumboA)))
            if len(coJumboA) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboA,
                                               fromDefined=False,
                                               fromActive=True,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False)
            tmpLog.debug("finish {0} co-jumbo jobs in Defined".format(
                len(coJumboD)))
            if len(coJumboD) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboD,
                                               fromDefined=True,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=False)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], True)
            tmpLog.debug("finish {0} co-jumbo jobs in Waiting".format(
                len(coJumboW)))
            if len(coJumboW) > 0:
                jobSpecs = taskBuffer.peekJobs(coJumboW,
                                               fromDefined=False,
                                               fromActive=False,
                                               fromArchived=False,
                                               fromWaiting=True)
                for jobSpec in jobSpecs:
                    fileCheckInJEDI = taskBuffer.checkInputFileStatusInJEDI(
                        jobSpec)
                    if not fileCheckInJEDI:
                        jobSpec.jobStatus = 'closed'
                        jobSpec.jobSubStatus = 'cojumbo_wrong'
                        jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                    taskBuffer.archiveJobs([jobSpec], False, True)
            tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(
                len(coJumboTokill)))
            if len(coJumboTokill) > 0:
                jediJobs = list(coJumboTokill)
                nJob = 100
                iJob = 0
                while iJob < len(jediJobs):
                    tmpLog.debug(' killing %s' %
                                 str(jediJobs[iJob:iJob + nJob]))
                    Client.killJobs(jediJobs[iJob:iJob + nJob],
                                    51,
                                    keepUnmerged=True)
                    iJob += nJob
    except Exception:
        errStr = traceback.format_exc()
        tmpLog.error(errStr)

    tmpLog.debug("Fork session")

    # thread for fork
    class ForkThr(threading.Thread):
        """Thread that launches forkSetupper.py for a single file."""

        def __init__(self, fileName):
            threading.Thread.__init__(self)
            # path handed to forkSetupper.py through its -i option
            self.fileName = fileName

        def run(self):
            # the sysconfig file is looked up under the virtualenv root when
            # VIRTUAL_ENV is set, otherwise under the filesystem root
            envRoot = os.environ.get('VIRTUAL_ENV', '')
            command = 'source {0}/etc/sysconfig/panda_server; '.format(envRoot)
            command += '%s/python -Wignore ' % panda_config.native_python
            command += panda_config.pandaPython_dir + '/dataservice/forkSetupper.py -i '
            command += self.fileName
            # files named set.NULL.* get the extra -t flag
            baseName = self.fileName.rsplit('/', 1)[-1]
            if baseName.startswith('set.NULL.'):
                command += ' -t'
            tmpLog.debug(command)
            commands_get_status_output(command)

    # get set.* files
    filePatt = panda_config.logdir + '/' + 'set.*'
    fileList = glob.glob(filePatt)

    # the max number of threads
    maxThr = 10
    nThr = 0

    # loop over all files
    forkThrList = []
    timeNow = datetime.datetime.utcnow()
    for tmpName in fileList:
        if not os.path.exists(tmpName):
            continue
        try:
            # takes care of only recent files
            modTime = datetime.datetime(
                *(time.gmtime(os.path.getmtime(tmpName))[:7]))
            if (timeNow - modTime) > datetime.timedelta(minutes=1) and \
                    (timeNow - modTime) < datetime.timedelta(hours=1):
                cSt, cOut = commands_get_status_output(
                    'ps aux | grep fork | grep -v PYTH')
                # if no process is running for the file
                if cSt == 0 and tmpName not in cOut:
                    nThr += 1
                    thr = ForkThr(tmpName)
                    thr.start()
                    forkThrList.append(thr)
                    if nThr > maxThr:
                        break
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            tmpLog.error("%s %s" % (errType, errValue))

    # join fork threads
    for thr in forkThrList:
        thr.join()

    # terminate TaskBuffer IF
    # taskBufferIF.terminate()

    tmpLog.debug("===================== end =====================")
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
     """Build one JobSpec per task and pass them to the task assigner.

     For every (taskSpec, cloudName, inputChunk) entry a JobSpec is filled
     from the task, the input files of the chunk are attached, and the RW
     values returned by taskBufferIF are collected.  The collected maps are
     then serialized into the metadata of every JobSpec, and the specs are
     sent to PandaClient.runTaskAssignment in bunches of maxBunchTask.

     :param inputList: iterable of (jediTaskID, list of
                       (taskSpec, cloudName, inputChunk)) pairs
     :param vo: forwarded to calculateRWwithPrio_JEDI and logged
     :param prodSourceLabel: forwarded to calculateRWwithPrio_JEDI and logged
     :param workQueue: object exposing queue_name; forwarded to
                       calculateRWwithPrio_JEDI
     :return: self.SC_SUCCEEDED when all bunches were sent, self.SC_FAILED
              as soon as one RW calculation returns None
     """
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug("start doBrokerage")
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug("vo={0} label={1} queue={2}".format(vo, prodSourceLabel, workQueue.queue_name))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = "managed"
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 # remember the last dataset name as a fallback
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority; computed only once per priority value
             # ("in" replaces dict.has_key, which does not exist in Python 3)
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue, jobSpec.currentPriority
                 )
                 if tmpRW is None:
                     tmpLog.error("failed to calculate RW with prio={0}".format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error("failed to calculate RW for jediTaskID={0}".format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # get fullRWs
     fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo, prodSourceLabel, None, None)
     if fullRWs is None:
         tmpLog.error("failed to calculate full RW")
         return retTmpError
     # set metadata; done after the loop so every spec carries the complete maps
     for jobSpec in jobSpecList:
         rwValues = allRwMap[jobSpec.currentPriority]
         jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
             jobSpec.metadata,
             str(rwValues),
             str(expRWs),
             str(prioMap),
             str(fullRWs),
             str(tt2Map),
         )
     tmpLog.debug("run task assigner for {0} tasks".format(len(jobSpecList)))
     nBunchTask = 0
     while nBunchTask < len(jobSpecList):
         # get a bunch
         jobsBunch = jobSpecList[nBunchTask : nBunchTask + maxBunchTask]
         strIDs = "jediTaskID=" + ",".join(
             "{0}".format(tmpJobSpec.taskID) for tmpJobSpec in jobsBunch
         )
         tmpLog.debug(strIDs)
         # increment index
         nBunchTask += maxBunchTask
         # run task brokerage; the returned status is only logged
         stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
         tmpLog.debug("{0}:{1}".format(stS, str(outSs)))
     # return
     tmpLog.debug("done")
     return self.SC_SUCCEEDED
Exemple #13
0
    sys.exit(0)
if not s:
    print('failed')
else:
    if options.resurrectDS:
        # look up the output and log datasets of the task
        bindVars = {
            ':id': jediTaskID,
            ':t1': 'output',
            ':t2': 'log'
        }
        sd, so = taskBuffer.querySQLS(
            'SELECT datasetName FROM ATLAS_PANDA.JEDI_Datasets WHERE jediTaskID=:id AND type IN (:t1,:t2)',
            bindVars)
        rc = RucioClient()
        for datasetName, in so:
            # up to three attempts: stop as soon as the DID is found,
            # otherwise resurrect it and check again on the next pass
            for i in range(3):
                try:
                    scope, name = rucioAPI.extract_scope(datasetName)
                    rc.get_did(scope, name)
                except DataIdentifierNotFound:
                    print('resurrect {0}'.format(datasetName))
                    rc.resurrect([{'scope': scope, 'name': name}])
                    # resetting the lifetime is best effort
                    try:
                        rc.set_metadata(scope, name, 'lifetime', None)
                    except Exception:
                        pass
                else:
                    break
    retryResult = Client.retryTask(jediTaskID,
                                   noChildRetry=options.noChildRetry)
    print(retryResult[-1][-1])
    print('done for jediTaskID={0}'.format(jediTaskID))
Exemple #14
0
                    jobSpec)
                if not fileCheckInJEDI:
                    jobSpec.jobStatus = 'closed'
                    jobSpec.jobSubStatus = 'cojumbo_wrong'
                    jobSpec.taskBufferErrorCode = pandaserver.taskbuffer.ErrorCode.EC_EventServiceInconsistentIn
                taskBuffer.archiveJobs([jobSpec], False, True)
        tmpLog.debug("kill {0} co-jumbo jobs in Waiting".format(
            len(coJumboTokill)))
        if len(coJumboTokill) > 0:
            jediJobs = list(coJumboTokill)
            nJob = 100
            iJob = 0
            while iJob < len(jediJobs):
                tmpLog.debug(' killing %s' % str(jediJobs[iJob:iJob + nJob]))
                Client.killJobs(jediJobs[iJob:iJob + nJob],
                                51,
                                keepUnmerged=True)
                iJob += nJob
except Exception:
    errStr = traceback.format_exc()
    tmpLog.error(errStr)

tmpLog.debug("Fork session")


# thread for fork
class ForkThr(threading.Thread):
    def __init__(self, fileName):
        threading.Thread.__init__(self)
        self.fileName = fileName
Exemple #15
0
import optparse
import pandaserver.userinterface.Client as Client

# parse the command line; the single positional argument is the jediTaskID
optP = optparse.OptionParser(conflict_handler="resolve")
options,args = optP.parse_args()

# exit with a usage message rather than an IndexError when the ID is missing
if len(args) < 1:
    optP.error("jediTaskID is required")

jediTaskID = args[0]

# ask the server to finish the task and print its reply
s,o = Client.finishTask(jediTaskID)
print(o)
Exemple #16
0
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost,panda_config.dbpasswd,panda_config.dbuser,panda_config.dbname)

# PandaIDs in the order they were first seen; the set gives O(1) duplicate
# checks instead of scanning the list for every row
jobs = []
seenIDs = set()

varMap = {}
varMap[':prodSourceLabel']  = 'managed'
varMap[':taskID']   = args[0]
varMap[':pandaIDl'] = args[1]
varMap[':pandaIDu'] = args[2]
sql = "SELECT PandaID FROM %s WHERE prodSourceLabel=:prodSourceLabel AND taskID=:taskID AND PandaID BETWEEN :pandaIDl AND :pandaIDu ORDER BY PandaID"
for table in ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsWaiting4','ATLAS_PANDA.jobsDefined4']:
    status,res = proxyS.querySQLS(sql % table,varMap)
    if res is not None:
        for pandaID, in res:
            if pandaID not in seenIDs:
                seenIDs.add(pandaID)
                jobs.append(pandaID)

print('The number of jobs to be killed : %s' % len(jobs))
if len(jobs):
    # kill in chunks of nJob, pausing one second between chunks
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print('kill %s' % str(jobs[iJob:iJob+nJob]))
        if options.forceKill:
            Client.killJobs(jobs[iJob:iJob+nJob],9,useMailAsID=useMailAsIDV)
        else:
            Client.killJobs(jobs[iJob:iJob+nJob],useMailAsID=useMailAsIDV)
        iJob += nJob
        time.sleep(1)
Exemple #17
0
# destination and bookkeeping attributes of the job
job.destinationDBlock = datasetName
job.destinationSE = 'local'
job.currentPriority = 1000
job.prodSourceLabel = 'panda'
job.jobParameters = ' --lsstJobParams="%s" ' % lsstJobParams
job.prodUserName = prodUserName if prodUserName is not None else prodUserNameDefault
# the PIPELINE_* values override job attributes only when they are set
if PIPELINE_PROCESSINSTANCE is not None:
    job.taskID = PIPELINE_PROCESSINSTANCE
if PIPELINE_EXECUTIONNUMBER is not None:
    job.attemptNr = PIPELINE_EXECUTIONNUMBER
if PIPELINE_TASK is not None:
    job.processingType = PIPELINE_TASK
job.computingSite = site
job.VO = "lsst"

# log file spec, sent to the same destination as the job
fileOL = FileSpec()
fileOL.lfn = "%s.job.log.tgz" % job.jobName
fileOL.destinationDBlock = job.destinationDBlock
fileOL.destinationSE = job.destinationSE
fileOL.dataset = job.destinationDBlock
fileOL.type = 'log'
job.addFile(fileOL)

# submit and print the status followed by the first field of each reply item
s, o = Client.submitJobs([job], srvID=aSrvID)
print(s)
for x in o:
    print("PandaID=%s" % x[0])
Exemple #18
0
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsDefined4 "
else:
    sql = "SELECT PandaID,lockedby FROM ATLAS_PANDA.jobsActive4 "
sql += "WHERE jobStatus=:jobStatus AND computingSite=:computingSite AND modificationTime<:modificationTime AND prodSourceLabel=:prodSourceLabel ORDER BY PandaID"
status, res = proxyS.querySQLS(sql, varMap)

# the original code guarded against res being None only after calling
# len(res); normalize first so the count below cannot raise TypeError
if res is None:
    res = []

print("got {0} jobs".format(len(res)))

# split the PandaIDs by the lockedby column: 'jedi' jobs are killed,
# all others are reassigned
jobs = []
jediJobs = []
for (pandaID, lockedby) in res:
    if lockedby == 'jedi':
        jediJobs.append(pandaID)
    else:
        jobs.append(pandaID)
if len(jobs):
    nJob = 100
    iJob = 0
    while iJob < len(jobs):
        print('reassign  %s' % str(jobs[iJob:iJob + nJob]))
        Client.reassignJobs(jobs[iJob:iJob + nJob])
        iJob += nJob
if len(jediJobs) != 0:
    nJob = 100
    iJob = 0
    while iJob < len(jediJobs):
        print('kill JEDI jobs %s' % str(jediJobs[iJob:iJob + nJob]))
        Client.killJobs(jediJobs[iJob:iJob + nJob], 51)
        iJob += nJob
Exemple #19
0
import pandaserver.userinterface.Client as Client
from pandaserver.userinterface.Client import baseURLSSL

from pandaserver.taskbuffer.TaskBuffer import taskBuffer
from pandaserver.brokerage.SiteMapper import SiteMapper
from pandaserver.config import panda_config
from pandaserver.dataservice import DataServiceUtils
# package name was misspelled as "pandasever", which fails at import time
from pandaserver.dataservice.DataServiceUtils import select_scope

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# the job to look up is given as the first command-line argument
id = sys.argv[1]
s, o = Client.getJobStatus([id])

if s != 0:
    print("failed to get job with:%s" % s)
    sys.exit(0)

# one ID was requested, so only the first entry of the reply is used
job = o[0]

if job is None:
    print("got None")
    sys.exit(0)

xml = """<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<!-- ATLAS file meta-data catalog -->
<!DOCTYPE POOLFILECATALOG SYSTEM "InMemory">
<POOLFILECATALOG>
jediTaskID = int(options.tid)

# NOTE(review): the condition is a constant, so the else branch below can
# never run; kept as-is to leave the script layout untouched
if True:
    if options.resurrectDS:
        # look up the output and log datasets of the task
        sd, so = taskBuffer.querySQLS(
            'SELECT datasetName FROM ATLAS_PANDA.JEDI_Datasets WHERE jediTaskID=:id AND type IN (:t1,:t2)',
            {
                ':id': jediTaskID,
                ':t1': 'output',
                ':t2': 'log'
            })
        rc = RucioClient()
        for datasetName, in so:
            # up to three attempts: stop as soon as the DID is found,
            # otherwise resurrect it and check again on the next pass
            for i in range(3):
                try:
                    scope, name = rucioAPI.extract_scope(datasetName)
                    rc.get_did(scope, name)
                    break
                except DataIdentifierNotFound:
                    print('resurrect {0}'.format(datasetName))
                    rc.resurrect([{'scope': scope, 'name': name}])
                    # resetting the lifetime is best effort; catch Exception
                    # rather than everything so Ctrl-C and SystemExit still
                    # stop the script
                    try:
                        rc.set_metadata(scope, name, 'lifetime', None)
                    except Exception:
                        pass
    print(Client.reloadInput(jediTaskID)[-1])
    print('done for jediTaskID={0}'.format(jediTaskID))
else:
    print('failed')
Exemple #21
0
# pick the kill code: the force/user flags take precedence over --codeV
if options.forceKill:
    codeV = 9
elif options.killUserJobs:
    codeV = 91
else:
    # leave codeV untouched when the option is not a valid integer
    try:
        codeV = int(options.codeV)
    except Exception:
        pass
if options.killOwnProdJobs:
    useMailAsIDV = True

# one argument is a single ID, otherwise the first two are an inclusive range
if len(args) == 1:
    targetIDs = [args[0]]
else:
    startID = int(args[0])
    endID = int(args[1])
    if startID > endID:
        print('%d is less than %d' % (endID, startID))
        sys.exit(1)
    targetIDs = range(startID, endID + 1)
Client.killJobs(targetIDs,
                code=codeV,
                useMailAsID=useMailAsIDV,
                keepUnmerged=options.keepUnmerged,
                jobSubStatus=options.jobSubStatus)
Exemple #22
0
# time limit: only rows last modified more than one hour ago are selected
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(hours=1)

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost, panda_config.dbpasswd, panda_config.dbuser,
               panda_config.dbname)

# repeat the query until it returns nothing
while True:
    # get PandaIDs
    varMap = {}
    varMap[':modificationTime'] = timeLimit
    sql = "SELECT PandaID FROM ATLAS_PANDA.jobsWaiting4 WHERE modificationTime<:modificationTime ORDER BY PandaID"
    status, res = proxyS.querySQLS(sql, varMap)

    # escape; "not res" also covers a None result, on which len() would raise
    if not res:
        break
    # convert to list
    jobs = [pandaID for pandaID, in res]
    # reassign in chunks of nJob, waiting a minute after each chunk
    nJob = 300
    iJob = 0
    while iJob < len(jobs):
        print('reassignJobs(%s)' % jobs[iJob:iJob + nJob])
        Client.reassignJobs(jobs[iJob:iJob + nJob])
        iJob += nJob
        time.sleep(60)
Exemple #23
0
def putFile(req, file):
    """Store an uploaded file in the cache directory and register it.

    The upload is rejected when the request is not secure, when the client
    DN contains '/CN=limited proxy', when the declared content length
    exceeds the size limit, or when a file with the same base name already
    exists in panda_config.cache_dir.  After writing the file its size and
    checksum are passed to Client.insertSandboxFileInfo and, when
    panda_config.cacheUseCassandra is enabled, the content is also stored
    in Cassandra in 5 MB chunks.

    :param req: request object exposing subprocess_env['SSL_CLIENT_S_DN']
                and headers_in
    :param file: uploaded file object exposing filename and file.read()
    :return: True on success; False for an insecure request or a limited
             proxy; otherwise an error message string (most start with
             "ERROR : ", the alias failure message does not)
    """
    if not Protocol.isSecure(req):
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    _logger.debug("putFile : start %s %s" %
                  (req.subprocess_env['SSL_CLIENT_S_DN'], file.filename))
    # size check: files not named sources.* get the tighter limit
    fullSizeLimit = 768 * 1024 * 1024
    if not file.filename.startswith('sources.'):
        noBuild = True
        sizeLimit = 100 * 1024 * 1024
    else:
        noBuild = False
        sizeLimit = fullSizeLimit
    # get file size; stays 0 when the header is missing or not a number
    contentLength = 0
    try:
        contentLength = long(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            _logger.error("cannot get CL : %s" %
                          req.headers_in["content-length"])
        else:
            _logger.error("no CL")
    _logger.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (
            contentLength, sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        _logger.error(errStr)
        _logger.debug("putFile : end")
        return errStr
    try:
        # only the base name of the uploaded file name is used
        fileFullPath = '%s/%s' % (panda_config.cache_dir,
                                  file.filename.split('/')[-1])
        # avoid overwriting
        if os.path.exists(fileFullPath):
            # touch
            os.utime(fileFullPath, None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            _logger.debug('putFile : cannot overwrite file %s' % file.filename)
            _logger.debug("putFile : end")
            return errStr
        # write; the context manager closes the handle even when reading
        # the upload or writing to disk raises
        with open(fileFullPath, 'wb') as fo:
            fileContent = file.file.read()
            fo.write(fileContent)
    except Exception:
        errStr = "ERROR : Cannot write file"
        _logger.error(errStr)
        _logger.debug("putFile : end")
        return errStr
    # checksum
    try:
        # decode Footer: the last 8 bytes are read as two unsigned ints
        footer = fileContent[-8:]
        checkSum, isize = struct.unpack("II", footer)
        _logger.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # calculating on the fly (zlib.adler32(fileContent) & 0xFFFFFFFF)
        # is disabled; use None to avoid delay for now
        checkSum = None
        _logger.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    # user name
    username = cleanUserID(req.subprocess_env['SSL_CLIENT_S_DN'])
    _logger.debug("putFile : written dn=%s file=%s size=%s crc=%s" %
                  (username, file.filename, fileSize, checkSum))
    # put file info to DB; a failure here is only logged and the upload
    # still proceeds
    statClient, outClient = Client.insertSandboxFileInfo(
        username, file.filename, fileSize, checkSum)
    if statClient != 0 or outClient.startswith("ERROR"):
        _logger.error("putFile : failed to put sandbox to DB with %s %s" %
                      (statClient, outClient))
        #_logger.debug("putFile : end")
        #return "ERROR : Cannot insert sandbox to DB"
    else:
        _logger.debug("putFile : inserted sandbox to DB with %s" % outClient)
    # store to cassandra
    if hasattr(panda_config,
               'cacheUseCassandra') and panda_config.cacheUseCassandra == True:
        try:
            # time-stamp
            timeNow = datetime.datetime.utcnow()
            creationTime = timeNow.strftime('%Y-%m-%d %H:%M:%S')
            # user name
            username = req.subprocess_env['SSL_CLIENT_S_DN']
            username = username.replace('/CN=proxy', '')
            username = username.replace('/CN=limited proxy', '')
            # file size
            fileSize = len(fileContent)
            # key
            fileKeyName = file.filename.split('/')[-1]
            sizeCheckSum = '%s:%s' % (fileSize, checkSum)
            # insert to cassandra
            import pycassa
            pool = pycassa.ConnectionPool(panda_config.cacheKeySpace)
            filefamily = pycassa.ColumnFamily(pool,
                                              panda_config.cacheFileTable)
            # avoid overwriting
            gotoNextCassa = True
            if filefamily.get_count(fileKeyName) > 0:
                # touch
                touchFlag = touchFileCassa(filefamily, fileKeyName, timeNow)
                if touchFlag:
                    gotoNextCassa = False
                    # send error message
                    errStr = "ERROR : Cannot overwrite file in Cassandra"
                    _logger.error(errStr)
                    if not panda_config.cacheIgnoreCassandraError:
                        _logger.debug("putFile : end")
                        return errStr
            # check uniqueness with size and checksum
            if gotoNextCassa:
                try:
                    uniqExp = pycassa.index.create_index_expression(
                        'uniqID', sizeCheckSum)
                    userExp = pycassa.index.create_index_expression(
                        'user', username)
                    tmpClause = pycassa.index.create_index_clause(
                        [uniqExp, userExp])
                    tmpResults = filefamily.get_indexed_slices(
                        tmpClause, columns=['creationTime'])
                    for oldFileKeyName, tmpDict in tmpResults:
                        _logger.debug('The same size and chksum %s found in old:%s and new:%s' %
                                      (sizeCheckSum, oldFileKeyName, fileKeyName))
                        # touch
                        touchFlag = touchFileCassa(filefamily, oldFileKeyName,
                                                   timeNow)
                        if touchFlag:
                            # make alias
                            _logger.debug('Making alias %s->%s' %
                                          (fileKeyName, oldFileKeyName))
                            insertWithRetryCassa(
                                filefamily, fileKeyName, {
                                    'alias': oldFileKeyName,
                                    'creationTime': creationTime,
                                    'nSplit': 0,
                                },
                                'putFile : make alias for %s' % file.filename)
                            # set time
                            touchFileCassa(filefamily, fileKeyName, timeNow)
                            _logger.debug("putFile : end")
                            return True
                except Exception:
                    gotoNextCassa = False
                    errType, errValue = sys.exc_info()[:2]
                    errStr = "cannot make alias for %s due to %s %s" % (
                        fileKeyName, errType, errValue)
                    _logger.error(errStr)
                    if not panda_config.cacheIgnoreCassandraError:
                        _logger.debug("putFile : end")
                        return errStr
            # insert new record
            if gotoNextCassa:
                splitIdx = 0
                splitSize = 5 * 1024 * 1024
                # number of chunks, rounded up
                nSplit, tmpMod = divmod(len(fileContent), splitSize)
                if tmpMod != 0:
                    nSplit += 1
                _logger.debug('Inserting %s with %s blocks' %
                              (fileKeyName, nSplit))
                for splitIdx in range(nSplit):
                    # split to small chunks since cassandra is not good at large files
                    tmpFileContent = fileContent[splitSize *
                                                 splitIdx:splitSize *
                                                 (splitIdx + 1)]
                    tmpFileKeyName = fileKeyName
                    tmpAttMap = {
                        'file': tmpFileContent,
                        'user': username,
                        'creationTime': creationTime,
                    }
                    if splitIdx == 0:
                        # the first chunk carries the metadata of the whole file
                        tmpAttMap['size'] = fileSize
                        tmpAttMap['nSplit'] = nSplit
                        tmpAttMap['uniqID'] = sizeCheckSum
                        tmpAttMap['checkSum'] = str(checkSum)
                    else:
                        # later chunks are stored under <key>_<index>
                        tmpFileKeyName += '_%s' % splitIdx
                        tmpAttMap['size'] = 0
                        tmpAttMap['nSplit'] = 0
                    # insert with retry
                    insertWithRetryCassa(filefamily, tmpFileKeyName, tmpAttMap,
                                         'putFile : insert %s' % file.filename)
                # set time
                touchFileCassa(filefamily, fileKeyName, timeNow)
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            errStr = "cannot put %s into Cassandra due to %s %s" % (
                fileKeyName, errType, errValue)
            _logger.error(errStr)
            # send error message
            errStr = "ERROR : " + errStr
            if not panda_config.cacheIgnoreCassandraError:
                _logger.debug("putFile : end")
                return errStr
    _logger.debug("putFile : %s end" % file.filename)
    return True
Exemple #24
0
import sys

import pandaserver.userinterface.Client as Client

# one argument is a single jobDefinitionID, otherwise the first two
# arguments are the bounds of an inclusive ID range
cliArgs = sys.argv[1:]
if len(cliArgs) == 1:
    jobDefIDs = [cliArgs[0]]
else:
    startID = int(cliArgs[0])
    endID   = int(cliArgs[1])
    if startID > endID:
        print('%d is less than %d' % (endID,startID))
        sys.exit(1)
    jobDefIDs = range(startID,endID+1)

# query PandaID
status, ids = Client.queryPandaIDs(jobDefIDs)

if status != 0:
    sys.exit(0)

# remove None
ids = [pandaID for pandaID in ids if pandaID is not None]

# kill
if ids:
    Client.killJobs(ids)
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
     """Build one JobSpec per task and pass them to the task assigner.

     For every (taskSpec, cloudName, inputChunk) entry a JobSpec is filled
     from the task, the input files of the chunk are attached, and the RW
     values returned by taskBufferIF are collected.  The collected maps are
     then serialized into the metadata of every JobSpec, and the specs are
     sent to PandaClient.runTaskAssignment in bunches of maxBunchTask.

     :param inputList: iterable of (jediTaskID, list of
                       (taskSpec, cloudName, inputChunk)) pairs
     :param vo: forwarded to calculateRWwithPrio_JEDI and logged
     :param prodSourceLabel: forwarded to calculateRWwithPrio_JEDI and logged
     :param workQueue: object exposing queue_name; forwarded to
                       calculateRWwithPrio_JEDI
     :return: self.SC_SUCCEEDED when all bunches were sent, self.SC_FAILED
              as soon as one RW calculation returns None
     """
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2}'.format(
         vo, prodSourceLabel, workQueue.queue_name))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) / 1024 / 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 # remember the last dataset name as a fallback
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority; computed only once per priority value
             # ("in" replaces dict.has_key, which does not exist in Python 3)
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # get fullRWs
     fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
         vo, prodSourceLabel, None, None)
     if fullRWs is None:
         tmpLog.error('failed to calculate full RW')
         return retTmpError
     # set metadata; done after the loop so every spec carries the complete maps
     for jobSpec in jobSpecList:
         rwValues = allRwMap[jobSpec.currentPriority]
         jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
             jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
             str(fullRWs), str(tt2Map))
     tmpLog.debug('run task assigner for {0} tasks'.format(
         len(jobSpecList)))
     nBunchTask = 0
     while nBunchTask < len(jobSpecList):
         # get a bunch
         jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
         strIDs = 'jediTaskID=' + ','.join(
             '{0}'.format(tmpJobSpec.taskID) for tmpJobSpec in jobsBunch)
         tmpLog.debug(strIDs)
         # increment index
         nBunchTask += maxBunchTask
         # run task brokerage; the returned status is only logged
         stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
         tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED
Exemple #26
0
import optparse

import pandaserver.userinterface.Client as Client

aSrvID = None  # not referenced anywhere in this script

# the only input is one positional argument: the jediTaskID to kill
optP = optparse.OptionParser(usage="%prog [options] <jediTaskID>",
                             conflict_handler="resolve")
options, args = optP.parse_args()

# fail with a usage message instead of an IndexError traceback
if not args:
    optP.error("jediTaskID is required")

jediTaskID = args[0]

# ask the server to kill the task and show whatever it answered
s, o = Client.killTask(jediTaskID)
print(o)
Exemple #27
0
# fetch the candidate jobs and split them by who holds the lock
status, res = proxyS.querySQLS(sql, varMap)
for pandaID, lockedBy in (res if res is not None else ()):
    target = jediJobs if lockedBy == 'jedi' else jobs
    target.append(pandaID)

# size of each bunch sent to the server
nJob = 100

# reassign the non-JEDI jobs in ascending PandaID order, pausing
# after every bunch
jobs.sort()
for iJob in range(0, len(jobs), nJob):
    bunch = jobs[iJob:iJob + nJob]
    print('reassign  %s' % str(bunch))
    Client.reassignJobs(bunch)
    time.sleep(10)

# jobs locked by JEDI are killed rather than reassigned
for iJob in range(0, len(jediJobs), nJob):
    bunch = jediJobs[iJob:iJob + nJob]
    print('kill JEDI jobs %s' % str(bunch))
    Client.killJobs(bunch, codeV, keepUnmerged=options.keepUnmerged)

# the summary counts both groups together
print('\nreassigned {0} jobs'.format(len(jobs) + len(jediJobs)))
Exemple #28
0
srcSQL += ')'

# the filter part of the query is the same for every table, so it is
# built once instead of on each iteration
filterSQL = "WHERE prodUserName=:prodUserName AND prodSourceLabel IN %s " % srcSQL
if options.jobID is not None:
    filterSQL += "AND jobDefinitionID=:jobDefinitionID "
if options.jobsetID not in (None, 'all'):
    filterSQL += "AND jobsetID=:jobsetID "
filterSQL += "ORDER BY PandaID "

# collect PandaIDs from all job tables, keeping first-seen order;
# the set makes the duplicate check O(1) instead of scanning the list
jobs = []
seenIDs = set()
tables = [
    'ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsWaiting4',
    'ATLAS_PANDA.jobsDefined4'
]
for table in tables:
    sql = "SELECT PandaID FROM %s " % table + filterSQL
    status, res = proxyS.querySQLS(sql, varMap)
    if res is not None:
        for pandaID, in res:
            if pandaID not in seenIDs:
                seenIDs.add(pandaID)
                jobs.append(pandaID)

if jobs:
    # kill in bunches of nJob, reporting the progress
    nJob = 1000
    for iJob in range(0, len(jobs), nJob):
        subJobs = jobs[iJob:iJob + nJob]
        print("kill %s %s/%s" % (str(subJobs), iJob, len(jobs)))
        Client.killJobs(subJobs, code=9)
else:
    print("no job was killed")
Exemple #29
0
    job.cloud = 'US'
    job.cmtConfig = 'i686-slc4-gcc34-opt'

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % job.jobName
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
    file.destinationDBlockToken = 'ATLASDATADISK'
    file.type = 'output'
    job.addFile(file)

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.destinationDBlockToken = 'ATLASDATADISK'
    fileOL.type = 'log'
    job.addFile(fileOL)

    job.jobParameters = "5144 1 5000 1 CSC.005144.PythiaZee.py %s NONE NONE NONE" % file.lfn
    jobList.append(job)

# submit the whole job list (a single pass) and report what came back
for _ in range(1):
    status, output = Client.submitJobs(jobList)
    print("---------------------")
    print(status)
    # the first element of each returned entry is printed as the PandaID
    for entry in output:
        print("PandaID=%s" % entry[0])
Exemple #30
0
 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue,
                 resource_name):
     """Run task brokerage for the given tasks.

     Tasks whose ``taskSpec.useWorldCloud()`` is true are collected and
     processed by ``AtlasProdTaskBrokerThread`` workers.  Every other
     task is converted into a ``JobSpec`` and submitted in bunches of
     ``maxBunchTask`` through ``PandaClient.runTaskAssignment``.

     :param inputList: iterable of ``(jediTaskID, tmpInputList)`` pairs;
         each ``tmpInputList`` yields ``(taskSpec, cloudName,
         inputChunk)`` tuples
     :param vo: passed to the RW calculations and to
         ``self.ddmIF.getInterface``
     :param prodSourceLabel: passed to the RW calculations
     :param workQueue: work queue object; its ``queue_name`` is logged
         and the object is passed to the per-priority RW calculations
         and to the worker threads
     :param resource_name: only written to the debug log here
     :return: ``self.SC_SUCCEEDED``, or ``self.SC_FAILED`` as soon as
         one of the RW calculations returns ``None``
     """
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug(
         'vo={0} label={1} queue={2} resource_name={3} nTasks={4}'.format(
             vo, prodSourceLabel, workQueue.queue_name, resource_name,
             len(inputList)))
     # loop over all tasks
     allRwMap = {}
     prioMap = {}
     tt2Map = {}
     expRWs = {}
     jobSpecList = []
     for _tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, _cloudName, inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec, inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             # (WORLD tasks never get here, so no destinationSE is set)
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) // 1024 // 1024
             # the master dataset gives prodDBlock; otherwise fall back
             # to the last secondary dataset seen
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority (computed once per priority value)
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata: every job carries the complete maps, not only
         # its own entries
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
                 jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
                 str(fullRWs), str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(
             len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
             strIDs = 'jediTaskID=' + ','.join(
                 '{0}'.format(tmpJobSpec.taskID)
                 for tmpJobSpec in jobsBunch)
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerage; the outcome is only logged and does
             # not change the return value
             stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority; allRwMap is shared with the old-cloud
         # pass, so priorities already computed there are not recomputed
         for taskSpec, inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     taskSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for _iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld, threadPool,
                                             self.taskBufferIF, ddmIF,
                                             fullRWs, liveCounter,
                                             workQueue)
             thr.start()
         # NOTE(review): the argument is presumably a timeout in
         # seconds - confirm against ThreadPool.join
         threadPool.join(60 * 10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED
Exemple #31
0
                const=True,
                dest='modeOn',
                default=False,
                help='turn the debug mode on')
optP.add_option('--off',
                action='store_const',
                const=True,
                dest='modeOff',
                default=False,
                help='turn the debug mode off')
options, args = optP.parse_args()

# exactly one of --on / --off has to be given
if bool(options.modeOn) == bool(options.modeOff):
    print("ERROR: please set --on or --off")
    sys.exit(1)

# the first positional argument is passed with the requested mode
status, output = Client.setDebugMode(args[0], bool(options.modeOn))

# report the server's answer
if output != 'Succeeded':
    print("ERROR:", output)
else:
    print(output)

# a non-zero status is reported as a communication failure
if status == 0:
    sys.exit(0)
print("ERROR: communication failure to the panda server")
sys.exit(1)
Exemple #32
0
def putFile(req, file):
    """Store an uploaded file in the cache directory and record it in the DB.

    :param req: request object; ``subprocess_env['SSL_CLIENT_S_DN']`` and
        ``headers_in['content-length']`` are read from it
    :param file: uploaded file object exposing ``filename`` and a readable
        ``file`` attribute
    :return: ``False`` when the request is not secure or comes from a
        limited proxy; a string starting with ``"ERROR :"`` when the upload
        is too large, would overwrite an existing file or cannot be
        written; ``True`` otherwise (a failure to record the file in the DB
        is only logged)
    """
    tmpLog = LogWrapper(_logger, 'putFile-{}'.format(datetime.datetime.utcnow().isoformat('/')))
    if not Protocol.isSecure(req):
        tmpLog.error('No SSL_CLIENT_S_DN')
        return False
    if '/CN=limited proxy' in req.subprocess_env['SSL_CLIENT_S_DN']:
        return False
    # user name
    username = CoreUtils.clean_user_id(req.subprocess_env['SSL_CLIENT_S_DN'])
    tmpLog.debug("start %s %s" % (username, file.filename))
    # size check: only files named 'sources.*' get the full limit
    fullSizeLimit = 768*1024*1024
    noBuild = not file.filename.startswith('sources.')
    sizeLimit = 100*1024*1024 if noBuild else fullSizeLimit
    # get file size
    contentLength = 0
    try:
        # int() instead of the Python 2-only long(): where long is not
        # defined, the NameError would be swallowed by the handler below,
        # leaving the size at 0 and disabling the limit check
        contentLength = int(req.headers_in["content-length"])
    except Exception:
        if "content-length" in req.headers_in:
            tmpLog.error("cannot get CL : %s" % req.headers_in["content-length"])
        else:
            tmpLog.error("no CL")
    tmpLog.debug("size %s" % contentLength)
    if contentLength > sizeLimit:
        errStr = "ERROR : Upload failure. Exceeded size limit %s>%s." % (contentLength,sizeLimit)
        if noBuild:
            errStr += " Please submit the job without --noBuild/--libDS since those options impose a tighter size limit"
        else:
            errStr += " Please remove redundant files from your workarea"
        tmpLog.error(errStr)
        tmpLog.debug("end")
        return errStr
    try:
        # keep only the last path component of the client-supplied name
        fileName = file.filename.split('/')[-1]
        fileFullPath = '%s/%s' % (panda_config.cache_dir, fileName)

        # avoid overwriting
        if os.path.exists(fileFullPath):
            # touch
            os.utime(fileFullPath, None)
            # send error message
            errStr = "ERROR : Cannot overwrite file"
            tmpLog.debug('cannot overwrite file %s' % fileName)
            tmpLog.debug("end")
            return errStr
        # read (and optionally compress) before creating the destination,
        # so that a failure here does not leave an empty file behind that
        # would make every retry fail with "Cannot overwrite file"
        fileContent = file.file.read()
        if hasattr(panda_config, 'compress_file_names') and \
                any(re.search(patt, fileName) is not None
                    for patt in panda_config.compress_file_names.split(',')):
            fileContent = gzip.compress(fileContent)
        # write; the context manager closes the handle even on error
        with open(fileFullPath, 'wb') as fo:
            fo.write(fileContent)
    except Exception as e:
        errStr = "ERROR : Cannot write file"
        # keep the cause in the log; the caller only gets the generic text
        tmpLog.error('%s : %s' % (errStr, str(e)))
        tmpLog.debug("end")
        return errStr
    # checksum
    try:
        # decode Footer: the last 8 bytes are read as two unsigned ints
        # NOTE(review): assumes the stored content is gzip data - confirm
        footer = fileContent[-8:]
        checkSum,isize = struct.unpack("II",footer)
        tmpLog.debug("CRC from gzip Footer %s" % checkSum)
    except Exception:
        # no checksum is calculated on the fly (e.g. with zlib.adler32);
        # use None to avoid delay for now
        checkSum = None
        tmpLog.debug("CRC calculated %s" % checkSum)
    # file size
    fileSize = len(fileContent)
    tmpLog.debug("written dn=%s file=%s size=%s crc=%s" % \
                  (username, fileFullPath, fileSize, checkSum))
    # put file info to DB
    if panda_config.record_sandbox_info:
        if any(file.filename.endswith(patt) for patt in IGNORED_SUFFIX):
            tmpLog.debug("skipped to insert to DB")
        else:
            statClient,outClient = Client.insertSandboxFileInfo(username,file.filename,
                                                                fileSize,checkSum)
            if statClient != 0 or outClient.startswith("ERROR"):
                # a DB failure is only logged; the upload still succeeds
                tmpLog.error("failed to put sandbox to DB with %s %s" % (statClient,outClient))
            else:
                tmpLog.debug("inserted sandbox to DB with %s" % outClient)
    tmpLog.debug("end")
    return True
Exemple #33
0
            if id not in jobsMap[prio]:
                jobsMap[prio].append(id)

# flatten the per-priority map: priorities in ascending order and, within
# each priority, PandaIDs in descending order so that newer jobs come first
jobs = []
for prio in sorted(jobsMap):
    ids = jobsMap[prio]
    ids.sort(reverse=True)
    jobs.extend(ids)

# optionally cap the number of jobs to kill
if options.maxJobs is not None:
    jobs = jobs[:int(options.maxJobs)]

print('The number of jobs with priorities below %s : %s' %
      (args[0], len(jobs)))

# kill in bunches of nJob with a short pause after each bunch
nJob = 100
for iJob in range(0, len(jobs), nJob):
    bunch = jobs[iJob:iJob + nJob]
    print('kill %s' % str(bunch))
    if options.forceKill:
        Client.killJobs(bunch, 9)
    else:
        Client.killJobs(bunch)
    time.sleep(1)
Exemple #34
0
import argparse

from pandaserver.userinterface import Client

# command-line interface: both options are mandatory
parser = argparse.ArgumentParser()
parser.add_argument(
    '--panda_id',
    action='store',
    dest='panda_id',
    required=True,
    help='PandaID of the job')
parser.add_argument(
    '--com_str',
    action='store',
    dest='com',
    required=True,
    help='The command string passed to the pilot. max 250 chars')

options = parser.parse_args()

# send the command and report the result
status, output = Client.send_command_to_job(options.panda_id, options.com)
if status != 0:
    # non-zero status: print the output as is
    print(output)
elif output[0]:
    # output[0] is the success flag, output[1] the message
    print('INFO: {}'.format(output[1]))
else:
    print('ERROR: {}'.format(output[1]))
 def doBrokerage(self,inputList,vo,prodSourceLabel,workQueue):
     """Run task brokerage for the given tasks.

     Tasks whose ``taskSpec.useWorldCloud()`` is true are collected and
     processed by ``AtlasProdTaskBrokerThread`` workers.  Every other
     task is converted into a ``JobSpec`` and submitted in bunches of
     ``maxBunchTask`` through ``PandaClient.runTaskAssignment``.

     :param inputList: iterable of ``(jediTaskID, tmpInputList)`` pairs;
         each ``tmpInputList`` yields ``(taskSpec, cloudName,
         inputChunk)`` tuples
     :param vo: passed to the RW calculations and to
         ``self.ddmIF.getInterface``
     :param prodSourceLabel: passed to the RW calculations
     :param workQueue: work queue object; its ``queue_name`` is logged
         and the object is passed to the per-priority RW calculations
     :return: ``self.SC_SUCCEEDED``, or ``self.SC_FAILED`` as soon as
         one of the RW calculations returns ``None``
     """
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2} nTasks={3}'.format(vo,prodSourceLabel,
                                                                 workQueue.queue_name,
                                                                 len(inputList)))
     # loop over all tasks
     allRwMap    = {}
     prioMap     = {}
     tt2Map      = {}
     expRWs      = {}
     jobSpecList = []
     for _tmpJediTaskID,tmpInputList in inputList:
         for taskSpec,_cloudName,inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec,inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             # (WORLD tasks never get here, so no destinationSE is set)
             jobSpec = JobSpec()
             jobSpec.taskID     = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel  = 'managed'
             jobSpec.processingType   = taskSpec.processingType
             jobSpec.workingGroup     = taskSpec.workingGroup
             jobSpec.metadata         = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority  = taskSpec.currentPriority
             # floor division keeps the value integral on Python 3 as well
             jobSpec.maxDiskCount     = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) // 1024 // 1024
             # the master dataset gives prodDBlock; otherwise fall back
             # to the last secondary dataset seen
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID]  = jobSpec.processingType
             # get RW for a priority (computed once per priority value);
             # "not in" replaces dict.has_key, which Python 3 removed
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                    jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error('failed to calculate RW for jediTaskID={0}'.format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata: every job carries the complete maps, not only
         # its own entries
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (jobSpec.metadata,
                                                       str(rwValues),str(expRWs),
                                                       str(prioMap),str(fullRWs),
                                                       str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask+maxBunchTask]
             strIDs = 'jediTaskID=' + ','.join('{0}'.format(tmpJobSpec.taskID)
                                               for tmpJobSpec in jobsBunch)
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerage; the outcome is only logged and does
             # not change the return value
             stS,outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS,str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority; allRwMap is shared with the old-cloud
         # pass, so priorities already computed there are not recomputed
         for taskSpec,inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                         taskSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for _iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld,threadPool,
                                             self.taskBufferIF,ddmIF,
                                             fullRWs,liveCounter)
             thr.start()
         # NOTE(review): the argument is presumably a timeout in
         # seconds - confirm against ThreadPool.join
         threadPool.join(60*10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED
Exemple #36
0
#!/usr/bin/python

from __future__ import print_function

from pprint import pprint
import json, sys
import requests
import cPickle as pickle
from datetime import datetime

from pandaserver.taskbuffer import JobSpec
from pandaserver.userinterface import Client

# this is an example: query the status of a single, hard-coded PandaID
# NOTE(review): the result is presumably (status, list of job specs) -
# confirm against Client.getJobStatus
job = Client.getJobStatus([4242299116])

# take the first entry of the second element of the result
spec = job[1][0]
# dump the spec through its valuesMap() method
att = spec.valuesMap()

# pretty-print the dumped values
pprint(att)