def __init__(self, config):
    """
    Build a pilot from its configuration mapping.

    config must provide the keys "TTL" and "tqaddress"; a missing key
    raises KeyError. The communication plugin and the heartbeat object
    are created here, but the heartbeat is not started.
    """
    self.config = config

    # Bookkeeping that starts out empty / unknown.
    self.cacheFiles = []
    self.otherPilots = {}
    self.pilotId = None

    # Values taken from the configuration and the local environment.
    timeToLive = config["TTL"]
    hostName = socket.getfqdn()
    workingDir = os.getcwd()
    queueAddress = config["tqaddress"]

    self.ttl = timeToLive
    # fully qualified host name of the machine running this pilot
    self.pilotHost = hostName
    self.pilotSite = None
    self.pilotCacheDir = None
    self.pilotDir = workingDir
    # address of the TaskQueue machine
    self.taskQAddress = queueAddress

    # Collaborators are built last so they receive a populated pilot.
    self.commPlugin = Communication(False, self)
    self.heartbeat = HeartBeat(self.commPlugin, self)

    print("PilotJob Started:\n")
def __init__(self, config):
    """
    Build a pilot from its configuration mapping and start the
    communication plugin.

    config must provide the keys "TTL" and "tqaddress"; a missing key
    raises KeyError.
    """
    self.config = config

    # Flags and bookkeeping.
    self.processingRealJob = False
    self.heartBeatMsg = True
    self.cacheFiles = []

    # TODO: obtain the id from the TaskQueue instead of hard-coding it
    self.pilotId = 12311

    timeToLive = config["TTL"]
    hostName = socket.getfqdn()
    queueAddress = config["tqaddress"]

    self.ttl = timeToLive
    # fully qualified host name of the machine running this pilot
    self.pilotHost = hostName
    # address of the TaskQueue machine
    self.taskQAddress = queueAddress

    # The plugin is built last so it receives a populated pilot,
    # then started immediately.
    plugin = Communication(False, self)
    self.commPlugin = plugin
    plugin.start()

    print("PilotJob Started:\n")
class PilotJob:
    """
    _PilotJob_

    State and helper methods for a pilot: it initialises itself,
    registers through its communication plugin and builds the shell
    script used to run a task.

    All print calls use the single-argument parenthesised form, which
    behaves the same on Python 2 and Python 3.
    """

    def __init__(self, config):
        """
        Build a pilot from its configuration mapping.

        config must provide the keys "TTL" and "tqaddress"; a missing
        key raises KeyError. The heartbeat object is created but not
        started here.
        """
        self.config = config
        self.cacheFiles = []
        self.otherPilots = {}
        self.pilotId = None
        self.ttl = config["TTL"]
        # fully qualified host name of the machine running this pilot
        self.pilotHost = socket.getfqdn()
        self.pilotSite = None
        self.pilotCacheDir = None
        self.pilotDir = os.getcwd()
        # address of the TaskQueue machine
        self.taskQAddress = config["tqaddress"]
        # communication module
        self.commPlugin = Communication(False, self)
        # heartbeat thread (not started here)
        self.heartbeat = HeartBeat(self.commPlugin, self)
        print("PilotJob Started:\n")

    def getPilotSite(self):
        """
        Read the pilot site from site-local-config.xml under $CMS_PATH.

        On success self.pilotSite is set to the 'value' attribute of
        the first <se-name> inside the first <local-stage-out> of the
        first <site> element, and True is returned. False is returned
        when CMS_PATH is not set, when there is no <site> element, or
        when reading/parsing the file fails.
        """
        print('PilotJob: getPilotSite()')
        if not isVariableSet('CMS_PATH'):
            print('could not find CMS_PATH variable')
            return False

        configPath = os.path.join(
            os.environ.get('CMS_PATH'),
            'SITECONF/local/JobConfig/site-local-config.xml')
        try:
            doc = xml.dom.minidom.parse(configPath)
            sites = doc.getElementsByTagName('site')
            # getElementsByTagName never returns None; an empty result
            # is an empty list, so test for emptiness instead.
            if not sites:
                print('there is no site tag in site-local-config.xml')
                return False
            localStageOut = sites[0].getElementsByTagName('local-stage-out')[0]
            node = localStageOut.getElementsByTagName('se-name')[0]
            self.pilotSite = node.getAttribute('value')
            return True
        except Exception:
            # The '%' operator was missing here, which made the handler
            # itself fail with a TypeError.
            print('getPilotSite():Problem %s:%s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            return False

    ###################################
    # setPilotCacheDir
    ###################################
    def setPilotCacheDir(self):
        """
        Create the CACHE_AREA directory under self.pilotDir.

        Returns True and stores the path in self.pilotCacheDir when the
        directory was created. Returns False when os.mkdir fails, which
        includes the directory already existing; self.pilotCacheDir is
        left untouched in that case.
        """
        print('setPilotCacheDir')
        pilotCacheDir = "%s/%s" % (self.pilotDir, "CACHE_AREA")
        try:
            os.mkdir(pilotCacheDir)
            self.pilotCacheDir = pilotCacheDir
            print('cacheDir %s' % self.pilotCacheDir)
            return True
        except Exception:
            print('Error setPilotCacheDir %s,%s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            return False

    ###################################################
    # TODO: this function will try to recover data cache
    ###################################################
    def dataCacheRecovery(self):
        """
        Placeholder: only prints the current list of cache files.
        """
        print('dataCacheRecovery()')
        print(self.cacheFiles)

    #################################################
    # TODO: this function will try to recover old jobs
    # which were not finished by pilot
    #################################################
    def jobRecovery(self):
        """
        Placeholder: prints the pilot id and always returns None.
        """
        print('jobRecovery() %s ' % self.pilotId)
        # TODO: otherwise return some job list
        return None

    ###################################
    # pilotEnvironmentCheck
    ###################################
    def pilotEnvironmentCheck(self):
        """
        Check that CMS_PATH, VO_CMS_SW_DIR and HOME are all set.

        Returns True when isVariableSet accepts every name; otherwise
        prints the missing names and returns False.
        """
        envList = ["CMS_PATH", "VO_CMS_SW_DIR", "HOME"]
        notSetEnv = [env for env in envList if not isVariableSet(env)]
        if notSetEnv:
            print("Some of env variables are not set")
            print("Env Not Found :%s" % (notSetEnv,))
            return False
        return True

    ###########################
    # registerPilot
    ###########################
    def registerPilot(self):
        """
        Register this pilot through the communication plugin.

        Sends the cache directory, site, TTL and the module-level
        CMS_ARCH / CMSSW_INFO values. Returns False when CMSSW_INFO is
        None. Otherwise returns None; on a 'registerResponse' message
        whose registerStatus is 'RegisterDone', self.pilotId and
        self.otherPilots are filled from the payload.
        """
        # CMSSW information is sent along with the registration request
        global CMSSW_INFO, CMS_ARCH
        if CMSSW_INFO is None:
            return False

        print('going for pilot registration')
        jsonResult = self.commPlugin.register(self.pilotCacheDir,
                                              self.pilotSite,
                                              self.ttl, CMS_ARCH, CMSSW_INFO)
        print(jsonResult)
        if jsonResult == 'NoData' or jsonResult == 'ConnectionError':
            # The plugin reported a failure as a plain string; there is
            # no message to inspect, so stop here.
            return

        payload = jsonResult['msg']['payload']
        if (jsonResult['msg']['msgType'] == 'registerResponse' and
                payload['registerStatus'] == 'RegisterDone'):
            self.pilotId = payload['pilotId']
            self.otherPilots = payload['otherPilots']
            print('pilot gets register successfully wid id %s' % self.pilotId)

    #############################################
    # realTaskExecutionScript
    #############################################
    def realTaskExecutionScript(self, taskDir, sandboxUrl, specUrl,
                                logDir, jobWF):
        """
        Build the bash script that downloads and runs one task.

        taskDir    -- directory created under the pilot directory
        sandboxUrl -- URL of the sandbox tarball (fetched with wget)
        specUrl    -- URL of the job spec file (fetched with wget)
        logDir     -- sub-directory of JobLogArea receiving run.log.gz
        jobWF      -- workflow tag; the tarball name is cut at the last
                      '-<jobWF>' to get the unpacked directory name

        Returns the list [script text, tarball name, unpacked dir name].
        Nothing is executed or written by this method.

        NOTE(review): the arguments are inserted into the script without
        shell quoting -- confirm they always come from a trusted source.
        """
        print("taskDir %s " % taskDir)
        # defaults used when the URLs contain no '/'
        tarName = 'NoTarName.tar.gz'
        jobspecFile = 'NoSpecName'
        tarNameWOExt = 'NoTarName'

        slashPos = sandboxUrl.rfind('/')
        if slashPos != -1:
            tarName = sandboxUrl[slashPos + 1:]
            # TODO: extract it from the taskqueue information
            cutPos = tarName.rfind('-%s' % jobWF)
            if cutPos < 0:
                cutPos = tarName.rfind("-")
            # NOTE(review): with no '-' at all cutPos is -1 and this
            # slice drops the last character -- confirm intended fallback.
            tarNameWOExt = tarName[0:cutPos]

        slashPos = specUrl.rfind('/')
        if slashPos != -1:
            jobspecFile = specUrl[slashPos + 1:]

        print('tarName %s' % tarName)
        print('tarNameWOExt %s' % tarNameWOExt)
        print('jobspecfile %s' % jobspecFile)

        pieces = [
            '#!/usr/bin/bash \n',
            '#for the testing on 32bit machine \n',
            'PILOT_DIR="%s" \n' % self.pilotDir,
            # literal text, deliberately not %-formatted
            'myDate=`date "+%G%m%d_%k%M%S"` \n',
            'JOB_SPEC_FILE="%s/%s" \n' % (taskDir, jobspecFile),
            # create the task directory
            'cd $PILOT_DIR \n',
            'mkdir %s \n' % taskDir,
            'cd %s \n' % taskDir,
            # log collection area
            'mkdir -p JobLogArea/%s \n' % logDir,
            # download spec and sandbox
            'wget %s \n' % specUrl,
            'wget %s \n' % sandboxUrl,
            # unpack the sandbox and enter it
            'tar -zxf %s > /dev/null 2>&1 \n' % tarName,
            'cd %s \n' % tarNameWOExt,
            'ls -l $PILOT_DIR/$JOB_SPEC_FILE \n',
            'echo "$PILOT_DIR/$JOB_SPEC_FILE" \n',
            'echo "Running the actual job" \n',
            '( /usr/bin/time ./run.sh $PILOT_DIR/$JOB_SPEC_FILE 2>&1',
            ' ) | gzip > ./run.log.gz\n',
            ' cp run.log.gz ../JobLogArea/%s \n' % logDir,
        ]
        scriptlines = ''.join(pieces)
        return [scriptlines, tarName, tarNameWOExt]

    def save(self, filename, script):
        """
        Write script to filename, replacing any existing content.

        On IOError the problem is printed and the same exception is
        re-raised. The file is closed even when the write fails.
        """
        try:
            with open(filename, 'w') as handle:
                handle.write(script)
        except IOError as ioinst:
            print('save():problem in saving : %s, %s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            print(str(ioinst))
            # bare raise keeps the original traceback
            raise
class PilotJob:
    """
    _PilotJob_

    State and helper methods for a pilot: it initialises itself,
    registers through its communication plugin and builds the shell
    script used to run a task.

    All print calls use the single-argument parenthesised form, which
    behaves the same on Python 2 and Python 3.
    """

    def __init__(self, config):
        """
        Build a pilot from its configuration mapping and start the
        communication plugin.

        config must provide the keys "TTL" and "tqaddress"; a missing
        key raises KeyError.
        """
        self.config = config
        self.processingRealJob = False
        self.heartBeatMsg = True
        self.cacheFiles = []
        # TODO: get the id from the TaskQueue instead of hard-coding it
        self.pilotId = 12311
        self.ttl = config["TTL"]
        # fully qualified host name of the machine running this pilot
        self.pilotHost = socket.getfqdn()
        # address of the TaskQueue machine
        self.taskQAddress = config["tqaddress"]
        # communication module, started immediately
        self.commPlugin = Communication(False, self)
        self.commPlugin.start()
        print("PilotJob Started:\n")

    ###################################################
    # TODO: this function will try to recover data cache
    ###################################################
    def dataCacheRecovery(self):
        """
        Placeholder: only prints the current list of cache files.
        """
        print('dataCacheRecovery()')
        print(self.cacheFiles)

    #################################################
    # TODO: this function will try to recover old jobs
    # which were not finished by pilot
    #################################################
    def jobRecovery(self):
        """
        Placeholder: prints the pilot id and always returns None.
        """
        print('jobRecovery() %s ' % self.pilotId)
        # TODO: otherwise return some job list
        return None

    ###################################
    # pilotEnvironmentCheck
    ###################################
    def pilotEnvironmentCheck(self):
        """
        Check that VO_CMS_SW_DIR and HOME are both set.

        Returns True when isVariableSet accepts every name; otherwise
        prints the missing names and returns False.
        """
        envList = ["VO_CMS_SW_DIR", "HOME"]
        notSetEnv = [env for env in envList if not isVariableSet(env)]
        if notSetEnv:
            print("Some of env variables are not set")
            print("Env Not Found :%s" % (notSetEnv,))
            return False
        return True

    ###########################
    # registerPilot
    ###########################
    def registerPilot(self):
        """
        Register this pilot through the communication plugin.

        Always returns None. When the reply is a 'RegisterResponse'
        message, self.pilotId is replaced by the id in its payload.
        """
        jsonResult = self.commPlugin.register()
        if jsonResult == 'NoData' or jsonResult == 'ConnectionError':
            # The plugin reported a failure as a plain string; there is
            # no message to inspect, so stop here.
            return

        if jsonResult['msg']['msgType'] == 'RegisterResponse':
            self.pilotId = jsonResult['msg']['payload']['pilotId']
            print('pilot gets register successfully wid id %s' % self.pilotId)

    #############################################
    # realTaskExecutionScript
    #############################################
    def realTaskExecutionScript(self, taskDir, sandboxUrl, specUrl):
        """
        Build the bash script that downloads and runs one task.

        taskDir    -- only printed; it does not appear in the script
        sandboxUrl -- URL of the sandbox tarball (fetched with wget)
        specUrl    -- URL of the job spec file (fetched with wget)

        Returns the list [script text, tarball name, unpacked dir name].
        Nothing is executed or written by this method.

        NOTE(review): the URLs are inserted into the script without
        shell quoting -- confirm they always come from a trusted source.
        """
        print("taskDir %s " % taskDir)
        # defaults used when the URLs contain no '/'
        tarName = 'NoTarName.tar.gz'
        jobspecFile = 'NoSpecName'
        tarNameWOExt = 'NoTarName'
        print(tarName)
        print(jobspecFile)

        slashPos = sandboxUrl.rfind('/')
        if slashPos != -1:
            tarName = sandboxUrl[slashPos + 1:]
            # TODO: extract it from the taskqueue information
            cutPos = tarName.find('-Processing.')
            # NOTE(review): when the marker is absent cutPos is -1 and
            # this slice drops the last character -- confirm intended
            # fallback.
            tarNameWOExt = tarName[0:cutPos]

        slashPos = specUrl.rfind('/')
        if slashPos != -1:
            jobspecFile = specUrl[slashPos + 1:]

        print('tarName %s' % tarName)
        print('tarNameWOExt %s' % tarNameWOExt)
        print('jobspecfile %s' % jobspecFile)

        pieces = [
            '#!/usr/bin/bash \n',
            '#for the testing on 32bit machine \n',
            'source /afs/cern.ch/cms/sw/cmsset_default.sh \n',
            'PILOT_DIR=`pwd` \n',
            'JOB_SPEC_FILE="%s" \n' % jobspecFile,
            # download spec and sandbox
            'wget %s \n' % specUrl,
            'wget %s \n' % sandboxUrl,
            # unpack the sandbox and enter it
            'tar -zxf $PILOT_DIR/%s > /dev/null 2>&1 \n' % tarName,
            'cd %s \n' % tarNameWOExt,
            'ls $PILOT_DIR/$JOB_SPEC_FILE \n',
            '( /usr/bin/time ./run.sh $PILOT_DIR/$JOB_SPEC_FILE 2>&1',
            ' ) | gzip > ./run.log.gz\n',
        ]
        scriptlines = ''.join(pieces)
        return [scriptlines, tarName, tarNameWOExt]

    def save(self, filename, script):
        """
        Write script to filename, replacing any existing content.

        On IOError the problem is printed and the same exception is
        re-raised. The file is closed even when the write fails.
        """
        try:
            with open(filename, 'w') as handle:
                handle.write(script)
        except IOError as ioinst:
            print('save():problem in saving : %s, %s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            print(str(ioinst))
            # bare raise keeps the original traceback
            raise