Exemple #1
0
    def __init__(self, config):
        """
        __init__

        Store the configuration, initialise the pilot attributes and
        create the communication and heartbeat helpers.

        config must support item access and provide the "TTL" and
        "tqaddress" keys (a missing key raises from the lookup).
        """
        self.config = config

        # bookkeeping filled in later by other methods
        self.cacheFiles = []
        self.otherPilots = {}

        self.pilotId = None
        self.ttl = config["TTL"]
        # fully qualified host name of the machine running the pilot
        self.pilotHost = socket.getfqdn()
        self.pilotSite = None
        self.pilotCacheDir = None
        # directory the pilot was started from
        self.pilotDir = os.getcwd()
        # address of the TaskQueue machine
        self.taskQAddress = config["tqaddress"]

        # communication module; it receives this pilot instance, so it
        # is created only after the attributes above are set
        self.commPlugin = Communication(False, self)

        # heartbeat helper: created here but NOT started (the start()
        # call is intentionally left disabled, as in the original code)
        self.heartbeat = HeartBeat(self.commPlugin, self)
        #self.heartbeat.start()

        msg = "PilotJob Started:\n"
        print(msg)
Exemple #2
0
    def __init__(self, config):
        """
        __init__

        Keep the configuration, set the pilot attributes and start the
        communication plugin.

        config must support item access and provide the "TTL" and
        "tqaddress" keys.
        """
        self.config = config
        self.processingRealJob = False

        # configurable flags / containers
        self.heartBeatMsg = True
        self.cacheFiles = []

        # TODO: obtain the pilot id from the TaskQueue instead of
        # using this hard-coded value
        self.pilotId = 12311
        self.ttl = config["TTL"]

        # fully qualified host name of the machine running the pilot
        self.pilotHost = socket.getfqdn()

        # address of the TaskQueue machine
        self.taskQAddress = config["tqaddress"]

        # create the communication module and start it right away
        self.commPlugin = Communication(False, self)
        self.commPlugin.start()

        print("PilotJob Started:\n")
Exemple #3
0
class PilotJob:
    """
    _PilotJob_

    Pilot job: initialises itself, registers through the communication
    plugin and builds the shell scripts used to run real tasks.
    """

    def __init__(self, config):
        """
        __init__

        Store the configuration, initialise the pilot attributes and
        create the communication and heartbeat helpers.

        config must support item access and provide the "TTL" and
        "tqaddress" keys.
        """
        self.config = config

        # bookkeeping filled in later by other methods
        self.cacheFiles = []
        self.otherPilots = {}

        self.pilotId = None
        self.ttl = config["TTL"]
        # fully qualified host name of the machine running the pilot
        self.pilotHost = socket.getfqdn()
        self.pilotSite = None
        self.pilotCacheDir = None
        # directory the pilot was started from
        self.pilotDir = os.getcwd()
        # address of the TaskQueue machine
        self.taskQAddress = config["tqaddress"]

        # communication module; it receives this pilot instance
        self.commPlugin = Communication(False, self)

        # heartbeat helper: created but NOT started here
        self.heartbeat = HeartBeat(self.commPlugin, self)
        #self.heartbeat.start()

        msg = "PilotJob Started:\n"
        print(msg)

    def getPilotSite(self):
        """
        __getPilotSite__

        Read the site information from
        $CMS_PATH/SITECONF/local/JobConfig/site-local-config.xml and
        store the 'value' attribute of the first <se-name> element
        found under the first <local-stage-out> of the first <site>
        in self.pilotSite.

        Returns True on success, False when CMS_PATH is not set, when
        there is no <site> element, or when reading/parsing fails.
        """
        print('PilotJob: getPilotSite()')
        if not isVariableSet('CMS_PATH'):
            print('could not find CMS_PATH variable')
            return False

        configPath = os.path.join(os.environ.get('CMS_PATH'),
                       'SITECONF/local/JobConfig/site-local-config.xml')
        try:
            doc = xml.dom.minidom.parse(configPath)
            sites = doc.getElementsByTagName('site')
            # getElementsByTagName never returns None; an empty result
            # is what signals "no site tag"
            if not sites:
                print('there is no site tag in site-local-config.xml')
                return False
            localStageOut = sites[0].getElementsByTagName('local-stage-out')[0]
            node = localStageOut.getElementsByTagName('se-name')[0]
            self.pilotSite = node.getAttribute('value')
            return True
        except Exception:
            # best effort: report the problem and let the caller decide
            print('getPilotSite():Problem %s:%s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            return False

    ###################################
    #setPilotCacheDir
    ###################################
    def setPilotCacheDir( self ):
        """
        __setPilotCacheDir__

        Create the "CACHE_AREA" directory under self.pilotDir and
        record its path in self.pilotCacheDir.

        Returns True when the directory was created, False when
        os.mkdir failed (this includes the directory already existing);
        in that case self.pilotCacheDir is left untouched.
        """
        print('setPilotCacheDir')
        pilotCacheDir = "%s/%s" % ( self.pilotDir, "CACHE_AREA")
        try:
            os.mkdir(pilotCacheDir)
        except OSError:
            print('Error setPilotCacheDir %s,%s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            return False

        self.pilotCacheDir = pilotCacheDir
        print('cacheDir %s' % self.pilotCacheDir)
        return True

    ###################################################
    #TODO: this function will try to recover data cache
    ###################################################
    def dataCacheRecovery(self):
        """
        __dataCacheRecovery__

        Placeholder: currently only prints self.cacheFiles.
        """
        print('dataCacheRecovery()')
        print(self.cacheFiles)

    #################################################
    #TODO: this function will try to recover old jobs
    #which were not finished by pilot
    #################################################
    def jobRecovery(self):
        """
        __jobRecovery__

        Placeholder: prints the pilot id and always returns None.
        """
        print('jobRecovery() %s ' % self.pilotId)
        #otherwise return some job list
        return None

    ###################################
    # pilotEnvironmentCheck
    ###################################
    def pilotEnvironmentCheck ( self ):
        """
        __pilotEnvironmentCheck__

        Check CMS_PATH, VO_CMS_SW_DIR and HOME with isVariableSet().
        Returns True when all of them pass, otherwise prints the
        failing names and returns False.
        """
        envList = ["CMS_PATH", "VO_CMS_SW_DIR", "HOME"]
        notSetEnv = [env for env in envList if not isVariableSet(env)]
        if notSetEnv:
            print("Some of env variables are not set")
            print("Env Not Found :%s" % notSetEnv)
            return False

        return True

    ###########################
    # registerPilot
    ###########################
    def registerPilot(self):
        """
        __registerPilot__

        Register this pilot through the communication plugin, sending
        the cache dir, site, TTL and the module-level CMS_ARCH and
        CMSSW_INFO values.

        On a 'registerResponse' message whose registerStatus is
        'RegisterDone', self.pilotId and self.otherPilots are updated
        from the payload.

        Returns False when CMSSW_INFO is None; returns None in every
        other case (including success), so callers should inspect
        self.pilotId to find out whether registration worked.
        """
        # CMSSW_INFO / CMS_ARCH are module-level values, only read here
        if CMSSW_INFO is None:
            return False

        #use plugin to register this pilot with PA
        print('going for pilot registration')
        jsonResult = self.commPlugin.register(self.pilotCacheDir,
                     self.pilotSite, self.ttl, CMS_ARCH, CMSSW_INFO)

        print(jsonResult)

        # the plugin reports failures as plain marker strings
        if jsonResult in ('NoData', 'ConnectionError'):
            return

        msg = jsonResult['msg']
        if ( msg['msgType'] == 'registerResponse' and
             msg['payload']['registerStatus'] == 'RegisterDone' ):
            self.pilotId = msg['payload']['pilotId']
            self.otherPilots = msg['payload']['otherPilots']

            print('pilot gets register successfully wid id %s' % self.pilotId)

    def _stripArchiveExt(self, tarName):
        """ Return tarName without a trailing .tar.gz/.tgz/.tar suffix. """
        for ext in ('.tar.gz', '.tgz', '.tar'):
            if tarName.endswith(ext):
                return tarName[:-len(ext)]
        return tarName

    #############################################
    # realTaskExecutionScript
    #############################################
    def realTaskExecutionScript(self, taskDir, sandboxUrl, specUrl, logDir, jobWF):
        """
        __realTaskExecutionScript__

        Build the bash script that downloads the job spec and sandbox,
        unpacks the sandbox and runs ./run.sh inside it.

        taskDir    -- directory (relative to self.pilotDir) for the task
        sandboxUrl -- URL of the sandbox tarball
        specUrl    -- URL of the job spec file
        logDir     -- sub-directory of JobLogArea receiving run.log.gz
        jobWF      -- workflow name; the tarball name is cut at the
                      last '-<jobWF>' to get the unpacked directory

        Returns the list [script, tarName, tarNameWOExt].
        """
        print("taskDir %s " % taskDir)
        # defaults used when the URLs contain no '/'
        tarName = 'NoTarName.tar.gz'
        jobspecFile = 'NoSpecName'
        tarNameWOExt = 'NoTarName'

        slash = sandboxUrl.rfind('/')
        if slash != -1:
            tarName = sandboxUrl[slash+1:]
            #TODO: extract it from the taskqueue information
            cut = tarName.rfind('-%s' % jobWF)
            if cut < 0:
                cut = tarName.rfind("-")
            if cut < 0:
                # no separator at all: slicing with -1 would silently
                # drop the last character, so strip the archive
                # extension instead
                tarNameWOExt = self._stripArchiveExt(tarName)
            else:
                tarNameWOExt = tarName[0:cut]

        slash = specUrl.rfind('/')
        if slash != -1:
            jobspecFile = specUrl[slash+1:]

        print('tarName %s' % tarName)
        print('tarNameWOExt %s' % tarNameWOExt)
        print('jobspecfile %s' % jobspecFile)

        scriptParts = [
            '#!/usr/bin/bash \n',
            '#for the testing on 32bit machine \n',
            'PILOT_DIR="%s" \n' % self.pilotDir,
            'myDate=`date "+%G%m%d_%k%M%S"` \n',
            'JOB_SPEC_FILE="%s/%s" \n' % (taskDir, jobspecFile),
            # create and enter the task directory
            'cd $PILOT_DIR \n',
            'mkdir %s \n' % taskDir,
            'cd %s \n' % taskDir,
            # log collection area
            'mkdir -p JobLogArea/%s \n' % logDir,
            # download spec and sandbox
            'wget %s \n' % specUrl,
            'wget %s \n' % sandboxUrl,
            # untar the sandbox and enter it
            'tar -zxf %s > /dev/null 2>&1 \n' % tarName,
            'cd %s \n' % tarNameWOExt,
            'ls -l $PILOT_DIR/$JOB_SPEC_FILE \n',
            'echo "$PILOT_DIR/$JOB_SPEC_FILE" \n',
            'echo "Running the actual job" \n',
            '( /usr/bin/time ./run.sh $PILOT_DIR/$JOB_SPEC_FILE 2>&1',
            ' ) | gzip > ./run.log.gz\n',
            ' cp run.log.gz ../JobLogArea/%s \n' % logDir,
        ]
        scriptlines = ''.join(scriptParts)
        return [scriptlines, tarName, tarNameWOExt]

    # save the script in the filename
    def save(self, filename, script ):
        """
        __save__

        Write the pilot job executable script to filename.

        An IOError is reported on stdout and then re-raised to the
        caller with its original traceback.
        """
        try:
            # 'with' closes the handle even when write() fails
            with open(filename, 'w') as handle:
                handle.write(script)
        except IOError as ioinst:
            print('save():problem in saving : %s, %s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            print(str(ioinst))
            raise
Exemple #4
0
class PilotJob:
    """
    _PilotJob_

    Pilot job: initialises itself, registers through the communication
    plugin and builds the shell script used to run a real task.
    """

    def __init__(self, config):
        """
        __init__

        Store the configuration, set the pilot attributes and start
        the communication plugin.

        config must support item access and provide the "TTL" and
        "tqaddress" keys.
        """
        self.config = config
        self.processingRealJob = False

        #configurable
        self.heartBeatMsg = True
        self.cacheFiles = []

        #TODO: get it from TaskQueue (registerPilot overwrites this
        #placeholder when registration succeeds)
        self.pilotId = 12311
        self.ttl = config["TTL"]

        # fully qualified host name of the machine running the pilot
        self.pilotHost = socket.getfqdn()

        # address of the TaskQueue machine
        self.taskQAddress = config["tqaddress"]

        # create the communication module and start it
        self.commPlugin = Communication(False, self)
        self.commPlugin.start()

        msg = "PilotJob Started:\n"
        print(msg)

    ###################################################
    #TODO: this function will try to recover data cache
    ###################################################
    def dataCacheRecovery(self):
        """
        __dataCacheRecovery__

        Placeholder: currently only prints self.cacheFiles.
        """
        print('dataCacheRecovery()')
        print(self.cacheFiles)

    #################################################
    #TODO: this function will try to recover old jobs
    #which were not finished by pilot
    #################################################
    def jobRecovery(self):
        """
        __jobRecovery__

        Placeholder: prints the pilot id and always returns None.
        """
        print('jobRecovery() %s ' % self.pilotId)
        #otherwise return some job list
        return None

    ###################################
    # pilotEnvironmentCheck
    ###################################
    def pilotEnvironmentCheck ( self ):
        """
        __pilotEnvironmentCheck__

        Check VO_CMS_SW_DIR and HOME with isVariableSet().
        Returns True when both pass, otherwise prints the failing
        names and returns False.
        """
        envList = ["VO_CMS_SW_DIR", "HOME"]
        notSetEnv = [env for env in envList if not isVariableSet(env)]
        if notSetEnv:
            print("Some of env variables are not set")
            print("Env Not Found :%s" % notSetEnv)
            return False

        return True

    ###########################
    # registerPilot
    ###########################
    def registerPilot(self):
        """
        __registerPilot__

        Register this pilot through the communication plugin. On a
        'RegisterResponse' message self.pilotId is replaced by the id
        found in the payload.

        Always returns None; callers should inspect self.pilotId.
        """
        #use plugin to register this pilot with PA
        jsonResult = self.commPlugin.register()
        # the plugin reports failures as plain marker strings
        if jsonResult in ('NoData', 'ConnectionError'):
            return
        if jsonResult['msg']['msgType'] == 'RegisterResponse':
            self.pilotId = jsonResult['msg']['payload']['pilotId']
            print('pilot gets register successfully wid id %s' % self.pilotId)

    def _stripArchiveExt(self, tarName):
        """ Return tarName without a trailing .tar.gz/.tgz/.tar suffix. """
        for ext in ('.tar.gz', '.tgz', '.tar'):
            if tarName.endswith(ext):
                return tarName[:-len(ext)]
        return tarName

    #############################################
    # realTaskExecutionScript
    #############################################
    def realTaskExecutionScript(self, taskDir, sandboxUrl, specUrl):
        """
        __realTaskExecutionScript__

        Build the bash script that downloads the job spec and sandbox,
        unpacks the sandbox and runs ./run.sh inside it.

        taskDir    -- only printed here; not used in the script
        sandboxUrl -- URL of the sandbox tarball
        specUrl    -- URL of the job spec file

        Returns the list [script, tarName, tarNameWOExt].
        """
        print("taskDir %s " % taskDir)
        # defaults used when the URLs contain no '/'
        tarName = 'NoTarName.tar.gz'
        jobspecFile = 'NoSpecName'
        tarNameWOExt = 'NoTarName'
        print(tarName)
        print(jobspecFile)

        slash = sandboxUrl.rfind('/')
        if slash != -1:
            tarName = sandboxUrl[slash+1:]
            #TODO: extract it from the taskqueue information
            cut = tarName.find('-Processing.')
            if cut < 0:
                # marker missing: slicing with -1 would silently drop
                # the last character, so strip the archive extension
                tarNameWOExt = self._stripArchiveExt(tarName)
            else:
                tarNameWOExt = tarName[0:cut]

        slash = specUrl.rfind('/')
        if slash != -1:
            jobspecFile = specUrl[slash+1:]

        print('tarName %s' % tarName)
        print('tarNameWOExt %s' % tarNameWOExt)
        print('jobspecfile %s' % jobspecFile)

        scriptParts = [
            '#!/usr/bin/bash \n',
            '#for the testing on 32bit machine \n',
            'source /afs/cern.ch/cms/sw/cmsset_default.sh \n',
            'PILOT_DIR=`pwd` \n',
            'JOB_SPEC_FILE="%s" \n' % jobspecFile,
            # download spec and sandbox into the current directory
            'wget %s \n' % specUrl,
            'wget %s \n' % sandboxUrl,
            # untar the sandbox and enter it
            'tar -zxf $PILOT_DIR/%s > /dev/null 2>&1 \n' % tarName,
            'cd %s \n' % tarNameWOExt,
            'ls $PILOT_DIR/$JOB_SPEC_FILE \n',
            '( /usr/bin/time ./run.sh $PILOT_DIR/$JOB_SPEC_FILE 2>&1',
            ' ) | gzip > ./run.log.gz\n',
        ]
        scriptlines = ''.join(scriptParts)
        return [scriptlines, tarName, tarNameWOExt]

    # save the script in the filename
    def save(self, filename, script ):
        """
        __save__

        Write the pilot job executable script to filename.

        An IOError is reported on stdout and then re-raised to the
        caller with its original traceback.
        """
        try:
            # 'with' closes the handle even when write() fails
            with open(filename, 'w') as handle:
                handle.write(script)
        except IOError as ioinst:
            print('save():problem in saving : %s, %s' %
                  (sys.exc_info()[0], sys.exc_info()[1]))
            print(str(ioinst))
            raise