Example no. 1
    def load_argv(self, argv):
        if len(argv) == 6 and not any(map(lambda flag : flag.startswith('-'), argv)):
            _parse_old_argv(self, argv)
            return
        valid_flags = ['--clean']

        valid_kv_settings = [
            '--cfg', '-cfg', '--config', 
            '--workdir',
            '--webstagedir', 
            '--glideinwmsdir', 
            '--gridmapfile', '--mapfile', '--gridmap',
            '-url', '--weburl', '--url'
        ]
        
        # Flags are case-insensitive; first drop the program's name, then
        # normalize long (--) flags to lowercase.
        normed_argv = list( map(lambda arg: arg.strip().lower() if arg.startswith('--') else arg, argv[1:]) )
        flags = parse_argv(normed_argv, valid_flags=valid_flags, valid_kv_settings=valid_kv_settings)
        self.shouldClean = flags.get('--clean')
        self.cfgFile = flags.get('--cfg') or flags.get('--config') or flags.get('-cfg')
        self.workDir = flags.get('--workdir')
        self.webStageDir = flags.get('--webstagedir')
        self.glideinWMSDir = flags.get('--glideinwmsdir')
        self.gridmapFile = flags.get('--gridmapfile') or flags.get('--gridmap') or flags.get('--mapfile')
        self.webURL = flags.get('--weburl') or flags.get('--url') or flags.get('-url')


        self.load_cfg()
        self._setup_logger()
        ilog('Created Ilan WebStructBuilder: %s'%str(self))
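
Note: the example above calls a parse_argv helper that is not shown here. A minimal sketch of what such a helper might look like, assuming boolean flags map to True and key-value settings consume the next token (names and behavior are assumptions, not the project's actual implementation):

def parse_argv(argv, valid_flags=(), valid_kv_settings=()):
    # Hypothetical sketch; the real parse_argv is not shown in these examples.
    flags = {}
    i = 0
    while i < len(argv):
        arg = argv[i]
        if arg in valid_flags:
            flags[arg] = True            # boolean flag, no value
        elif arg in valid_kv_settings:
            if i + 1 >= len(argv):
                raise RuntimeError("missing value for %s" % arg)
            flags[arg] = argv[i + 1]     # key-value setting consumes next token
            i += 1
        else:
            raise RuntimeError("unknown argument: %s" % arg)
        i += 1
    return flags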
Example no. 2
    def __init__(self,argv):

        # glideTester.cfg values
        self.runId=None
        self.glideinWMSDir = None
        self.configDir = None
        self.proxyFile = None
        self.pilotFile = None
        self.delegateProxy = None
        self.collectorNode = None
        self.gfactoryNode = None
        self.gfactoryConstraint = None
        self.gfactoryClassadID = None
        self.myClassadID = None
        self.mySecurityName = None

        # parameters.cfg values
        self.executable = None
        self.inputFile = None
        self.outputFile = None
        self.environment = None
        self.getenv = None
        self.arguments = None
        self.x509userproxy = None
        self.concurrencyLevel = None
        self.runs = 1
        self.gfactoryAdditionalConstraint=None
        self.additionalClassAds = []

        # parse arguments
        valid_keys = ['-config', '-cfg', '--config', '-params', '-runId']
        arg_map = parse_argv(argv[1:], valid_kv_settings=valid_keys)
        passed_config_path = arg_map.get('-cfg') or arg_map.get('--config') or arg_map.get('-config')
        passed_params_path = arg_map.get('-params')
        self.cfg_paths = get_config_file_list(file_name='glideTester.cfg', arg_path=passed_config_path)
        self.params_path = get_config_file_list(file_name='parameters.cfg', arg_path=passed_params_path)
        self.runId = arg_map.get('-runId')

        # check and fix the attributes
        if self.runId==None:
            # not defined, create one specific for the account
            # should not be too random, or you pollute the factory namespace
            self.runId="u%i"%os.getuid()
        
        # load external values
        self.load_cfg()
        self.verify_cfg()

        # set search path
        if self.glideinWMSDir is not None:
            sys.path.insert(0, self.glideinWMSDir)
            sys.path.insert(0,os.path.join(self.glideinWMSDir,".."))

        self.load_config_dir()

        self.load_params()
        self.setup_logger()

        ilog("Made glideTester: \n\n%s\n"%dbgp(self, 4))
Example no. 3
 def cleanStructs(self):
     import shutil
     if self.workDir is not None and os.path.exists(self.workDir):
         ilog("Cleaning old workdir: %s" % self.workDir)
         shutil.rmtree(self.workDir)
     if self.webStageDir is not None and os.path.exists(self.webStageDir):
         ilog("Cleaning old webStageDir: %s" % self.webStageDir)
         shutil.rmtree(self.webStageDir)
Example no. 4
 def cleanStructs(self):
     import shutil
     if self.workDir is not None and os.path.exists(self.workDir):
         ilog("Cleaning old workdir: %s"%self.workDir)
         shutil.rmtree(self.workDir)
     if self.webStageDir is not None and os.path.exists(self.webStageDir):
         ilog("Cleaning old webStageDir: %s"%self.webStageDir)
         shutil.rmtree(self.webStageDir)
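
Note: the exists-then-rmtree pattern above can race with a concurrent deletion of the same directory. A best-effort variant, assuming partial cleanup is acceptable (a sketch, not the project's code):

import shutil

def _clean_dir(path, label):
    # ignore_errors avoids a crash if the directory disappears between
    # the existence check and the removal.
    if path is not None:
        ilog("Cleaning old %s: %s" % (label, path))
        shutil.rmtree(path, ignore_errors=True)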
Example no. 5
 def reload_proxy(self):
     ilog('Reloading proxy from fname: %s'%str(self.proxy_fname))
     if self.proxy_fname==None:
         self.proxy_data=None
         return
     proxy_fd=open(self.proxy_fname,'r')
     try:
         self.proxy_data=proxy_fd.read()
         (self.public_cert, self.private_cert) = self._parse_proxy_certs(self.proxy_data)
     finally:
         proxy_fd.close()
     return
Example no. 6
 def reload_proxy(self):
     ilog('Reloading proxy from fname: %s' % str(self.proxy_fname))
     if self.proxy_fname == None:
         self.proxy_data = None
         return
     proxy_fd = open(self.proxy_fname, 'r')
     try:
         self.proxy_data = proxy_fd.read()
         (self.public_cert,
          self.private_cert) = self._parse_proxy_certs(self.proxy_data)
     finally:
         proxy_fd.close()
     return
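
Note: the try/finally around open/close above can also be written with a with block, which closes the file on any exit path, including an exception while reading. A sketch of the same logic:

def reload_proxy(self):
    ilog('Reloading proxy from fname: %s' % str(self.proxy_fname))
    if self.proxy_fname is None:
        self.proxy_data = None
        return
    with open(self.proxy_fname, 'r') as proxy_fd:   # closed on any exit path
        self.proxy_data = proxy_fd.read()
    (self.public_cert,
     self.private_cert) = self._parse_proxy_certs(self.proxy_data)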
Example no. 7
    def cleanup_glideins(self):
        ilog('Thread is cleaning up glideins.')
        from glideinwms.frontend import glideinFrontendInterface
        from glideinwms.lib import condorMonitor, condorExe

        # Deadvertize my ad, so the factory knows we are gone
        for factory_pool in self.factory_pools:
            factory_pool_node=factory_pool[0]
            ilog('Deadvertising for node %s'%dbgp(factory_pool_node))
            try:
                glideinFrontendInterface.deadvertizeAllWork(factory_pool_node,self.client_name)
            except RuntimeError, e:
                self.errors.append((time.time(),"Deadvertizing failed: %s"%e))
            except:
Example no. 8
    def cleanup_glideins(self):
        ilog('Thread is cleaning up glideins.')
        from glideinwms.frontend import glideinFrontendInterface
        from glideinwms.lib import condorMonitor, condorExe

        # Deadvertize my ad, so the factory knows we are gone
        for factory_pool in self.factory_pools:
            factory_pool_node = factory_pool[0]
            ilog('Deadvertising for node %s' % dbgp(factory_pool_node))
            try:
                glideinFrontendInterface.deadvertizeAllWork(
                    factory_pool_node, self.client_name)
            except RuntimeError, e:
                self.errors.append(
                    (time.time(), "Deadvertizing failed: %s" % e))
            except:
Example no. 9
    def load_argv(self, argv):
        if len(argv) == 6 and not any(
                map(lambda flag: flag.startswith('-'), argv)):
            _parse_old_argv(self, argv)
            return
        valid_flags = ['--clean']

        valid_kv_settings = [
            '--cfg', '-cfg', '--config', '--workdir', '--webstagedir',
            '--glideinwmsdir', '--gridmapfile', '--mapfile', '--gridmap',
            '-url', '--weburl', '--url'
        ]

        # Flags are case-insensitive; first drop the program's name, then
        # normalize long (--) flags to lowercase.
        normed_argv = list(
            map(
                lambda arg: arg.strip().lower()
                if arg.startswith('--') else arg, argv[1:]))
        flags = parse_argv(normed_argv,
                           valid_flags=valid_flags,
                           valid_kv_settings=valid_kv_settings)
        self.shouldClean = flags.get('--clean')
        self.cfgFile = flags.get('--cfg') or flags.get(
            '--config') or flags.get('-cfg')
        self.workDir = flags.get('--workdir')
        self.webStageDir = flags.get('--webstagedir')
        self.glideinWMSDir = flags.get('--glideinwmsdir')
        self.gridmapFile = flags.get('--gridmapfile') or flags.get(
            '--gridmap') or flags.get('--mapfile')
        self.webURL = flags.get('--weburl') or flags.get('--url') or flags.get(
            '-url')

        self.load_cfg()
        self._setup_logger()
        ilog('Created Ilan WebStructBuilder: %s' % str(self))
Example no. 10
    def createStructs(self):
        ilog('Running createStructs for builder: %s' % str(self))
        self._setup_gwms_path()
        import cgkWDictFile
        try:
            import inspect
            srcf = inspect.getsourcefile(cgkWDictFile)
        except:
            srcf = 'ERROR'
        ilog("Imported cgkWDictFile from %s" % srcf)

        #Create the config files
        ilog('Creating struct.')
        dicts = cgkWDictFile.glideKeeperDicts(self.workDir, self.webStageDir)
        dicts.populate(self.webURL, self.gridmapFile)
        dicts.create_dirs()
        dicts.save()
        self._create_empty_web_index()
        ilog('Done.')

        print "Created config files in %s\n" % dicts.work_dir
        print "Web files in %s" % dicts.stage_dir
        print "If needed, move them so they are accessible from\n  %s" % self.webURL
Example no. 11
    def createStructs(self):
        ilog('Running createStructs for builder: %s'%str(self))
        self._setup_gwms_path()
        import cgkWDictFile
        try:
            import inspect
            srcf = inspect.getsourcefile(cgkWDictFile)
        except:
            srcf = 'ERROR'
        ilog("Imported cgkWDictFile from %s"%srcf)

        #Create the config files
        ilog('Creating struct.')
        dicts=cgkWDictFile.glideKeeperDicts(self.workDir,self.webStageDir)
        dicts.populate(self.webURL,self.gridmapFile)
        dicts.create_dirs()
        dicts.save()
        self._create_empty_web_index()
        ilog('Done.')

        print "Created config files in %s\n"%dicts.work_dir
        print "Web files in %s"%dicts.stage_dir
        print "If needed, move them so they are accessible from\n  %s"%self.webURL
Example no. 12
    def __init__(self,
                 web_url,descript_fname,descript_signature,
                 group_name,group_descript_fname,group_descript_signature,
                 security_name,instance_id,
                 classad_id,
                 factory_pools,factory_constraint,
                 collector_node,
                 proxy_fname,
                 session_id=None): # session_id should be a unique string

        ilog("Initting new GlideKeeperThread.")

        threading.Thread.__init__(self)

        # consts
        self.signature_type = "sha1"
        self.max_request=100

        # strings, describe Web downloadable info
        self.web_url=web_url
        self.descript_fname=descript_fname
        self.descript_signature=descript_signature

        ilog("Thread web info: \n\tweb_url: %s\n\tdescript_fname: %s\n\tdescript_signature: %s"%(web_url, descript_fname, descript_signature))

        self.group_name=group_name
        self.group_descript_fname=group_descript_fname
        self.group_descript_signature=group_descript_signature
        
        ilog("Thread group info: \n\tgroup_name: %s\n\tdescript_fname: %s\n\tdescript_signature: %s"%(group_name, group_descript_fname, group_descript_signature))

        # string, used for identification
        self.security_name=security_name
        self.instance_id=instance_id
        glidekeeper_id="%s_%s"%(security_name,instance_id)
        self.glidekeeper_id=glidekeeper_id
        client_name="%s.%s"%(glidekeeper_id,self.group_name)
        self.client_name=client_name

        ilog('Thread security info: \n\tsecurity_name: %s\n\tinstance_id: %s\n\tglidekeeper_id: %s\n\tclient_name: %s'%(security_name, instance_id, glidekeeper_id, client_name))

        if session_id==None:
            # should be as unique as possible
            # in the context of the instance_id
            session_id="%s_%s"%(time.time(),os.getpid())
        self.session_id=session_id

        ilog('Thread session_id: %s'%session_id)

        self.instance_constraint='GLIDETESTER_InstanceID=?="%s"'%self.glidekeeper_id
        self.session_constraint='GLIDETESTER_SessionID=?="%s"'%self.session_id

        self.glidekeeper_constraint="(%s)&&(%s)"%(self.instance_constraint,self.session_constraint)
        
        ilog('Thread glidein constraints: %s'%self.glidekeeper_constraint)
        
        # string, what our ads will be identified at the factories
        self.classad_id=classad_id
        ilog('Thread classad_id: %s'%classad_id)
        
        # factory pools is a list of pairs, where
        #  [0] is factory node
        #  [1] is factory identity
        self.factory_pools=factory_pools

        # string or None
        self.factory_constraint=factory_constraint

        # string
        self.collector_node = collector_node

        self.proxy_fname=proxy_fname
        self.reload_proxy() # provides proxy_data

        ilog('Backend info:\n\tfactory_pools: %s\n\tfactory_constraint: %s\n\tcollector_node: %s\n\tproxy_fname: %s'%(dbgp(factory_pools), factory_constraint, collector_node, proxy_fname))

        #############################
        
        # keep it simple, start with 0, requests will come later
        self.needed_glideins=0

        self.need_cleanup = False # if never requested more than 0, then no need to do cleanup

        self.running_glideins=0
        self.errors=[]

        ##############################
        self.shutdown=False
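
Note: the identifier and ClassAd constraint strings built in __init__ compose as follows; a standalone illustration with made-up values:

import os
import time

security_name, instance_id, group_name = 'secname', 'u500', 'main'
glidekeeper_id = '%s_%s' % (security_name, instance_id)   # secname_u500
client_name = '%s.%s' % (glidekeeper_id, group_name)      # secname_u500.main
session_id = '%s_%s' % (time.time(), os.getpid())

instance_constraint = 'GLIDETESTER_InstanceID=?="%s"' % glidekeeper_id
session_constraint = 'GLIDETESTER_SessionID=?="%s"' % session_id
glidekeeper_constraint = '(%s)&&(%s)' % (instance_constraint,
                                         session_constraint)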
Example no. 13
    def go_request_glideins(self):
        ilog('Entered go_request_glideins.')
        from glideinwms.frontend import glideinFrontendInterface
        from glideinwms.lib import condorMonitor, condorExe, pubCrypto
        from glideinwms.frontend.glideinFrontendPlugins import proxy_plugins, createCredentialList
        # query job collector
        ilog('Checking the condor pool.')
        try:
            pool_status = condorMonitor.CondorStatus()
            pool_status.load(
                '(IS_MONITOR_VM=!=True)&&(%s)' % self.glidekeeper_constraint,
                [('State', 's')])
            running_glideins = len(pool_status.fetchStored())
            del pool_status
            self.running_glideins = running_glideins
            ilog('Found %d glideins in the pool.' % running_glideins)
        except:
            self.errors.append((time.time(), "condor_status failed"))
            return

        # query WMS collector
        ilog('Checking factory glideins.')
        glidein_dict = {}
        for factory_pool in self.factory_pools:
            factory_pool_node = factory_pool[0]
            factory_identity = factory_pool[1]
            try:
                if self.proxy_data != None:
                    full_constraint = self.factory_constraint + ' && (PubKeyType=?="RSA") && (GlideinAllowx509_Proxy=!=False)'
                else:
                    full_constraint = self.factory_constraint + ' && (GlideinRequirex509_Proxy=!=True)'
                ilog(
                    'Running findGlideins with these params: \n\tpool: %s\n\tident: %s\n\tsigtype: %s\n\tconstraints: %s'
                    % (
                        str(factory_pool_node), str(None),
                        str(self.signature_type), str(full_constraint)
                        #str(self.proxy_data!=None),
                        #str(True)
                    ))
                factory_glidein_dict = glideinFrontendInterface.findGlideins(
                    factory_pool_node,
                    None,  #factory_identity, #TODO: How do we authenticate with the factory? 
                    self.signature_type,
                    full_constraint
                    #self.proxy_data!=None,
                    #get_only_matching=True
                )
            except RuntimeError, e:
                factory_glidein_dict = {
                }  # in case of error, treat as there is nothing there
                ilog('Error from findGlideins: %s' % str(e))
            ilog('Found %d possible in factory_pool %s' %
                 (len(factory_glidein_dict.keys()), dbgp(factory_pool)))

            for glidename in factory_glidein_dict.keys():
                ilog('Now testing glidein with name %s' % glidename)
                glidein_el = factory_glidein_dict[glidename]
                ilog('Glidein stats: \n\n %s \n\n' % dbgp(glidein_el))
                if not glidein_el['attrs'].has_key(
                        'PubKeyType'):  # no pub key at all, skip
                    ilog('%s has no PubKeyType -- skipping.' % glidename)
                    continue
                elif glidein_el['attrs'][
                        'PubKeyType'] == 'RSA':  # only trust RSA for now
                    try:
                        # augment
                        glidein_el['attrs']['PubKeyObj'] = pubCrypto.PubRSAKey(
                            str(
                                re.sub(r"\\+n", r"\n",
                                       glidein_el['attrs']['PubKeyValue'])))
                        # and add
                        glidein_dict[(factory_pool_node,
                                      glidename)] = glidein_el
                        ilog('Adding %s to glidein_dict' % glidename)
                    except RuntimeError, e:
                        ilog('Hit error when adding %s to glidein_dict:\n%s' %
                             (glidename, str(e)))
                        continue  # skip
                    except:
Example no. 14
            data=check1.fetchStored()
            ilog('Success!')
        except RuntimeError,e:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (%s)... ignoring for now"%e))

            main_log.flush()
            sleep(2)
            continue # retry the while loop
        except:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (reason unknown)... ignoring for now"))

            main_log.flush()
            sleep(2)
            continue # retry the while loop
        runningGlideins = len(data)
        ilog('Found %s jobs running.'%len(data.keys()))
        main_log.write("%s %s %s\n"%(ctime(), runningGlideins, 'jobs running'))
        main_log.flush()
        if runningGlideins == 0:
            main_log.write("%s %s\n"%(ctime(), "no more running jobs"))
            break
        else:
            sleep(10)

def parse_result(config,workingDir,concurrencyLevel):
    # Create a loop to parse each log file into a summaries directory
    summDir = workingDir + '/summaries/'
    os.makedirs(summDir)
    for l in range(0, config.runs, 1):
        for k in range(0, len(concurrencyLevel), 1):
Example no. 15
def parse_result(config,workingDir,concurrencyLevel):
    # Create a loop to parse each log file into a summaries directory
    summDir = workingDir + '/summaries/'
    os.makedirs(summDir)
    for l in range(0, config.runs, 1):
        for k in range(0, len(concurrencyLevel), 1):

            # Initialize empty arrays for data
            results=[]
            hours=[]
            minutes=[]
            seconds=[]
            jobStartInfo=[]
            jobExecuteInfo=[]
            jobFinishInfo=[]
            jobStatus=[]

            # Parse each log file
            logFile = workingDir + '/con_' + concurrencyLevel[k] + '_run_' + str(l) + '.log'
            if not os.path.exists(logFile):
                # If the log file doesn't exist, then the run failed. 
                # Report that in the summaries. 
                filePath = summDir + 'con_' + concurrencyLevel[k] + '_run_' + str(l) + '.txt'
                file=open(filePath, 'w')
                header = "# Test Results for " + config.executable + " run at concurrency Level " + concurrencyLevel[k] + '\n\nJob\tExec\tFinish\tReturn\nNumber\tTime\tTime\tValue\n'
                file.write(header)
                file.write('#ERROR: Could not read log file. Did this level actually run?')
                file.close()

                filepath = summDir + 'results.txt'
                file=open(filepath, 'a')
                times = "Concurrency_Level = " + concurrencyLevel[k] + "\t  Execute_Time_(Ave/Min/Max) = " + 'ERROR: Failed' + '/' + 'ERROR: Failed' + '/' + 'ERROR: Failed' + "\t  Finish_Time_(Ave/Min/Max) = " + 'ERROR: Failed' + "/" + 'ERROR: Failed' + "/" + 'ERROR: Failed' + '\n'
                file.write(times)
                file.close()
                continue
            lf = open(logFile, 'r')
            try:
                lines1 = lf.readlines()
            finally:
                lf.close()
            jobsSubmitted = 0
            for line in lines1:
                line = line.strip()
                if line[0:1] not in ('0','1','2','3','4','5','6','7','8','9','('):
                    continue # ignore unwanted text lines
                arr1=line.split(' ',7)
                if len(arr1) < 6:
                    ilog('ERROR: Line too short for parsing: %s'%(str(arr1)))
                    continue
                if arr1[5] == "Bytes" or arr1[4] =="Image":
                    continue
                if arr1[5] == "submitted":
                    jobNum = arr1[1].strip('()')
                    jobStartInfo.append(jobNum)
                    jobStartInfo.append(arr1[3])
                    jobsSubmitted=jobsSubmitted+1
                if arr1[5] == "executing":
                    jobNum = arr1[1].strip('()')
                    jobExecuteInfo.append(jobNum)
                    jobExecuteInfo.append(arr1[3])
                if arr1[5] == "terminated.":
                    jobNum = arr1[1].strip('()')
                    jobFinishInfo.append(jobNum)
                    jobFinishInfo.append(arr1[3])
                if arr1[4] == "value":
                    status=arr1[5].split(')',1)
                    jobFinishInfo.append(status[0])

            # Set some variables
            minExeTime=1e20
            maxExeTime=0
            minFinTime=1e20
            maxFinTime=0
            iter=0
            for i in range(0, len(jobStartInfo), 2):
                if jobStartInfo[i] in jobExecuteInfo:
                    index = jobExecuteInfo.index(jobStartInfo[i])
                    timeJobStart = jobStartInfo[i + 1]
                    timeJobExecute = jobExecuteInfo[index + 1]
                    timeStart = timeJobStart.split(':', 2)
                    timeExecute = timeJobExecute.split(':', 2)
                    diffHours = (int(timeExecute[0]) - int(timeStart[0])) * 3600
                    diffMinutes = (int(timeExecute[1]) - int(timeStart[1])) * 60
                    diffSeconds = int(timeExecute[2]) - int(timeStart[2])
                    executeTime = diffHours + diffMinutes + diffSeconds
                    index2 = jobFinishInfo.index(jobStartInfo[i])
                    timeJobFinish = jobFinishInfo[index2 + 1]
                    stat = jobFinishInfo[index2 +2]
                    timeFinish = timeJobFinish.split(':', 2)
                    diffHours2 = (int(timeFinish[0]) - int(timeExecute[0])) * 3600
                    diffMinutes2 = (int(timeFinish[1]) - int(timeExecute[1])) * 60
                    diffSeconds2 = int(timeFinish[2]) - int(timeExecute[2])
                    finishTime = diffHours2 + diffMinutes2 + diffSeconds2
                    resultData = [iter, executeTime, finishTime, stat]
                    results.append(resultData)
                    iter = iter + 1
                    if executeTime > maxExeTime:
                        maxExeTime = executeTime
                    if executeTime < minExeTime:
                        minExeTime = executeTime
                    if finishTime > maxFinTime:
                        maxFinTime = finishTime
                    if finishTime < minFinTime:
                        minFinTime = finishTime

            # Create summary directory structure
            filePath = summDir + 'con_' + concurrencyLevel[k] + '_run_' + str(l) + '.txt'
            file=open(filePath, 'w')
            header = "# Test Results for " + config.executable + " run at concurrency Level " + concurrencyLevel[k] + '\n\nJob\tExec\tFinish\tReturn\nNumber\tTime\tTime\tValue\n'
            file.write(header)
            exeTime=0
            finTime=0
            for i in range(0, int(concurrencyLevel[k])):
                exeTime = exeTime + results[i][1]
                finTime = finTime + results[i][2]
                writeData = str(results[i][0]) + '\t' + str(results[i][1]) + '\t' + str(results[i][2]) + '\t' + results[i][3] + '\n'
                file.write(writeData)

            aveExeTime = exeTime/int(concurrencyLevel[k])
            aveFinTime = finTime/int(concurrencyLevel[k])
            file.close()

            filepath = summDir + 'results.txt'
            file=open(filepath, 'a')
            times = "Concurrency_Level = " + concurrencyLevel[k] + "\t  Execute_Time_(Ave/Min/Max) = " + str(aveExeTime) + '/' + str(minExeTime) + '/' + str(maxExeTime) + "\t  Finish_Time_(Ave/Min/Max) = " + str(aveFinTime) + "/" + str(minFinTime) + "/" + str(maxFinTime) + '\n'
            file.write(times)
            file.close()
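
Note: the hour/minute/second arithmetic above handles hour boundaries correctly but goes negative when a job crosses midnight in the HH:MM:SS log timestamps. A sketch of a safer diff, assuming the timestamps really are HH:MM:SS with at most one midnight rollover:

from datetime import datetime, timedelta

def duration_seconds(start, end):
    fmt = '%H:%M:%S'
    delta = datetime.strptime(end, fmt) - datetime.strptime(start, fmt)
    if delta.total_seconds() < 0:    # crossed midnight
        delta += timedelta(days=1)
    return int(delta.total_seconds())

assert duration_seconds('23:59:50', '00:00:10') == 20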
Example no. 16
    def __init__(self,
                 web_url,
                 descript_fname,
                 descript_signature,
                 group_name,
                 group_descript_fname,
                 group_descript_signature,
                 security_name,
                 instance_id,
                 classad_id,
                 factory_pools,
                 factory_constraint,
                 collector_node,
                 proxy_fname,
                 session_id=None):  # session_id should be a unique string

        ilog("Initting new GlideKeeperThread.")

        threading.Thread.__init__(self)

        # consts
        self.signature_type = "sha1"
        self.max_request = 100

        # strings, describe Web downloadable info
        self.web_url = web_url
        self.descript_fname = descript_fname
        self.descript_signature = descript_signature

        ilog(
            "Thread web info: \n\tweb_url: %s\n\tdescript_fname: %s\n\tdescript_signature: %s"
            % (web_url, descript_fname, descript_signature))

        self.group_name = group_name
        self.group_descript_fname = group_descript_fname
        self.group_descript_signature = group_descript_signature

        ilog(
            "Thread group info: \n\tgroup_name: %s\n\tdescript_fname: %s\n\tdescript_signature: %s"
            % (group_name, group_descript_fname, group_descript_signature))

        # string, used for identification
        self.security_name = security_name
        self.instance_id = instance_id
        glidekeeper_id = "%s_%s" % (security_name, instance_id)
        self.glidekeeper_id = glidekeeper_id
        client_name = "%s.%s" % (glidekeeper_id, self.group_name)
        self.client_name = client_name

        ilog(
            'Thread security info: \n\tsecurity_name: %s\n\tinstance_id: %s\n\tglidekeeper_id: %s\n\tclient_name: %s'
            % (security_name, instance_id, glidekeeper_id, client_name))

        if session_id == None:
            # should be as unique as possible
            # in the context of the instance_id
            session_id = "%s_%s" % (time.time(), os.getpid())
        self.session_id = session_id

        ilog('Thread session_id: %s' % session_id)

        self.instance_constraint = 'GLIDETESTER_InstanceID=?="%s"' % self.glidekeeper_id
        if len(self.session_id) != 0:
            self.session_constraint = 'GLIDETESTER_SessionID=?="%s"' % self.session_id
            self.glidekeeper_constraint = "(%s)&&(%s)" % (
                self.instance_constraint, self.session_constraint)
        else:
            self.session_constraint = 'TRUE'
            self.glidekeeper_constraint = self.instance_constraint

        ilog('Thread glidein constraints: %s' % self.glidekeeper_constraint)

        # string, what our ads will be identified at the factories
        self.classad_id = classad_id
        ilog('Thread classad_id: %s' % classad_id)

        # factory pools is a list of pairs, where
        #  [0] is factory node
        #  [1] is factory identity
        self.factory_pools = factory_pools

        # string or None
        self.factory_constraint = factory_constraint

        # string
        self.collector_node = collector_node

        self.proxy_fname = proxy_fname
        self.reload_proxy()  # provides proxy_data

        ilog(
            'Backend info:\n\tfactory_pools: %s\n\tfactory_constraint: %s\n\tcollector_node: %s\n\tproxy_fname: %s'
            % (dbgp(factory_pools), factory_constraint, collector_node,
               proxy_fname))

        #############################

        # keep it simple, start with 0, requests will come later
        self.needed_glideins = 0

        self.need_cleanup = False  # if never requested more than 0, then no need to do cleanup

        self.running_glideins = 0
        self.errors = []

        ##############################
        self.shutdown = False
Example no. 17
 def soft_kill(self):
     ilog('Requesting a soft kill from the thread.')
     self.shutdown = True
Example no. 18
    def load_params(self):
        file_paths = self.params_path

        for fl in file_paths:
            config = parse_kv_file(fl)
            self.load_additional_classads(config)
            if self.has_params():
                continue

            if self.executable is None:
                exec_path = config.settings.get('executable')
                if exec_path is None:
                    pass
                elif not os.path.exists(exec_path):
                    raise RuntimeError, "%s '%s' is not a valid executable"%('executable',exec_path)
                else:
                    self.executable = exec_path
            if self.inputFile is None:
                input_files = config.settings.get('transfer_input_files')
                if input_files is not None:
                    arr = input_files.split(',')
                    newarr = []
                    for f in arr:
                        if not os.path.exists(f):
                            raise RuntimeError, "'%s' is not a valid file"%f
                        newarr.append(os.path.abspath(f))
                    self.inputFile = string.join(newarr,',')
            if self.outputFile is None:
                output_files = config.settings.get('transfer_output_files')
                if output_files is not None:
                    self.outputFile = output_files
            if self.environment is None:
                self.environment = config.settings.get('environment')
            if self.getenv is None:
                self.getenv = config.settings.get('getenv')
            if self.arguments is None:
                self.arguments = config.settings.get('arguments')
            if self.x509userproxy is None:
                val = config.settings.get('x509userproxy')
                if (val is not None) and (val!='') and (not os.path.exists(val)):
                    raise RuntimeError, "'%s' is not a valid proxy"%val
                self.x509userproxy = val
            if self.concurrencyLevel is None:
                concurrency = config.settings.get('concurrency')
                if concurrency is not None:
                    self.concurrencyLevel = concurrency.split()
            if self.runs is None: 
                runs = config.settings.get('runs')
                if runs is not None:
                    self.runs = int(runs)
            if self.gfactoryAdditionalConstraint is None:
                self.gfactoryAdditionalConstraint = config.settings.get('gfactoryAdditionalConstraint')
            if self.reuseOldGlideins is None:
                if not 'reuse_old_glideins' in config.settings:
                    ilog(config.settings)
                    continue
                new_rog_raw = config.settings.get('reuse_old_glideins').strip().lower()
                if len(new_rog_raw) == 0:
                    self.reuseOldGlideins = True 
                elif new_rog_raw[0] == 't':
                    self.reuseOldGlideins = True 
                else:
                    self.reuseOldGlideins = False
            if self.jobOutFormat is None:
                self.jobOutFormat = config.settings.get('initialDirFormat')
                if self.jobOutFormat is not None: 
                    self.verify_job_out_format()
            if self.prescript is None: 
                self.prescript = config.settings.get('prescript')
            if self.postscript is None: 
                self.postscript = config.settings.get('postscript')
            self.verify_prepostscript()
        if self.runs is None or type(self.runs) != int: 
            self.runs = 1 
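
Note: the reuse_old_glideins handling above treats an empty value as True and anything whose first character is 't' as True. A small helper sketch that makes the same rule explicit:

def parse_loose_bool(raw, default=True):
    # Empty string -> default; 'true', 'T', 'tyes' -> True; all else False.
    raw = raw.strip().lower()
    if len(raw) == 0:
        return default
    return raw[0] == 't'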
Example no. 19
def parse_result(config,workingDir,concurrencyLevel):
    # Create a loop to parse each log file into a summaries directory
    summDir = workingDir + '/summaries/'
    os.makedirs(summDir)
    for l in range(0, config.runs, 1):
        for k in range(0, len(concurrencyLevel), 1):

            # Initialize empty arrays for data
            results=[]
            hours=[]
            minutes=[]
            seconds=[]
            jobStartInfo=[]
            jobExecuteInfo=[]
            jobFinishInfo=[]
            jobStatus=[]

            # Parse each log file
            logFile = workingDir + '/con_' + concurrencyLevel[k] + '_run_' + str(l) + '.log'
            if not os.path.exists(logFile):
                # If the log file doesn't exist, then the run failed. 
                # Report that in the summaries. 
                filePath = summDir + 'con_' + concurrencyLevel[k] + '_run_' + str(l) + '.txt'
                file=open(filePath, 'w')
                header = "# Test Results for " + config.executable + " run at concurrency Level " + concurrencyLevel[k] + '\n\nJob\tExec\tFinish\tReturn\nNumber\tTime\tTime\tValue\n'
                file.write(header)
                file.write('#ERROR: Could not read log file. Did this level actually run?')
                file.close()

                filepath = summDir + 'results.txt'
                file=open(filepath, 'a')
                times = "Concurrency_Level = " + concurrencyLevel[k] + "\t  Execute_Time_(Ave/Min/Max) = " + 'ERROR: Failed' + '/' + 'ERROR: Failed' + '/' + 'ERROR: Failed' + "\t  Finish_Time_(Ave/Min/Max) = " + 'ERROR: Failed' + "/" + 'ERROR: Failed' + "/" + 'ERROR: Failed' + '\n'
                file.write(times)
                file.close()
                continue
            lf = open(logFile, 'r')
            try:
                lines1 = lf.readlines()
            finally:
                lf.close()
            jobsSubmitted = 0
            for line in lines1:
                line = line.strip()
                if line[0:1] not in ('0','1','2','3','4','5','6','7','8','9','('):
                    continue # ignore unwanted text lines
                arr1=line.split(' ',7)
                if len(arr1) < 6:
                    ilog('ERROR: Line too short for parsing: %s'%(str(arr1)))
                    continue
                if arr1[5] == "Bytes" or arr1[4] =="Image":
                    continue
                if arr1[5] == "submitted":
                    jobNum = arr1[1].strip('()')
                    jobStartInfo.append(jobNum)
                    jobStartInfo.append(arr1[3])
                    jobsSubmitted=jobsSubmitted+1
                if arr1[5] == "executing":
                    jobNum = arr1[1].strip('()')
                    jobExecuteInfo.append(jobNum)
                    jobExecuteInfo.append(arr1[3])
                if arr1[5] == "terminated.":
                    jobNum = arr1[1].strip('()')
                    jobFinishInfo.append(jobNum)
                    jobFinishInfo.append(arr1[3])
                if arr1[4] == "value":
                    status=arr1[5].split(')',1)
                    jobFinishInfo.append(status[0])

            # Set some variables
            minExeTime=1e20
            maxExeTime=0
            minFinTime=1e20
            maxFinTime=0
            iter=0
            for i in range(0, len(jobStartInfo), 2):
                if jobStartInfo[i] in jobExecuteInfo:
                    index = jobExecuteInfo.index(jobStartInfo[i])
                    timeJobStart = jobStartInfo[i + 1]
                    timeJobExecute = jobExecuteInfo[index + 1]
                    timeStart = timeJobStart.split(':', 2)
                    timeExecute = timeJobExecute.split(':', 2)
                    diffHours = (int(timeExecute[0]) - int(timeStart[0])) * 3600
                    diffMinutes = (int(timeExecute[1]) - int(timeStart[1])) * 60
                    diffSeconds = int(timeExecute[2]) - int(timeStart[2])
                    executeTime = diffHours + diffMinutes + diffSeconds
                    index2 = jobFinishInfo.index(jobStartInfo[i])
                    timeJobFinish = jobFinishInfo[index2 + 1]
                    stat = jobFinishInfo[index2 +2]
                    timeFinish = timeJobFinish.split(':', 2)
                    diffHours2 = (int(timeFinish[0]) - int(timeExecute[0])) * 3600
                    diffMinutes2 = (int(timeFinish[1]) - int(timeExecute[1])) * 60
                    diffSeconds2 = int(timeFinish[2]) - int(timeExecute[2])
                    finishTime = diffHours2 + diffMinutes2 + diffSeconds2
                    resultData = [iter, executeTime, finishTime, stat]
                    results.append(resultData)
                    iter = iter + 1
                    if executeTime > maxExeTime:
                        maxExeTime = executeTime
                    if executeTime < minExeTime:
                        minExeTime = executeTime
                    if finishTime > maxFinTime:
                        maxFinTime = finishTime
                    if finishTime < minFinTime:
                        minFinTime = finishTime

            # Create summary directory structure
            filePath = summDir + 'con_' + concurrencyLevel[k] + '_run_' + str(l) + '.txt'
            file=open(filePath, 'w')
            header = "# Test Results for " + config.executable + " run at concurrency Level " + concurrencyLevel[k] + '\n\nJob\tExec\tFinish\tReturn\nNumber\tTime\tTime\tValue\n'
            file.write(header)
            exeTime=0
            finTime=0
            for i in range(0, int(concurrencyLevel[k])):
                exeTime = exeTime + results[i][1]
                finTime = finTime + results[i][2]
                writeData = str(results[i][0]) + '\t' + str(results[i][1]) + '\t' + str(results[i][2]) + '\t' + results[i][3] + '\n'
                file.write(writeData)

            aveExeTime = exeTime/int(concurrencyLevel[k])
            aveFinTime = finTime/int(concurrencyLevel[k])
            file.close()

            filepath = summDir + 'results.txt'
            file=open(filepath, 'a')
            times = "Concurrency_Level = " + concurrencyLevel[k] + "\t  Execute_Time_(Ave/Min/Max) = " + str(aveExeTime) + '/' + str(minExeTime) + '/' + str(maxExeTime) + "\t  Finish_Time_(Ave/Min/Max) = " + str(aveFinTime) + "/" + str(minFinTime) + "/" + str(maxFinTime) + '\n'
            file.write(times)
            file.close()
Example no. 20
def process_concurrency(config,gktid,main_log,workingDir,concurrencyLevel,run,k):

    ilog('Processing concurrency level %s => %s run number %s.\n\tgktid: %s\n\tworkingDir: %s\n\t log: %s'%(str(k), str(concurrencyLevel[k]), str(run), str(gktid), str(workingDir), str(main_log)))
    from glideinwms.lib import condorMonitor
    from glideinwms.lib import condorManager

    # request the glideins
    # we want 10% more glideins than the concurrency level
    requestedGlideins = int(concurrencyLevel[k])
    totalGlideins = int(requestedGlideins + .1 * requestedGlideins)
    gktid.request_glideins(totalGlideins)
    main_log.write("%s %i Glideins requested\n"%(ctime(),totalGlideins))

    # now we create the directories for each job and a submit file
    filename =  workingDir + "/" + config.executable.replace('/', '__') + '_concurrency_' + concurrencyLevel[k] + '_run_' + str(run ) + '_submit.condor'
    filecontent = make_submit_file_content(config, gktid, main_log, workingDir, concurrencyLevel[k], run)
    condorSubmitFile=open(filename, "w")
    ilog('Creating condor file %s:\n%s'%(filename, filecontent ))
    condorSubmitFile.write(filecontent)
    condorSubmitFile.close()

    # Need to figure out when we have all the glideins
    # Ask the glidekeeper object
    ilog('Now waiting until the thread retrieves enough glideins.')
    numberGlideins = 0
    while numberGlideins < requestedGlideins:
        errors=[]
        while 1:
            # since gktid runs in a different thread, pop is the only atomic operation I have
            try:
                errors.append(gktid.errors.pop())
            except IndexError:
                break

        errors.reverse()
        if not len(errors) == 0:
            ilog('Have errors!')
        for err  in errors:
            main_log.write("%s Error: %s\n"%(ctime(err[0]),err[1]))
            ilog('Found an error: %s'%err[1])
        if not gktid.isAlive():
            raise RuntimeError, "The glidekeeper thread unexpectedly died!"

        numberGlideins = gktid.get_running_glideins()
        ilog('Currently have %s running glideins out of %s.'%(numberGlideins, requestedGlideins))
        main_log.write("%s %s %s %s %s\n"%(ctime(), 'we have', numberGlideins, 'glideins, need', requestedGlideins))
        main_log.flush()
        sleep(5)

    # Now we begin submission and monitoring
    ilog('Got the glideins. Now submitting %s.'%filename)
    submission = condorManager.condorSubmitOne(filename)
    main_log.write("%s %s\n"%(ctime(), "file submitted"))
    runningGlideins = numberGlideins
    while runningGlideins > 0:
        if gktid.session_id is not None and len(gktid.session_id) > 0:
            qconstraint = '(JobStatus<3)&&(GK_InstanceId=?="%s")&&(GK_SessionId=?="%s")'%(gktid.glidekeeper_id,gktid.session_id)
        else:
            qconstraint = '(JobStatus<3)&&(GK_InstanceId=?="%s")'%(gktid.glidekeeper_id)
        ilog('Running condorQ to get the running jobs. Constraints: %s'%(qconstraint))
        check1 = condorMonitor.CondorQ()
        try:
            # I actually want to see all jobs, not only running ones
            check1.load(qconstraint, [("JobStatus","s")])
            data=check1.fetchStored()
            ilog('Success!')
        except RuntimeError,e:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (%s)... ignoring for now"%e))

            main_log.flush()
            sleep(2)
            continue # retry the while loop
        except:
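
Note: the error-draining loop above relies on list.pop() being atomic, so the main thread can safely drain gktid.errors while the glidekeeper thread keeps appending. The same pattern as a reusable sketch:

def drain_errors(shared_errors):
    # Items come out newest-first, so reverse to restore chronological order.
    drained = []
    while True:
        try:
            drained.append(shared_errors.pop())
        except IndexError:
            break
    drained.reverse()
    return drained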
Example no. 21
    def go_request_glideins(self):
        ilog('Entered go_request_glideins.')
        from glideinwms.frontend import glideinFrontendInterface
        from glideinwms.lib import condorMonitor, condorExe
        from glideinwms.frontend.glideinFrontendPlugins import proxy_plugins, createCredentialList
        # query job collector
        ilog('Checking the condor pool.')
        try:
          pool_status=condorMonitor.CondorStatus()
          pool_status.load()#'(IS_MONITOR_VM=!=True)&&(%s)'%self.glidekeeper_constraint,[('State','s')])
          running_glideins=len(pool_status.fetchStored())
          del pool_status
          self.running_glideins=running_glideins
          ilog('Found %d glideins in the pool.'%running_glideins)
        except:
          self.errors.append((time.time(),"condor_status failed"))
          return

        # query WMS collector
        ilog('Checking factory glideins.')
        glidein_dict={}
        for factory_pool in self.factory_pools:
            factory_pool_node=factory_pool[0]
            factory_identity=factory_pool[1]
            try:
                if self.proxy_data != None:
                    full_constraint = self.factory_constraint +' && (PubKeyType=?="RSA") && (GlideinAllowx509_Proxy=!=False)'
                else:
                    full_constraint = self.factory_constraint + ' && (GlideinRequirex509_Proxy=!=True)'
                ilog('Running findGlideins with these params: \n\tpool: %s\n\tident: %s\n\tsigtype: %s\n\tconstraints: %s'%(
                    str(factory_pool_node),
                    str(None),
                    str(self.signature_type),
                    str(full_constraint)
                    #str(self.proxy_data!=None),
                    #str(True)
                ))
                factory_glidein_dict=glideinFrontendInterface.findGlideins(
                    factory_pool_node,
                    None, #factory_identity, #TODO: How do we authenticate with the factory? 
                    self.signature_type,
                    full_constraint
                    #self.proxy_data!=None,
                    #get_only_matching=True
                )
            except RuntimeError, e:
                factory_glidein_dict={} # in case of error, treat as there is nothing there
                ilog('Error from findGlideins: %s'%str(e))
            ilog('Found %d possible in factory_pool %s'%(len(factory_glidein_dict.keys()), dbgp(factory_pool)))

            for glidename in factory_glidein_dict.keys():
                ilog('Now testing glidein with name %s'%glidename)
                glidein_el=factory_glidein_dict[glidename]
                ilog('Glidein stats: \n\n %s \n\n'%dbgp(glidein_el))
                if not glidein_el['attrs'].has_key('PubKeyType'): # no pub key at all, skip
                    ilog('%s has no PubKeyType -- skipping.'% glidename)
                    continue
                elif glidein_el['attrs']['PubKeyType']=='RSA': # only trust RSA for now
                    try:
                        # augment
                        glidein_el['attrs']['PubKeyObj']=glideinFrontendInterface.pubCrypto.PubRSAKey(str(re.sub(r"\\+n", r"\n", glidein_el['attrs']['PubKeyValue'])))
                        # and add
                        glidein_dict[(factory_pool_node,glidename)]=glidein_el
                        ilog('Adding %s to glidein_dict'%glidename)
                    except:
                        ilog('Hit error when adding %s to glidein_dict'%glidename)
                        continue # skip
                else: # invalid key type, skip
                    ilog('%s has invalid PubKeyType -- skipping.'% glidename)
                    continue
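
Note: the re.sub call above restores real newlines in the factory's advertised PEM key, which arrives with literal backslash-n sequences. A standalone illustration (the key body is made up):

import re

raw = '-----BEGIN PUBLIC KEY-----\\nMIIB...\\n-----END PUBLIC KEY-----'
pem = re.sub(r"\\+n", r"\n", raw)
assert '\n' in pem and '\\n' not in pem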
Example no. 22
def run(config):
    os.environ['_CONDOR_SEC_DEFAULT_AUTHENTICATION_METHODS']='FS,GSI'
    os.environ['X509_USER_PROXY']=config.proxyFile
    import glideKeeper
    from glideinwms.lib import condorMonitor
    from glideinwms.lib import condorManager

    delegated_proxy=None
    if config.delegateProxy:
        if config.pilotFile is None:
            # use the service proxy as a backup solution
            delegated_proxy=config.proxyFile
        else:
            # use the pilot proxy, if available
            delegated_proxy=config.pilotFile
    
    if config.gfactoryAdditionalConstraint==None:
        gfactoryConstraint=config.gfactoryConstraint
    else:
        gfactoryConstraint="(%s)&&(%s)"%(config.gfactoryConstraint,config.gfactoryAdditionalConstraint)
    
    session_id_param = None
    if config.reuseOldGlideins is True:
        session_id_param = ''
    gktid=glideKeeper.GlideKeeperThread(config.webURL,config.descriptFile,config.descriptSignature,
                                        config.groupName,config.groupDescriptFile,config.groupDescriptSignature,
                                        config.mySecurityName,config.runId,
                                        config.myClassadID,
                                        [(config.gfactoryNode,config.gfactoryClassadID)],gfactoryConstraint,
                                        config.collectorNode,
                                        delegated_proxy, session_id = session_id_param)
    gktid.start()
    startupDir = os.getcwd()
    workingDir=startupDir + '/run_' + startTime
    
    os.makedirs(workingDir)
    main_log_fname=workingDir + '/glideTester.log'
    main_log=open(main_log_fname,'w')

    try:
        main_log.write("Starting at: %s\n\n"%ctime())

        main_log.write("Factory:       %s\n"%config.gfactoryNode)
        main_log.write("Constraint:    %s\n"%gfactoryConstraint)
        main_log.write("Service Proxy: %s\n"%config.proxyFile)
        main_log.write("Pilot Proxy:   %s\n"%delegated_proxy)
        main_log.write("InstanceID:    %s\n"%gktid.glidekeeper_id)
        main_log.write("SessionID:     %s\n\n"%gktid.session_id)

        concurrencyLevel=config.concurrencyLevel

        try:
            prescript_args = {
                'wd' : str(workingDir),
                'sd' : os.getcwd(), 
                'ts' : startTime,
                'cmd' : ' '.join(sys.argv)
            }
            if config.prescript is not None: 
                prescript = construct_from_format(config.prescript, prescript_args)
                ilog('Running prescript: %s'%(prescript))
                err_code = os.system(prescript)
                if err_code != 0:
                    msg = 'Bad error code: '+str(err_code)
                    raise RuntimeError(msg)

            # Create a testing loop for each run
            for l in range(0, config.runs, 1):
                main_log.write("Iteration %i\n"%l)

                # Create a testing loop for each concurrency
                for k in range(0, len(concurrencyLevel), 1):
                    main_log.write("Concurrency %i\n"%int(concurrencyLevel[k]))
                    process_concurrency(config,gktid,main_log,workingDir,concurrencyLevel,l,k)
            postscript_args = {
                'wd' : str(workingDir),
                'sd' : os.getcwd(), 
                'ts' : startTime,
                'cmd' : ' '.join(sys.argv)
            }
            if config.postscript is not None: 
                postscript = construct_from_format(config.postscript, postscript_args)
                ilog('Running postscript: %s'%(postscript))
                err_code = os.system(postscript)
                if err_code != 0:
                    msg = 'Bad error code: '+str(err_code)
                    raise RuntimeError(msg)
            main_log.write("%s %s\n"%(ctime(), "Done"))
        except:
            tb = traceback.format_exception(sys.exc_info()[0],sys.exc_info()[1],
                                            sys.exc_info()[2])
            main_log.write("%s %s\n"%(ctime(), "Exception: %s"%string.join(tb,'')))
            

        # Now we parse the log files
        parse_result(config,workingDir,concurrencyLevel)
    finally:
        main_log.write("%s %s\n"%(ctime(), "cleaning, then getting out"))
        main_log.flush()
        gktid.soft_kill()
        gktid.join()
        # print out any last minute errors
        for err  in gktid.errors:
            main_log.write("%s Error: %s\n"%(ctime(err[0]),err[1]))
            ilog("%s Error: %s\n"%(ctime(err[0]),err[1]))
        main_log.write("Terminated at: %s\n"%ctime())
    
    return
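
Note: on Unix, os.system returns the raw wait status rather than the child's exit code, and its result should be compared with !=, not the identity operator. A sketch using subprocess instead (an alternative, not what this code actually uses):

import subprocess

def run_script(cmd):
    # subprocess.call returns the exit code directly.
    err_code = subprocess.call(cmd, shell=True)
    if err_code != 0:
        raise RuntimeError('Bad exit code: %d' % err_code)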
Example no. 23
def process_concurrency(config,gktid,main_log,workingDir,concurrencyLevel,l,k):

    ilog('Processing concurrency level %s => %s run number %s.\n\tgktid: %s\n\tworkingDir: %s\n\t log: %s'%(str(k), str(concurrencyLevel[k]), str(l), str(gktid), str(workingDir), str(main_log)))
    from glideinwms.lib import condorMonitor
    from glideinwms.lib import condorManager

    universe = 'vanilla'
    transfer_executable = "True"
    when_to_transfer_output = "ON_EXIT"
    # disable the check for architecture, we are running a script
    # only match to our own glideins
    requirements = '(Arch =!= "fake")&&(%s)'%gktid.glidekeeper_constraint
    owner = 'Undefined'
    notification = 'Never'

    # request the glideins
    # we want 10% more glideins than the concurrency level
    requestedGlideins = int(concurrencyLevel[k])
    totalGlideins = int(requestedGlideins + .1 * requestedGlideins)
    gktid.request_glideins(totalGlideins)
    main_log.write("%s %i Glideins requested\n"%(ctime(),totalGlideins))

    # now we create the directories for each job and a submit file
    loop = 0
    dir1 = workingDir + '/concurrency_' + concurrencyLevel[k] + '_run_' + str(l) + '/'
    os.makedirs(dir1)
    logfile = workingDir + '/con_' + concurrencyLevel[k] + '_run_' + str(l) + '.log'
    outputfile = 'concurrency_' + concurrencyLevel[k] + '.out'
    errorfile = 'concurrency_' + concurrencyLevel[k] + '.err'
    filename =  workingDir + "/" + config.executable.replace('/', '__') + '_concurrency_' + concurrencyLevel[k] + '_run_' + str(l) + '_submit.condor'
    filecontent = ''
    condorSubmitFile=open(filename, "w")
    filecontent += ('universe = ' + universe + '\n' +
                           'executable = ' + config.executable + '\n' +
                           'transfer_executable = ' + transfer_executable + '\n' +
                           'when_to_transfer_output = ' + when_to_transfer_output + '\n' +
                           'Requirements = ' + requirements + '\n' +
         #                  '+Owner = ' + owner + '\n' +
                           'log = ' + logfile + '\n' +
                           'output = ' +  outputfile + '\n' +
                           'error = ' + errorfile + '\n' +
                           'notification = ' + notification + '\n' +
                           'periodic_remove = ((JobStatus!=2)&&(JobRunCount>0))||(JobRunCount>1)\n' +
                           '+GK_InstanceId = "' + gktid.glidekeeper_id + '"\n' +
                           '+GK_SessionId = "' + gktid.session_id + '"\n' +
                           '+IsSleep = 1\n')
    if config.inputFile != None:
        filecontent += ('transfer_input_files = ' + config.inputFile + '\n')
    if config.outputFile != None:
        filecontent += ('transfer_output_files = ' + config.outputFile + '\n')
    if config.environment != None:
        filecontent += ('environment = ' + config.environment + '\n')
    if config.getenv != None:
        filecontent += ('getenv = ' + config.getenv + '\n')
    if config.arguments != None:
        filecontent += ('arguments = ' + config.arguments + '\n')
    if config.x509userproxy!=None:
        filecontent += ('x509userproxy = ' + config.x509userproxy + '\n\n')
    else:
        filecontent += ('x509userproxy = ' + config.proxyFile + '\n\n')
    # Added support for additional ClassAds:
    for classAdd in config.additionalClassAds:
        name = classAdd[0]
        value = classAdd[1]
        filecontent += (name + ' = ' + value +'\n')
    for j in range(0, int(concurrencyLevel[k]), 1):
        filecontent += ('Initialdir = ' + dir1 + 'job' + str(loop) + '\n')
        filecontent += ('Queue\n\n')
        loop = loop + 1
    for i in range(0, int(concurrencyLevel[k]), 1):
        dir2 = dir1 + 'job' + str(i) + '/'
        os.makedirs(dir2)
    ilog('Creating condor file %s:\n%s'%(filename, filecontent ))
    condorSubmitFile.write(filecontent)
    condorSubmitFile.close()

    # Need to figure out when we have all the glideins
    # Ask the glidekeeper object
    ilog('Now waiting until the thread retrieves enough glideins.')
    finished = "false"
    while finished != "true":
        errors=[]
        while 1:
            # since gktid runs in a different thread, pop is the only atomic operation I have
            try:
                errors.append(gktid.errors.pop())
            except IndexError:
                break

        errors.reverse()
        if not len(errors) == 0:
            ilog('Have errors!')
        for err  in errors:
            main_log.write("%s Error: %s\n"%(ctime(err[0]),err[1]))
            ilog('Found an error: %s'%err[1])
        if not gktid.isAlive():
            raise RuntimeError, "The glidekeeper thread unexpectedly died!"

        numberGlideins = gktid.get_running_glideins()
        ilog('Currently have %s running glideins out of %s.'%(numberGlideins, requestedGlideins))
        main_log.write("%s %s %s %s %s\n"%(ctime(), 'we have', numberGlideins, 'glideins, need', requestedGlideins))
        main_log.flush()
        sleep(5)
        if numberGlideins >= requestedGlideins:
            finished = "true"

    # Now we begin submission and monitoring
    ilog('Got the glideins. Now submitting %s.'%filename)
    submission = condorManager.condorSubmitOne(filename)
    main_log.write("%s %s\n"%(ctime(), "file submitted"))
    running = "true"
    while running != "false":
        ilog('Running condorQ to get the running jobs.')
        check1 = condorMonitor.CondorQ()
        try:
            # I actually want to see all jobs, not only running ones
            check1.load('(JobStatus<3)&&(GK_InstanceId=?="%s")&&(GK_SessionId=?="%s")'%(gktid.glidekeeper_id,gktid.session_id), [("JobStatus","s")])
            data=check1.fetchStored()
            ilog('Success!')
        except RuntimeError,e:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (%s)... ignoring for now"%e))

            main_log.flush()
            sleep(2)
            continue # retry the while loop
        except:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (reason unknown)... ignoring for now"))
            main_log.flush()
            sleep(2)
            continue # retry the while loop
        ilog('Found %s jobs running.'%len(data.keys()))
        main_log.write("%s %s %s\n"%(ctime(), len(data.keys()), 'jobs running'))
        main_log.flush()
        if len(data.keys()) == 0:
            running = False
            main_log.write("%s %s\n"%(ctime(), "no more running jobs"))
        else:
            sleep(10)
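A note on the error-draining idiom above: because gktid runs in its own thread, errors are handed to the main loop via list.pop(), which is atomic under CPython, so no explicit lock is needed. A minimal self-contained sketch of that pattern (ErrorSource below is an illustrative stand-in for the glidekeeper thread, not part of glideTester):

import threading, time

def drain(shared_list):
    # pop() is atomic under the GIL, so producer and consumer need no lock
    drained = []
    while True:
        try:
            drained.append(shared_list.pop())
        except IndexError:
            break
    drained.reverse()  # pop() yields newest first; restore chronological order
    return drained

class ErrorSource(threading.Thread):
    # illustrative stand-in for the glidekeeper thread
    def __init__(self):
        threading.Thread.__init__(self)
        self.errors = []
    def run(self):
        for i in range(3):
            self.errors.append((time.time(), "error %d" % i))
            time.sleep(0.1)

src = ErrorSource()
src.start()
while src.is_alive():
    for tstamp, msg in drain(src.errors):
        print("%s: %s" % (time.ctime(tstamp), msg))
    time.sleep(0.2)
for tstamp, msg in drain(src.errors):  # final drain after the thread exits
    print("%s: %s" % (time.ctime(tstamp), msg))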
Example n. 24
0
def make_submit_file_content(config,gktid,main_log,workingDir,concurrency, run):
    universe = 'vanilla'
    transfer_executable = "True"
    when_to_transfer_output = "ON_EXIT_OR_EVICT"
    # disable the check for architecture, we are running a script
    # only match to our own glideins
    requirements = '(Arch =!= "fake")&&(%s)'%gktid.glidekeeper_constraint
    owner = 'Undefined'
    notification = 'Never'

    logfile = workingDir + '/con_' + concurrency + '_run_' + str(run) + '.log'
    outputfile = 'concurrency_' + concurrency + '.out'
    errorfile = 'concurrency_' + concurrency + '.err'
    filecontent = ('universe = ' + universe + '\n' +
                           'executable = ' + config.executable + '\n' +
                           'transfer_executable = ' + transfer_executable + '\n' +
                           'when_to_transfer_output = ' + when_to_transfer_output + '\n' +
                           'Requirements = ' + requirements + '\n' +
         #                  '+Owner = ' + owner + '\n' +
                           'log = ' + logfile + '\n' +
                           'output = ' +  outputfile + '\n' +
                           'error = ' + errorfile + '\n' +
                           'notification = ' + notification + '\n' +
                           'periodic_remove = ((JobStatus!=2)&&(JobRunCount>0))||(JobRunCount>1)\n' +
                           '+GK_InstanceId = "' + gktid.glidekeeper_id + '"\n' +
                           '+GK_SessionId = "' + gktid.session_id + '"\n' +
                           '+IsSleep = 1\n')
    if config.inputFile != None:
        filecontent += ('transfer_input_files = ' + config.inputFile + '\n')
    if config.outputFile != None:
        filecontent += ('transfer_output_files = ' + config.outputFile + '\n')
    if config.environment != None:
        filecontent += ('environment = ' + config.environment + '\n')
    if config.getenv != None:
        filecontent += ('getenv = ' + config.getenv + '\n')
    if config.arguments != None:
        filecontent += ('arguments = ' + config.arguments + '\n')
    if config.x509userproxy!=None:
        filecontent += ('x509userproxy = ' + config.x509userproxy + '\n\n')
    elif config.pilotFile!=None:
        filecontent += ('x509userproxy = '+config.pilotFile + '\n\n')
    else:
        filecontent += ('x509userproxy = ' + config.proxyFile + '\n\n')
    # Support for additional ClassAds:
    for name, value in config.additionalClassAds:
        filecontent += (name + ' = ' + value +'\n')
    # Now we create the directories for each job and a submit file
    config.verify_job_out_format()
    ilog('Using job output format: %s'%(config.jobOutFormat))
    for i in range(int(concurrency)):
        args = {
            'j' : str(i),
            'c' : str(concurrency), 
            'wd' : str(workingDir),
            'r' : str(run),
            'sd' : os.getcwd(), 
            'ts' : startTime,
        }
        jobdir = construct_from_format(config.jobOutFormat, args)
        filecontent += ('Initialdir = ' +jobdir+ '\n')
        filecontent += ('Queue\n\n')
        os.makedirs(jobdir+'/')
    return filecontent
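construct_from_format is referenced above but not shown in any of these snippets. Judging from the args dict it is passed, config.jobOutFormat is plausibly a %-style template with named placeholders; a hypothetical sketch under that assumption (the format strings in the usage note are invented for illustration):

def construct_from_format(fmt, args):
    # hypothetical implementation: expand %-style named placeholders,
    # e.g. fmt='%(wd)s/con_%(c)s_run_%(r)s/job%(j)s' with the args dict above
    return fmt % args

# usage, with invented values:
#   construct_from_format('%(wd)s/job%(j)s', {'wd': '/tmp/run1', 'j': '0'})
#   -> '/tmp/run1/job0'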
Example n. 25
0
 def soft_kill(self):
     ilog('Requesting a soft kill from the thread.')
     self.shutdown=True
Example n. 26
0
 def request_glideins(self,needed_glideins):
     ilog('Requesting %d glideins from thread.'%needed_glideins)
     self.needed_glideins=needed_glideins
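soft_kill and request_glideins steer the keeper thread purely by assigning attributes, which the thread's run loop is expected to poll. A minimal sketch of the consuming side, assuming a run loop shaped like the glidekeeper's (the body is illustrative, not the actual thread):

import threading, time

class KeeperSketch(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.shutdown = False
        self.needed_glideins = 0
    def run(self):
        while not self.shutdown:
            # a real keeper would advertise requests to the factory here
            # until self.needed_glideins glideins are provisioned
            time.sleep(1)
        # on shutdown it would deadvertize and stop leftover glideins,
        # as in the next snippet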
Example n. 27
0
        # Deadvertize my ad, so the factory knows we are gone
        for factory_pool in self.factory_pools:
            factory_pool_node=factory_pool[0]
            ilog('Deadvertising for node %s'%dbgp(factory_pool_node))
            try:
                glideinFrontendInterface.deadvertizeAllWork(factory_pool_node,self.client_name)
            except RuntimeError, e:
                self.errors.append((time.time(),"Deadvertizing failed: %s"%e))
            except:
                tb = traceback.format_exception(sys.exc_info()[0],sys.exc_info()[1],
                                                sys.exc_info()[2])
                self.errors.append((time.time(),"Deadvertizing failed: %s"%string.join(tb,'')))

        
        # Stop all the glideins I can see
        ilog('Getting glidein pool status data.')
        try:
            pool_status=condorMonitor.CondorStatus()
            pool_status.load(self.glidekeeper_constraint,[('GLIDEIN_COLLECTOR_NAME','s'),('GLIDEIN_MASTER_NAME','s')])
            pool_data=pool_status.fetchStored()
        except:
            pool_data={}  # avoid a NameError below if the status query failed
            self.errors.append((time.time(),"condor_status failed"))

        for k in pool_data.keys():
            el=pool_data[k]
            ilog('Now killing pool with data: (%s -> %s)'%(dbgp(k), dbgp(el)))
            try:
                condorExe.exe_cmd("../sbin/condor_off","-master -pool %s %s"%(el['GLIDEIN_COLLECTOR_NAME'],el['GLIDEIN_MASTER_NAME']))
            except RuntimeError, e:
                self.errors.append((time.time(),"condor_off failed: %s"%e))
            except:
Example n. 28
0
            try:
                glideinFrontendInterface.deadvertizeAllWork(
                    factory_pool_node, self.client_name)
            except RuntimeError, e:
                self.errors.append(
                    (time.time(), "Deadvertizing failed: %s" % e))
            except:
                tb = traceback.format_exception(sys.exc_info()[0],
                                                sys.exc_info()[1],
                                                sys.exc_info()[2])
                self.errors.append(
                    (time.time(),
                     "Deadvertizing failed: %s" % string.join(tb, '')))

        # Stop all the glideins I can see
        ilog('Getting glidein pool status data.')
        try:
            pool_status = condorMonitor.CondorStatus()
            pool_status.load(self.glidekeeper_constraint,
                             [('GLIDEIN_COLLECTOR_NAME', 's'),
                              ('GLIDEIN_MASTER_NAME', 's'),
                              ('MyAddress', 's')])
            pool_data = pool_status.fetchStored()
        except:
            pool_data = {}  # avoid a NameError below if the status query failed
            self.errors.append((time.time(), "condor_status failed"))

        for k in pool_data.keys():
            el = pool_data[k]
            ilog('Now killing pool with data: (%s -> %s)' %
                 (dbgp(k), dbgp(el)))
            try:
Example n. 29
0
 def request_glideins(self, needed_glideins):
     ilog('Requesting %d glideins from thread.' % needed_glideins)
     self.needed_glideins = needed_glideins
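Taken together, soft_kill, request_glideins and the wait loop earlier suggest the caller-side lifecycle. A hedged sketch (GlideKeeperThread is an assumed class name; the methods are the ones shown in these snippets):

import time

# hypothetical driver for the keeper thread
gktid = GlideKeeperThread()        # assumed constructor
gktid.start()
gktid.request_glideins(10)         # ask the thread to provision 10 glideins
while gktid.get_running_glideins() < 10:
    time.sleep(5)
# ... submit and monitor jobs here ...
gktid.soft_kill()                  # request a clean shutdown
gktid.join()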
Example n. 30
0
            check1.load('(JobStatus<3)&&(GK_InstanceId=?="%s")&&(GK_SessionId=?="%s")'%(gktid.glidekeeper_id,gktid.session_id), [("JobStatus","s")])
            data=check1.fetchStored()
            ilog('Success!')
        except RuntimeError,e:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (%s)... ignoring for now"%e))

            main_log.flush()
            sleep(2)
            continue # retry the while loop
        except:
            main_log.write("%s %s\n"%(ctime(), "condor_q failed (reason unknown)... ignoring for now"))

            main_log.flush()
            sleep(2)
            continue # retry the while loop
        ilog('Found %s jobs running.'%len(data.keys()))
        main_log.write("%s %s %s\n"%(ctime(), len(data.keys()), 'jobs running'))
        main_log.flush()
        if len(data.keys()) == 0:
            running = "false"
            main_log.write("%s %s\n"%(ctime(), "no more running jobs"))
        else:
            sleep(10)

def parse_result(config,workingDir,concurrencyLevel):
    # Create a loop to parse each log file into a summaries directory
    summDir = workingDir + '/summaries/'
    os.makedirs(summDir)
    for l in range(config.runs):
        for k in range(len(concurrencyLevel)):
Example n. 31
0
    def __init__(self,argv):

        # glideTester.cfg values
        self.runId=None
        self.glideinWMSDir = None
        self.configDir = None
        self.proxyFile = None
        self.pilotFile = None
        self.delegateProxy = None
        self.collectorNode = None
        self.gfactoryNode = None
        self.gfactoryConstraint = None
        self.gfactoryClassadID = None
        self.myClassadID = None
        self.mySecurityName = None

        # parameters.cfg values
        self.executable = None
        self.inputFile = None
        self.outputFile = None
        self.environment = None
        self.getenv = None
        self.arguments = None
        self.x509userproxy = None
        self.concurrencyLevel = None
        self.runs = None
        self.gfactoryAdditionalConstraint=None
        self.additionalClassAds = []
        self.reuseOldGlideins = None
        self.jobOutFormat=None
        self.prescript = None 
        self.postscript = None 

        # parse arguments
        valid_keys = ['-config', '-cfg', '--config', '-params', '-runId']
        arg_map = parse_argv(argv[1:], valid_kv_settings=valid_keys)
        passed_config_path = arg_map.get('-cfg') or arg_map.get('--config') or arg_map.get('-config')
        passed_params_path = arg_map.get('-params')
        self.cfg_paths = get_config_file_list(file_name='glideTester.cfg', arg_path=passed_config_path)
        self.params_path = get_config_file_list(file_name='parameters.cfg', arg_path=passed_params_path)
        self.runId = arg_map.get('-runId')

        # check and fix the attributes
        if self.runId==None:
            # not defined, create one specific for the account
            # should not be too random, or you pollute the factory namespace
            self.runId="u%i"%os.getuid()
        
        # load external values
        self.load_cfg()
        self.verify_cfg()

        # set search path
        if self.glideinWMSDir is not None:
            sys.path.insert(0, self.glideinWMSDir)
            sys.path.insert(0,os.path.join(self.glideinWMSDir,".."))

        self.load_config_dir()

        self.load_params()
        self.setup_logger()

        ilog("Made glideTester: \n\n%s\n"%dbgp(self, 4))