def get_graph_image(graph_dot):
    proc = LocalProcess('twopi', '-Tpng')
    proc.stdin.write(graph_dot)
    proc.stdin.close()
    if proc.status(timeout=20) is None:
        return 'Unable to render graph!'
    return proc.stdout.read_log() or 'Empty render result!'

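# The snippet above shows the recurring LocalProcess stdin/stdout pipe idiom.
# A minimal, hypothetical sketch of the same pattern with an arbitrary filter
# command - the helper name and the 'rev' example are illustrative, not part
# of the original code; only API calls seen above are used:
def pipe_through(cmd_args, input_text, timeout=20):
    proc = LocalProcess(*cmd_args)  # spawn the external filter
    proc.stdin.write(input_text)    # feed the input on stdin
    proc.stdin.close()              # signal EOF so the tool can finish
    if proc.status(timeout=timeout) is None:  # None => still running after timeout
        return None
    return proc.stdout.read_log()   # collected stdout

# hypothetical usage: reversed_text = pipe_through(['rev'], 'hello\n')
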
def _parseTickets(self, cached = True):
    # Return cached results if requested
    if cached and self._cache:
        return self._cache
    # Call klist and parse results
    proc = LocalProcess(self._klistExec)
    self._cache = {}
    try:
        for line in proc.stdout.iter(timeout = 10):
            if line.count('@') and (line.count(':') > 1):
                issued_expires, principal = rsplit(line, ' ', 1)
                issued_expires = issued_expires.replace('/', ' ').split()
                assert(len(issued_expires) % 2 == 0)
                issued_str = str.join(' ', issued_expires[:int(len(issued_expires) / 2)])
                expires_str = str.join(' ', issued_expires[int(len(issued_expires) / 2):])
                parseDate = lambda value, format: time.mktime(time.strptime(value, format))
                if expires_str.count(' ') == 3:
                    if len(expires_str.split()[2]) == 2:
                        expires = parseDate(expires_str, '%m %d %y %H:%M:%S')
                    else:
                        expires = parseDate(expires_str, '%m %d %Y %H:%M:%S')
                elif expires_str.count(' ') == 2:  # year information is missing
                    currentYear = int(time.strftime('%Y'))
                    expires = parseDate(expires_str + ' %d' % currentYear, '%b %d %H:%M:%S %Y')
                    issued = parseDate(issued_str + ' %d' % currentYear, '%b %d %H:%M:%S %Y')
                    if expires < issued:  # wraparound at new year
                        expires = parseDate(expires_str + ' %d' % (currentYear + 1), '%b %d %H:%M:%S %Y')
                self._cache.setdefault('tickets', {})[principal] = expires
            elif line.count(':') == 1:
                key, value = lmap(str.strip, line.split(':', 1))
                self._cache[key.lower()] = value
    except Exception:
        raise AccessTokenError('Unable to parse kerberos ticket information!')
    proc.status_raise(timeout = 0)
    return self._cache

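# Illustrative klist output the parser above expects; the exact format varies
# between Kerberos implementations and these sample lines are assumptions,
# not taken from the original code:
#
#   Ticket cache: FILE:/tmp/krb5cc_1000
#   Default principal: user@EXAMPLE.COM
#
#   Valid starting     Expires            Service principal
#   01/15/24 10:00:00  01/16/24 10:00:00  krbtgt/EXAMPLE.COM@EXAMPLE.COM
#
# Lines with exactly one ':' land in self._cache as key/value pairs; ticket
# lines (containing '@' and several ':') are split into issued/expires halves
# and parsed against several candidate date formats.
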
def _script_thread(self, script, task, jobnum=None, job_obj=None, add_dict=None):
    # Get both task and job config / state dicts
    try:
        tmp = {}
        if job_obj is not None:
            for key, value in job_obj.get_dict().items():
                tmp[key.upper()] = value
        tmp['GC_WORKDIR'] = self._path_work
        if jobnum is not None:
            tmp.update(task.get_job_dict(jobnum))
        tmp.update(add_dict or {})
        env = dict(os.environ)
        for key, value in tmp.items():
            if not key.startswith('GC_'):
                key = 'GC_' + key
            env[key] = str(value)
        script = task.substitute_variables('monitoring script', script, jobnum, tmp)
        if not self._silent:
            proc = LocalProcess(*shlex.split(script), **{'env_dict': env})
            proc_output = proc.get_output(timeout=self._script_timeout)
            if proc_output.strip():
                self._log.info(proc_output.strip())
        else:
            os.system(script)
    except Exception:
        self._log.exception('Error while running user script')
        clear_current_exception()

def _scriptThread(self, script, jobNum = None, jobObj = None, allDict = None):
    try:
        tmp = {}
        if jobObj is not None:
            for key, value in jobObj.get_dict().items():
                tmp[key.upper()] = value
        tmp['WORKDIR'] = self._workPath
        tmp.update(self._task.getTaskConfig())
        if jobNum is not None:
            tmp.update(self._task.getJobConfig(jobNum))
        tmp.update(allDict or {})
        env = dict(os.environ)
        for key, value in tmp.items():
            if not key.startswith('GC_'):
                key = 'GC_' + key
            env[key] = str(value)
        script = self._task.substVars('monitoring script', script, jobNum, tmp)
        if not self._silent:
            proc = LocalProcess(*shlex.split(script), **{'environment': env})
            proc_output = proc.get_output(timeout = self._runningMax)
            if proc_output.strip():
                self._log.info(proc_output.strip())
        else:
            os.system(script)
    except Exception:
        self._log.exception('Error while running user script')

def _submitJob(self, jobNum, module):
    fd, jdl = tempfile.mkstemp('.jdl')
    try:
        jdlData = self.makeJDL(jobNum, module)
        utils.safeWrite(os.fdopen(fd, 'w'), jdlData)
    except Exception:
        utils.removeFiles([jdl])
        raise BackendError('Could not write jdl data to %s.' % jdl)
    try:
        submitArgs = []
        for key_value in utils.filterDict(self._submitParams, vF = lambda v: v).items():
            submitArgs.extend(key_value)
        submitArgs.append(jdl)
        activity = Activity('submitting job %d' % jobNum)
        proc = LocalProcess(self._submitExec, '--nomsg', '--noint', '--logfile', '/dev/stderr', *submitArgs)
        gcID = None
        for line in ifilter(lambda x: x.startswith('http'), imap(str.strip, proc.stdout.iter(timeout = 60))):
            gcID = line
        retCode = proc.status(timeout = 0, terminate = True)
        activity.finish()
        if (retCode != 0) or (gcID is None):
            if self.explainError(proc, retCode):
                pass
            else:
                self._log.log_process(proc, files = {'jdl': SafeFile(jdl).read()})
    finally:
        utils.removeFiles([jdl])
    return (jobNum, utils.QM(gcID, self._createId(gcID), None), {'jdl': str.join('', jdlData)})

def _get_cms_cert(config):
    config = config.change_view(set_sections=['cms', 'access', 'proxy'])
    try:
        access = AccessToken.create_instance('VomsAccessToken', config, 'cms-proxy')
    except Exception:
        if os.environ.get('X509_USER_PROXY'):
            return os.environ['X509_USER_PROXY']
        raise CMSAuthenticationException('Unable to find grid environment')
    can_submit = ignore_exception(Exception, False, access.can_submit, 5 * 60, True)
    if not can_submit:
        logging.getLogger('access.cms').warning('The grid proxy has expired or is invalid!')
        role = config.get_list('new proxy roles', '', on_change=None)
        timeout = config.get_time('new proxy timeout', 10, on_change=None)
        lifetime = config.get_time('new proxy lifetime', 192 * 60, on_change=None)
        # password in variable name removes it from debug log
        password = getpass.getpass('Please enter proxy password: ')
        try:
            # NOTE: the span between the prompt and the LocalProcess call was
            # censored ('******') in the source; resolving the voms-proxy-init
            # executable is a reconstruction based on how proxy_init_exec is
            # used below and on resolve_install_path() usage elsewhere
            proxy_init_exec = resolve_install_path('voms-proxy-init')
            proc = LocalProcess(proxy_init_exec, '--voms', str.join(':', ['cms'] + role),
                '--valid', '%d:%d' % (lifetime / 60, lifetime % 60), logging=False)
            if password:
                proc.stdin.write(password + '\n')
            proc.stdin.close()
            proc.get_output(timeout=timeout)
        except Exception:
            raise CMSAuthenticationException('Unable to create new grid proxy')
        access = AccessToken.create_instance('VomsAccessToken', config, 'cms-proxy')  # new instance
        can_submit = ignore_exception(Exception, False, access.can_submit, 5 * 60, True)
        if not can_submit:
            raise CMSAuthenticationException('Newly created grid proxy is also invalid')
    return access.get_auth_fn_list()[0]

def cancelJobs(self, allIds):
    if len(allIds) == 0:
        raise StopIteration
    waitFlag = False
    for ids in imap(lambda x: allIds[x:x + 5], irange(0, len(allIds), 5)):
        # Delete jobs in groups of 5 - with 5 seconds between groups
        if waitFlag and not utils.wait(5):
            break
        waitFlag = True
        jobNumMap = dict(ids)
        jobs = self.writeWMSIds(ids)
        activity = utils.ActivityLog('cancelling jobs')
        proc = LocalProcess(self._cancelExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs)
        retCode = proc.status(timeout = 60, terminate = True)
        del activity
        # select cancelled jobs
        for deletedWMSId in ifilter(lambda x: x.startswith('- '), proc.stdout.iter()):
            deletedWMSId = self._createId(deletedWMSId.strip('- \n'))
            yield (jobNumMap.get(deletedWMSId), deletedWMSId)
        if retCode != 0:
            if self.explainError(proc, retCode):
                pass
            else:
                self._log.log_process(proc, files = {'jobs': utils.safeRead(jobs)})
        utils.removeFiles([jobs])

def _get_version(self, value):
    old_wd = os.getcwd()
    os.chdir(clean_path(value))
    git_proc = LocalProcess('git', 'rev-parse', '--short', 'HEAD')
    version = git_proc.get_output(timeout=10, raise_errors=False)
    os.chdir(old_wd)
    return version.strip() or 'undefined'

def _scriptThread(self, script, jobNum = None, jobObj = None, allDict = None):
    try:
        tmp = {}
        if jobNum is not None:
            tmp.update(self._task.getSubmitInfo(jobNum))
        if jobObj is not None:
            tmp.update(jobObj.getAll())
        tmp['WORKDIR'] = self._workPath
        tmp.update(self._task.getTaskConfig())
        if jobNum is not None:
            tmp.update(self._task.getJobConfig(jobNum))
            tmp.update(self._task.getSubmitInfo(jobNum))
        tmp.update(allDict or {})
        for key, value in tmp.items():
            if not key.startswith('GC_'):
                key = 'GC_' + key
            os.environ[key] = str(value)
        script = self._task.substVars(script, jobNum, tmp)
        if not self._silent:
            proc = LocalProcess(script)
            self._log.info(proc.get_output(timeout = self._runningMax))
        else:
            os.system(script)
    except Exception:
        self._log.exception('Error while running user script!')

def _get_version(self, value):
    svn_proc = LocalProcess('svnversion', clean_path(value))
    version = svn_proc.get_output(timeout=10, raise_errors=False).strip().lower()
    # different SVN versions yield different output for unversioned directories:
    if ('exported' in version) or ('unversioned' in version):
        version = None
    return version or 'undefined'

def discover(self):
    proc = LocalProcess(self._exec)
    for line in proc.stdout.iter(timeout=self._timeout):
        if not line.startswith(' ') and len(line) > 1:
            node = line.strip()
        if ('state = ' in line) and ('down' not in line) and ('offline' not in line):
            yield {'name': node}
    proc.status_raise(timeout=0)

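# Illustrative `pbsnodes` output for the discover() parser above; the sample
# lines are an assumption about the node list format, not taken from the
# source:
#
#   node01
#        state = free
#        np = 8
#   node02
#        state = down,offline
#
# Unindented lines carry the node name; the following indented 'state = '
# line decides whether the node is yielded (nodes that are 'down' or
# 'offline' are skipped).
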
def ping_host(host):
    proc = LocalProcess('ping', '-Uqnc', 1, '-W', 1, host)
    try:
        tmp = proc.get_output(timeout=1).splitlines()
        assert tmp[-1].endswith('ms')
        return float(tmp[-1].split('/')[-2]) / 1000.
    except Exception:
        return None

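# Assuming iputils-style ping output, the last line looks like
#   rtt min/avg/max/mdev = 0.045/0.046/0.047/0.000 ms
# so split('/')[-2] picks the third number (the 'max' field, in ms), which
# is converted to seconds. Hypothetical usage:
#
#   rtt = ping_host('localhost')  # e.g. 0.000047 (seconds), or None on failure
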
def _list_endpoint_all(self):
    result = []
    proc = LocalProcess(self._lcg_infosites_exec, 'wms')
    for line in proc.stdout.iter(timeout=10):
        result.append(line.strip())
    proc.status_raise(timeout=0)
    random.shuffle(result)
    return result

def listWMS_all(self):
    result = []
    proc = LocalProcess(self._exeLCGInfoSites, 'wms')
    for line in proc.stdout.iter(timeout = 10):
        result.append(line.strip())
    proc.status_raise(timeout = 0)
    random.shuffle(result)
    return result

def _getJobsOutput(self, ids):
    if len(ids) == 0:
        raise StopIteration
    basePath = os.path.join(self._outputPath, 'tmp')
    try:
        if len(ids) == 1:  # For single jobs create single subdir
            tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest())
        else:
            tmpPath = basePath
        utils.ensureDirExists(tmpPath)
    except Exception:
        raise BackendError('Temporary path "%s" could not be created.' % tmpPath, BackendError)
    jobNumMap = dict(ids)
    jobs = self.writeWMSIds(ids)
    activity = Activity('retrieving %d job outputs' % len(ids))
    proc = LocalProcess(self._outputExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs, '--dir', tmpPath)
    # yield output dirs
    todo = list(jobNumMap.values())  # list() added - .remove() is used below
    currentJobNum = None
    for line in imap(str.strip, proc.stdout.iter(timeout = 60)):
        if line.startswith(tmpPath):
            todo.remove(currentJobNum)
            outputDir = line.strip()
            if os.path.exists(outputDir):
                if 'GC_WC.tar.gz' in os.listdir(outputDir):
                    wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
                    try:
                        tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
                        os.unlink(wildcardTar)
                    except Exception:
                        self._log.error('Can\'t unpack output files contained in %s', wildcardTar)
            yield (currentJobNum, line.strip())
            currentJobNum = None
        else:
            currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
    retCode = proc.status(timeout = 0, terminate = True)
    activity.finish()
    if retCode != 0:
        if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout = 0):
            utils.removeFiles([jobs, basePath])
            raise StopIteration
        else:
            self._log.log_process(proc, files = {'jobs': SafeFile(jobs).read()})
            self._log.error('Trying to recover from error ...')
            for dirName in os.listdir(basePath):
                yield (None, os.path.join(basePath, dirName))
    # return unretrievable jobs
    for jobNum in todo:
        yield (jobNum, None)
    utils.removeFiles([jobs, basePath])

def discover(self):
    proc = LocalProcess(self._exec)
    for line in proc.stdout.iter(timeout=10):
        if not line.startswith(' ') and len(line) > 1:
            node = line.strip()
        if ('state = ' in line) and ('down' not in line) and ('offline' not in line):
            yield {'name': node}
    proc.status_raise(timeout=0)

def _get_git_version(path):
    from grid_control.utils.process_base import LocalProcess
    from grid_control.utils import clean_path
    path = clean_path(path)
    old_wd = os.getcwd()
    os.chdir(path)
    git_proc = LocalProcess('git', 'rev-parse', '--short', 'HEAD')
    version = git_proc.get_output(timeout=10, raise_errors=False)
    os.chdir(old_wd)
    return version.strip() or None

def getNodes(self):
    result = []
    proc = LocalProcess(self._nodesExec)
    for line in proc.stdout.iter():
        if not line.startswith(' ') and len(line) > 1:
            node = line.strip()
        if ('state = ' in line) and ('down' not in line) and ('offline' not in line):
            result.append(node)
    proc.status_raise(timeout = 0)
    if len(result) > 0:
        return result

def _get_jobs_output(self, gc_id_jobnum_list):
    # Get output of jobs and yield output dirs
    if len(gc_id_jobnum_list) == 0:
        raise StopIteration
    root_dn = os.path.join(self._path_output, 'tmp')
    try:
        if len(gc_id_jobnum_list) == 1:  # For single jobs create single subdir
            tmp_dn = os.path.join(root_dn, md5_hex(gc_id_jobnum_list[0][0]))
        else:
            tmp_dn = root_dn
        ensure_dir_exists(tmp_dn)
    except Exception:
        raise BackendError('Temporary path "%s" could not be created.' % tmp_dn, BackendError)
    map_gc_id2jobnum = dict(gc_id_jobnum_list)
    jobs = self._write_wms_id_list(gc_id_jobnum_list)
    activity = Activity('retrieving %d job outputs' % len(gc_id_jobnum_list))
    proc = LocalProcess(self._output_exec, '--noint', '--logfile', '/dev/stderr',
        '-i', jobs, '--dir', tmp_dn)
    # yield output dirs
    todo = list(map_gc_id2jobnum.values())  # list() added - .remove() is used below
    current_jobnum = None
    for line in imap(str.strip, proc.stdout.iter(timeout=60)):
        if line.startswith(tmp_dn):
            todo.remove(current_jobnum)
            output_dn = line.strip()
            unpack_wildcard_tar(self._log, output_dn)
            yield (current_jobnum, output_dn)
            current_jobnum = None
        else:
            current_jobnum = map_gc_id2jobnum.get(self._create_gc_id(line), current_jobnum)
    exit_code = proc.status(timeout=0, terminate=True)
    activity.finish()
    if exit_code != 0:
        if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout=0):
            remove_files([jobs, root_dn])
            raise StopIteration
        else:
            self._log.log_process(proc, files={'jobs': SafeFile(jobs).read()})
            self._log.error('Trying to recover from error ...')
            for dn in os.listdir(root_dn):
                yield (None, os.path.join(root_dn, dn))
    # return unretrievable jobs
    for jobnum in todo:
        yield (jobnum, None)
    remove_files([jobs, tmp_dn])

def _purge_done_jobs(self, wms_id_list_done):
    purge_log_fn = tempfile.mktemp('.log')
    purge_proc = LocalProcess(resolve_install_path('glite-ce-job-purge'),
        '--noint', '--logfile', purge_log_fn, str.join(' ', wms_id_list_done))
    exit_code = purge_proc.status(timeout=60)
    if exit_code != 0:
        if self._explain_error(purge_proc, exit_code):
            pass
        else:
            self._log.log_process(purge_proc)
    remove_files([purge_log_fn])

def _parseProxy(self, cached = True):
    # Return cached results if requested
    if cached and self._cache:
        return self._cache
    # Call voms-proxy-info and parse results
    proc = LocalProcess(self._infoExec, *self._getProxyArgs())
    (retCode, stdout, stderr) = proc.finish(timeout = 10)
    if (retCode != 0) and not self._ignoreWarning:
        msg = ('%s output:\n%s\n%s\n' % (self._infoExec, stdout, stderr)).replace('\n\n', '\n')
        msg += 'If job submission is still possible, you can set [access] ignore warnings = True\n'
        raise AccessTokenError(msg + '%s failed with return code %d' % (self._infoExec, retCode))
    self._cache = DictFormat(':').parse(stdout)
    return self._cache

def getVersion():
    try:
        proc_ver = LocalProcess('svnversion', '-c', pathPKG())
        version = proc_ver.get_output(timeout = 10).strip()
        if version != '':
            assert(lfilter(str.isdigit, version))
            # executable and argument passed separately - the original passed 'svn info' as one string
            proc_branch = LocalProcess('svn', 'info', pathPKG())
            if 'stable' in proc_branch.get_output(timeout = 10):
                return '%s - stable' % version
            return '%s - testing' % version
    except Exception:
        clear_current_exception()
    return __import__('grid_control').__version__ + ' or later'

def _parse_tickets(self, cached=True):
    # Return cached results if requested
    if cached and self._cache:
        return self._cache
    # Call klist and parse results
    proc = LocalProcess(self._klist_exec)
    self._cache = {}
    try:
        for line in proc.stdout.iter(timeout=10):
            if line.count('@') and (line.count(':') > 1):
                issued_expires, principal = rsplit(line, ' ', 1)
                issued_expires = issued_expires.replace('/', ' ').split()
                assert len(issued_expires) % 2 == 0
                issued_str = str.join(' ', issued_expires[:int(len(issued_expires) / 2)])
                expires_str = str.join(' ', issued_expires[int(len(issued_expires) / 2):])
                if expires_str.count(' ') == 3:
                    if len(expires_str.split()[2]) == 2:
                        expires = _parse_date(expires_str, '%m %d %y %H:%M:%S')
                    elif len(expires_str.split()[2]) == 4:
                        expires = _parse_date(expires_str, '%m %d %Y %H:%M:%S')
                    else:
                        # On NAF, we get an entirely different format now: Sep 2 12:31:34 2021
                        expires = _parse_date(expires_str, '%b %d %H:%M:%S %Y')
                elif expires_str.count(' ') == 2:  # year information is missing
                    cur_year = int(time.strftime('%Y'))
                    expires = _parse_date(expires_str + ' %d' % cur_year, '%b %d %H:%M:%S %Y')
                    issued = _parse_date(issued_str + ' %d' % cur_year, '%b %d %H:%M:%S %Y')
                    if expires < issued:  # wraparound at new year
                        expires = _parse_date(expires_str + ' %d' % (cur_year + 1), '%b %d %H:%M:%S %Y')
                self._cache.setdefault('tickets', {})[principal] = expires
            elif line.count(':') == 1:
                (key, value) = lmap(str.strip, line.split(':', 1))
                self._cache[key.lower()] = value
    except Exception:
        raise AccessTokenError('Unable to parse kerberos ticket information!')
    proc.status_raise(timeout=0)
    return self._cache

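# This variant calls a module-level _parse_date helper that is not shown in
# the snippet. A minimal sketch, assuming it matches the inline lambda used
# by the older _parseTickets variant above:
def _parse_date(value, fmt):
    # convert a formatted date string into a unix timestamp
    return time.mktime(time.strptime(value, fmt))
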
def _parseProxy(self, cached = True):
    # Return cached results if requested
    if cached and self._cache:
        return self._cache
    # Call voms-proxy-info and parse results
    args = ['--all']
    if self._proxyPath:
        args.extend(['--file', self._proxyPath])
    proc = LocalProcess(self._infoExec, *args)
    (retCode, stdout, stderr) = proc.finish(timeout = 10)
    if (retCode != 0) and not self._ignoreWarning:
        msg = ('voms-proxy-info output:\n%s\n%s\n' % (stdout, stderr)).replace('\n\n', '\n')
        msg += 'If job submission is still possible, you can set [access] ignore warnings = True\n'
        raise AccessTokenError(msg + 'voms-proxy-info failed with return code %d' % retCode)
    self._cache = utils.DictFormat(':').parse(stdout)
    return self._cache

def _se_runcmd(cmd, *urls, **kwargs):
    def _clean_se_path(url):
        return url.replace('dir://', 'file://')
    url_iter = imap(_clean_se_path, imap(_norm_se_path, urls))
    return LocalProcess(get_path_share('gc-storage-tool'), cmd, *url_iter, **kwargs)

def _parse_proxy(self, cached=True):
    # Return cached results if requested
    if cached and self._cache:
        return self._cache
    # Call voms-proxy-info and parse results
    proc = LocalProcess(self._proxy_info_exec, *self._get_proxy_info_arguments())
    (exit_code, stdout, stderr) = proc.finish(timeout=10)
    if (exit_code != 0) and not self._ignore_warning:
        msg = ('%s output:\n%s\n%s\n' % (self._proxy_info_exec, stdout, stderr)).replace('\n\n', '\n')
        msg += 'If job submission is still possible, you can set [access] ignore warnings = True\n'
        msg += '%s failed with return code %d' % (self._proxy_info_exec, exit_code)
        raise AccessTokenError(msg)
    self._cache = DictFormat(':').parse(stdout)
    if not self._cache:
        msg = 'Unable to parse access token information:\n\t%s\n\t%s\n'
        raise AccessTokenError(msg % (stdout.strip(), stderr.strip()))
    return self._cache

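# Illustrative voms-proxy-info output that DictFormat(':') turns into the
# cache dict; the sample values are assumptions, not taken from the source:
#
#   subject   : /DC=org/DC=example/CN=Jane Doe/CN=proxy
#   issuer    : /DC=org/DC=example/CN=Jane Doe
#   type      : full legacy globus proxy
#   timeleft  : 11:59:58
#
# Each 'key : value' line becomes one dictionary entry keyed by 'subject',
# 'issuer', 'timeleft' and so on.
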
def _get_cms_cert(config):
    config = config.change_view(set_sections=['cms', 'access', 'proxy'])
    try:
        access = AccessToken.create_instance('VomsAccessToken', config, 'cms-proxy')
    except Exception:
        if os.environ.get('X509_USER_PROXY'):
            return os.environ['X509_USER_PROXY']
        raise CMSAuthenticationException('Unable to find grid environment')
    can_submit = ignore_exception(Exception, False, access.can_submit, 5 * 60, True)
    if not can_submit:
        logging.getLogger('access.cms').warning('The grid proxy has expired or is invalid!')
        role = config.get_list('new proxy roles', '', on_change=None)
        timeout = config.get_time('new proxy timeout', 10, on_change=None)
        lifetime = config.get_time('new proxy lifetime', 192 * 60, on_change=None)
        signal_handler = signal.signal(signal.SIGINT, signal.SIG_DFL)  # reset Ctrl+C handler
        # password in variable name removes it from debug log
        password = getpass.getpass('Please enter proxy password: ')
        # NOTE: the span between the prompt and the LocalProcess call was
        # censored ('******') in the source; restoring the saved SIGINT handler
        # and resolving the voms-proxy-init executable are reconstructions
        signal.signal(signal.SIGINT, signal_handler)
        try:
            proxy_init_exec = resolve_install_path('voms-proxy-init')
            proc = LocalProcess(proxy_init_exec, '--voms', str.join(':', ['cms'] + role),
                '--valid', '%d:%d' % (lifetime / 60, lifetime % 60), logging=False)
            if password:
                proc.stdin.write(password + '\n')
            proc.stdin.close()
            proc.get_output(timeout=timeout)
        except Exception:
            raise CMSAuthenticationException('Unable to create new grid proxy')
        access = AccessToken.create_instance('VomsAccessToken', config, 'cms-proxy')  # new instance
        can_submit = ignore_exception(Exception, False, access.can_submit, 5 * 60, True)
        if not can_submit:
            raise CMSAuthenticationException('Newly created grid proxy is also invalid')
    return access.get_auth_fn_list()[0]

def discover(self):
    active = False
    keys = [WMS.MEMORY, WMS.CPUTIME, WMS.WALLTIME]
    parser = dict(izip(keys, [int, parse_time, parse_time]))
    proc = LocalProcess(self._exec, '-q')
    for line in proc.stdout.iter(timeout=10):
        if line.startswith('-'):
            active = True
        elif line.startswith(' '):
            active = False
        elif active:
            fields = lmap(str.strip, line.split()[:4])
            queue_dict = {'name': fields[0]}
            for key, value in ifilter(lambda k_v: not k_v[1].startswith('-'), izip(keys, fields[1:])):
                queue_dict[key] = parser[key](value)
            yield queue_dict
    proc.status_raise(timeout=0)

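# Illustrative `qstat -q` output for the queue discovery above; the sample
# rows are assumptions about the PBS table format, not taken from the source:
#
#   Queue            Memory CPU Time Walltime Node  Run Que Lm  State
#   ---------------- ------ -------- -------- ----  --- --- --  -----
#   short              --   01:00:00 02:00:00   --    2   0 --   E R
#   long               --      --       --      --    5   3 --   E R
#
# The dashed separator line switches parsing on; '--' placeholder columns
# are dropped by the startswith('-') filter before the value parsers run.
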
def getQueues(self):
    (queues, active) = ({}, False)
    keys = [WMS.MEMORY, WMS.CPUTIME, WMS.WALLTIME]
    parser = dict(izip(keys, [int, parseTime, parseTime]))
    proc = LocalProcess(self.statusExec, '-q')
    for line in proc.stdout.iter(timeout = 10):
        if line.startswith('-'):
            active = True
        elif line.startswith(' '):
            active = False
        elif active:
            fields = lmap(str.strip, line.split()[:4])
            queueInfo = {}
            for key, value in ifilter(lambda k_v: not k_v[1].startswith('-'), izip(keys, fields[1:])):
                queueInfo[key] = parser[key](value)
            queues[fields[0]] = queueInfo
    proc.status_raise(timeout = 0)
    return queues

def matchSites(self, endpoint):
    activity = Activity('Discovering available WMS services - testing %s' % endpoint)
    checkArgs = ['-a']
    if endpoint:
        checkArgs.extend(['-e', endpoint])
    checkArgs.append(utils.pathShare('null.jdl'))
    proc = LocalProcess(self._exeGliteWMSJobListMatch, *checkArgs)
    result = []
    for line in proc.stdout.iter(timeout = 3):
        if line.startswith(' - '):
            result.append(line[3:].strip())
    activity.finish()
    if proc.status(timeout = 0) is None:
        self.wms_timeout[endpoint] = self.wms_timeout.get(endpoint, 0) + 1
        if self.wms_timeout.get(endpoint, 0) > 10:  # remove endpoints after 10 failures
            self.wms_all.remove(endpoint)
        return []
    return result

def discover(self):
    nodes = set()
    proc = LocalProcess(self._configExec, '-shgrpl')
    for group in proc.stdout.iter(timeout=10):
        group = group.strip()  # strip the trailing newline before reuse as command line argument
        yield {'name': group}
        proc_g = LocalProcess(self._configExec, '-shgrp_resolved', group)
        for host_list in proc_g.stdout.iter(timeout=10):
            nodes.update(host_list.split())
        proc_g.status_raise(timeout=0)
    for host in sorted(nodes):
        yield {'name': host.strip()}
    proc.status_raise(timeout=0)

def _match_sites(self, endpoint):
    activity = Activity('Discovering available WMS services - testing %s' % endpoint)
    check_arg_list = ['-a']
    if endpoint:
        check_arg_list.extend(['-e', endpoint])
    check_arg_list.append(get_path_share('null.jdl'))
    proc = LocalProcess(self._job_list_match_exec, *check_arg_list)
    result = []
    for line in proc.stdout.iter(timeout=3):
        if line.startswith(' - '):
            result.append(line[3:].strip())
    activity.finish()
    if proc.status(timeout=0) is None:
        self._wms_timeout_dict[endpoint] = self._wms_timeout_dict.get(endpoint, 0) + 1
        if self._wms_timeout_dict.get(endpoint, 0) > 10:  # remove endpoints after 10 failures
            self._wms_list_all.remove(endpoint)
        return []
    return result

def getNodes(self):
    result = set()
    proc = LocalProcess(self._configExec, '-shgrpl')
    for group in proc.stdout.iter(timeout = 10):
        result.add(group.strip())
        # option and group passed separately - the original passed '-shgrp_resolved %s' as one string
        proc_g = LocalProcess(self._configExec, '-shgrp_resolved', group.strip())
        for host in proc_g.stdout.iter(timeout = 10):
            result.update(host.split())
        proc_g.status_raise(timeout = 0)
    proc.status_raise(timeout = 0)
    if len(result) > 0:
        return list(result)

def discover(self):
    active = False
    keys = [WMS.MEMORY, WMS.CPUTIME, WMS.WALLTIME]
    parser = dict(izip(keys, [int, parseTime, parseTime]))
    proc = LocalProcess(self._exec, '-q')
    for line in proc.stdout.iter(timeout=10):
        if line.startswith('-'):
            active = True
        elif line.startswith(' '):
            active = False
        elif active:
            fields = lmap(str.strip, line.split()[:4])
            queueInfo = {'name': fields[0]}
            for key, value in ifilter(lambda k_v: not k_v[1].startswith('-'), izip(keys, fields[1:])):
                queueInfo[key] = parser[key](value)
            yield queueInfo
    proc.status_raise(timeout=0)

def se_runcmd(cmd, varDict, *urls):
    runLib = utils.pathShare('gc-run.lib')
    args = str.join(' ', imap(lambda x: '"%s"' % ensurePrefix(x).replace('dir://', 'file://'), urls))
    varString = str.join(' ', imap(lambda x: 'export %s="%s";' % (x, varDict[x]), varDict))
    return LocalProcess('/bin/bash', '-c', '. %s || exit 99; %s %s %s' % (runLib, varString, cmd, args))

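# Hypothetical usage of se_runcmd - the shell function name 'url_copy' and
# the variable names are illustrative assumptions, not from the source. The
# helper merely sources gc-run.lib, exports varDict and appends quoted URLs:
#
#   proc = se_runcmd('url_copy', {'GC_KEEPTMP': '1'},
#       'dir:///tmp/job_output', 'se://storage.example.org/user/out')
#   proc.status(timeout=60)
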
def _submit_job(self, jobnum, task):
    # Submit job and yield (jobnum, WMS ID, other data)
    jdl_fd, jdl_fn = tempfile.mkstemp('.jdl')
    try:
        jdl_line_list = self._make_jdl(jobnum, task)
        safe_write(os.fdopen(jdl_fd, 'w'), jdl_line_list)
    except Exception:
        remove_files([jdl_fn])
        raise BackendError('Could not write jdl data to %s.' % jdl_fn)
    try:
        submit_arg_list = []
        for key_value in filter_dict(self._submit_args_dict, value_filter=identity).items():
            submit_arg_list.extend(key_value)
        submit_arg_list.append(jdl_fn)
        activity = Activity('submitting job %d' % jobnum)
        proc = LocalProcess(self._submit_exec, '--nomsg', '--noint', '--logfile', '/dev/stderr', *submit_arg_list)
        wms_id = None
        stripped_stdout_iter = imap(str.strip, proc.stdout.iter(timeout=60))
        for line in ifilter(lambda x: x.startswith('http'), stripped_stdout_iter):
            wms_id = line
        exit_code = proc.status(timeout=0, terminate=True)
        activity.finish()
        if (exit_code != 0) or (wms_id is None):
            if self._explain_error(proc, exit_code):
                pass
            else:
                self._log.log_process(proc, files={'jdl': SafeFile(jdl_fn).read()})
    finally:
        remove_files([jdl_fn])
    job_data = {'jdl': str.join('', jdl_line_list)}
    return (jobnum, self._create_gc_id(wms_id), job_data)

def checkJobs(self, ids):
    if len(ids) == 0:
        raise StopIteration
    jobNumMap = dict(ids)
    jobs = self.writeWMSIds(ids)
    activity = utils.ActivityLog('checking job status')
    proc = LocalProcess(self._statusExec, '--verbosity', 1, '--noint', '--logfile', '/dev/stderr', '-i', jobs)
    for data in self._parseStatus(proc.stdout.iter(timeout = 60)):
        data['id'] = self._createId(data['id'])
        yield (jobNumMap.get(data['id']), data['id'], self._statusMap[data['status']], data)
    retCode = proc.status(timeout = 0, terminate = True)
    del activity
    if retCode != 0:
        if self.explainError(proc, retCode):
            pass
        else:
            self._log.log_process(proc, files = {'jobs': utils.safeRead(jobs)})
    utils.removeFiles([jobs])

def bulkSubmissionBegin(self):
    self._submitParams.update({'-d': None})
    if self._discovery_module:
        self._submitParams.update({'-e': self._discovery_module.getWMS()})
    if self._useDelegate is False:
        self._submitParams.update({'-a': ' '})
        return True
    dID = 'GCD' + md5_hex(str(time.time()))[:10]
    activity = utils.ActivityLog('creating delegate proxy for job submission')
    delegateArgs = []  # typo 'deletegateArgs' fixed
    if self._configVO:
        delegateArgs.extend(['--config', self._configVO])
    proc = LocalProcess(self._delegateExec, '-d', dID, '--noint', '--logfile', '/dev/stderr', *delegateArgs)
    output = proc.get_output(timeout = 10, raise_errors = False)
    if ('glite-wms-job-delegate-proxy Success' in output) and (dID in output):
        self._submitParams.update({'-d': dID})
    del activity
    if proc.status(timeout = 0, terminate = True) != 0:
        self._log.log_process(proc)
    return (self._submitParams.get('-d', None) is not None)

def _get_submit_proc(self, jobnum, sandbox, job_name, reqs):
    (stdout, stderr) = (os.path.join(sandbox, 'gc.stdout'), os.path.join(sandbox, 'gc.stderr'))
    submit_args = list(self._submit_opt_list)
    submit_args.extend(shlex.split(self._get_submit_arguments(jobnum, job_name, reqs, sandbox, stdout, stderr)))
    submit_args.append(get_path_share('gc-local.sh'))
    submit_args.extend(shlex.split(self._get_job_arguments(jobnum, sandbox)))
    return LocalProcess(self._submit_exec, *submit_args)

def _begin_bulk_submission(self):
    self._submit_args_dict.update({'-D': None})
    if self._use_delegate is False:
        self._submit_args_dict.update({'-a': ' '})
        return True
    delegate_id = 'GCD' + md5_hex(str(time.time()))[:10]
    activity = Activity('creating delegate proxy for job submission')
    delegate_arg_list = ['-e', self._ce[:self._ce.rfind('/')]]
    if self._config_fn:
        delegate_arg_list.extend(['--config', self._config_fn])
    proc = LocalProcess(self._delegate_exec, '-d', delegate_id, '--logfile', '/dev/stderr', *delegate_arg_list)
    output = proc.get_output(timeout=10, raise_errors=False)
    # 'succesfully' (sic) - kept as-is to match the tool's actual output
    if ('succesfully delegated to endpoint' in output) and (delegate_id in output):
        self._submit_args_dict.update({'-D': delegate_id})
    activity.finish()
    if proc.status(timeout=0, terminate=True) != 0:
        self._log.log_process(proc)
    return self._submit_args_dict.get('-D') is not None

def _begin_bulk_submission(self):
    self._submit_args_dict.update({'-d': None})
    if self._discovery_plugin:
        self._submit_args_dict.update({'-e': self._discovery_plugin.get_endpoint()})
    if self._use_delegate is False:
        self._submit_args_dict.update({'-a': ' '})
        return True
    delegate_id = 'GCD' + md5_hex(str(time.time()))[:10]
    activity = Activity('creating delegate proxy for job submission')
    delegate_arg_list = []
    if self._config_fn:
        delegate_arg_list.extend(['--config', self._config_fn])
    proc = LocalProcess(self._delegate_exec, '-d', delegate_id, '--noint', '--logfile', '/dev/stderr', *delegate_arg_list)
    output = proc.get_output(timeout=10, raise_errors=False)
    if ('glite-wms-job-delegate-proxy Success' in output) and (delegate_id in output):
        self._submit_args_dict.update({'-d': delegate_id})
    activity.finish()
    if proc.status(timeout=0, terminate=True) != 0:
        self._log.log_process(proc)
    return self._submit_args_dict.get('-d') is not None

def _recover_jobs(self):
    # repair a broken job database archive with 'zip -FF'
    proc = LocalProcess('zip', '-FF', self._db_fn, '--out', '%s.tmp' % self._db_fn)
    proc.stdin.write('y\n')  # answer zip's interactive confirmation prompt
    proc.status(timeout=None)
    os.rename(self._db_fn, self._db_fn + '.broken')
    os.rename(self._db_fn + '.tmp', self._db_fn)
    tar = zipfile.ZipFile(self._db_fn, 'r', zipfile.ZIP_DEFLATED)
    remove_files([self._db_fn + '.broken'])
    broken_fn_list = []
    for tar_info_fn in tar.namelist():
        try:
            tuple(imap(lambda s: int(s[1:]), tar_info_fn.split('_', 1)))  # check name
            fp = tar.open(tar_info_fn)
            try:
                fp.read()
            finally:
                fp.close()
        except Exception:
            clear_current_exception()
            broken_fn_list.append(tar_info_fn)
    for broken in broken_fn_list:
        os.system('zip %s -d %s' % (self._db_fn, broken))

def discover(self):
    tags = ['h_vmem', 'h_cpu', 's_rt']
    reqs = dict(izip(tags, [WMS.MEMORY, WMS.CPUTIME, WMS.WALLTIME]))
    parser = dict(izip(tags, [int, parseTime, parseTime]))
    proc = LocalProcess(self._configExec, '-sql')
    for queue in imap(str.strip, proc.stdout.iter(timeout=10)):
        proc_q = LocalProcess(self._configExec, '-sq', queue)
        queueInfo = {'name': queue}
        for line in proc_q.stdout.iter(timeout=10):
            attr, value = lmap(str.strip, line.split(' ', 1))
            if (attr in tags) and (value != 'INFINITY'):
                queueInfo[reqs[attr]] = parser[attr](value)
        proc_q.status_raise(timeout=0)
        yield queueInfo
    proc.status_raise(timeout=0)

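# Illustrative `qconf -sq <queue>` attribute lines consumed above; the
# sample values are assumptions about the SGE format, not from the source:
#
#   qname                 short
#   h_vmem                2048
#   h_cpu                 01:00:00
#   s_rt                  INFINITY
#
# Only the h_vmem/h_cpu/s_rt attributes are mapped to WMS requirement keys,
# and 'INFINITY' placeholders are skipped.
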
def getQueues(self):
    queues = {}
    tags = ['h_vmem', 'h_cpu', 's_rt']
    reqs = dict(izip(tags, [WMS.MEMORY, WMS.CPUTIME, WMS.WALLTIME]))
    parser = dict(izip(tags, [int, parseTime, parseTime]))
    proc = LocalProcess(self._configExec, '-sql')
    for queue in imap(str.strip, proc.stdout.iter(timeout=10)):
        queues[queue] = dict()
        # option and queue passed separately - the original passed '-sq %s' as one string
        proc_q = LocalProcess(self._configExec, '-sq', queue)
        for line in proc_q.stdout.iter(timeout=10):
            attr, value = lmap(str.strip, line.split(' ', 1))
            if (attr in tags) and (value != 'INFINITY'):
                queues[queue][reqs[attr]] = parser[attr](value)
        proc_q.status_raise(timeout=0)
    proc.status_raise(timeout=0)
    return queues

def getVersion():
    try:
        proc_ver = LocalProcess('svnversion', '-c', pathPKG())
        version = proc_ver.get_output(timeout=10).strip()
        if version != '':
            assert any(imap(str.isdigit, version))
            # executable and argument passed separately - the original passed 'svn info' as one string
            proc_branch = LocalProcess('svn', 'info', pathPKG())
            if 'stable' in proc_branch.get_output(timeout=10):
                return '%s - stable' % version
            return '%s - testing' % version
    except Exception:
        pass
    return __import__('grid_control').__version__ + ' or later'

def image(self):
    proc = LocalProcess('neato', '-Tpng')
    proc.stdin.write(self._get_workflow_graph())
    proc.stdin.close()
    cherrypy.response.headers['Content-Type'] = 'image/png'
    return proc.get_output(timeout=20)