def resolve(self):
    so_entries_dict = {}
    for option in self._content:
        for entry in self._content[option]:
            so_entries_dict.setdefault(entry.section, {}).setdefault(entry.option, []).append(entry)
    so_value_dict = {}
    for section in so_entries_dict:
        for option in so_entries_dict[section]:
            result = ''
            try:
                entry = ConfigEntry.processEntriesRaw(so_entries_dict[section][option])[0]
                if entry:
                    result = entry.value
            except ConfigError:  # eg. by '-=' without value
                clear_current_exception()
            so_value_dict.setdefault(section, {})[option] = result
    for option in self._content:
        for entry in self._content[option]:
            subst_dict = dict(so_value_dict.get('default', {}))
            subst_dict.update(so_value_dict.get('global', {}))
            subst_dict.update(so_value_dict.get(entry.section, {}))
            try:  # Protection for non-interpolation "%" in value
                value = (entry.value.replace('%', '\x01').replace('\x01(', '%(') % subst_dict).replace('\x01', '%')
            except Exception:
                raise ConfigError('Unable to interpolate value %r with %r' % (entry.value, subst_dict))
            if entry.value != value:
                entry.value = value
                entry.source = entry.source + ' [interpolated]'

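# A minimal standalone sketch (not part of the original class) of the '%'-protection trick used
# above: literal '%' characters are swapped to a placeholder byte so that only '%(key)s' patterns
# survive Python's string interpolation. The helper name is hypothetical.
def _interpolate_keeping_percent(value, subst_dict):
    protected = value.replace('%', '\x01').replace('\x01(', '%(')  # keep '%(' as interpolation syntax
    return (protected % subst_dict).replace('\x01', '%')  # restore literal percent signs

# Example: '%(name)s' is substituted while the trailing '100%' stays literal
assert _interpolate_keeping_percent('run %(name)s at 100%', {'name': 'gc'}) == 'run gc at 100%'
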
def tchain(iterable_iter, timeout=None, max_concurrent=None,
        ex_cls=NestedException, ex_msg='Caught exception during threaded chain'):
    # Combines multiple, threaded generators into single generator
    threads = []
    result = GCQueue()
    exc = ExceptionCollector()
    iterable_list = list(iterable_iter)

    def _start_generators():
        while iterable_list and ((max_concurrent is None) or (len(threads) < max_concurrent)):
            iterable = iterable_list.pop(0)
            threads.append(start_daemon('tchain generator thread (%s)' % repr(iterable)[:50],
                _tchain_thread, exc, iterable, result))
    _start_generators()

    if timeout is not None:
        t_end = time.time() + timeout
    while len(threads):
        if timeout is not None:
            timeout = max(0, t_end - time.time())
        try:
            tmp = result.get(timeout)
        except IndexError:  # Empty queue after waiting for timeout
            clear_current_exception()
            break
        if tmp == GCQueue:
            threads.pop()  # which thread is irrelevant - only used as counter
            _start_generators()
        else:
            yield tmp
    exc.raise_any(ex_cls(ex_msg))

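# Hedged usage sketch for tchain (assumes the grid-control helpers GCQueue, start_daemon and
# ExceptionCollector referenced above are importable): items from several slow generators are
# consumed as soon as any one of them produces output. The _slow_range helper is hypothetical.
def _slow_range(count, delay):
    for idx in range(count):
        time.sleep(delay)
        yield (delay, idx)

# for item in tchain([_slow_range(3, 0.10), _slow_range(3, 0.05)], timeout=5):
#     print(item)  # output from both generators arrives interleaved
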
def _parse(self, proc):
    job_info = {}
    for line in proc.stdout.iter(self._timeout):
        if not line.strip():
            yield job_info
            job_info = {}
        try:
            (key, value) = imap(str.strip, line.split(' = ', 1))
        except Exception:
            clear_current_exception()
            continue
        if key == 'JobStatus':
            try:
                job_info[CheckInfo.RAW_STATUS] = int(value)
            except ValueError:  # e.g. 'undefined' -> set status to unknown
                job_info[CheckInfo.RAW_STATUS] = -1
        elif key == 'GlobalJobId':
            job_info[CheckInfo.WMSID] = value.split('#')[1]
            job_info[key] = value.strip('"')
        elif key == 'RemoteHost':
            job_info[CheckInfo.WN] = value.strip('"')
        elif 'date' in key.lower():
            job_info[key] = value
    yield job_info

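# Hedged example input for the parser above - a condor_q long-format style block of
# 'key = value' lines per job, separated by blank lines (the concrete values are made up):
_example_condor_q_long_output = '\n'.join([
    'JobStatus = 2',
    'GlobalJobId = "schedd.example.org#1234.0#1450000000"',
    'RemoteHost = "[email protected]"',
    'QDate = 1450000000',
    '',
])
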
def retrieveJobs(self, gcID_jobNum_List):  # Process output sandboxes returned by getJobsOutput
    # Function to force moving a directory
    def forceMove(source, target):
        try:
            if os.path.exists(target):
                shutil.rmtree(target)
        except IOError:
            self._log.exception('%r cannot be removed', target)
            return False
        try:
            shutil.move(source, target)
        except IOError:
            self._log.exception('Error moving job output directory from %r to %r', source, target)
            return False
        return True

    retrievedJobs = []
    for inJobNum, pathName in self._getJobsOutput(gcID_jobNum_List):
        # inJobNum != None, pathName == None => Job could not be retrieved
        if pathName is None:
            if inJobNum not in retrievedJobs:
                yield (inJobNum, -1, {}, None)
            continue
        # inJobNum == None, pathName != None => Found leftovers of job retrieval
        if inJobNum is None:
            continue
        # inJobNum != None, pathName != None => Job retrieval from WMS was ok
        jobFile = os.path.join(pathName, 'job.info')
        try:
            job_info = self._job_parser.process(pathName)
        except Exception:
            self._log.exception('Unable to parse job.info')
            job_info = None
        if job_info:
            jobNum = job_info[JobResult.JOBNUM]
            if jobNum != inJobNum:
                raise BackendError('Invalid job id in job file %s' % jobFile)
            if forceMove(pathName, os.path.join(self._outputPath, 'job_%d' % jobNum)):
                retrievedJobs.append(inJobNum)
                yield (jobNum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW], pathName)
            else:
                yield (jobNum, -1, {}, None)
            continue
        # Clean empty pathNames
        for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
            try:
                os.rmdir(subDir)
            except Exception:
                clear_current_exception()
        if os.path.exists(pathName):
            # Preserve failed job
            utils.ensureDirExists(self._failPath, 'failed output directory')
            forceMove(pathName, os.path.join(self._failPath, os.path.basename(pathName)))
        yield (inJobNum, -1, {}, None)

def emit(self, record):
    fp = open(self._fn, self._mode)
    try:
        try:
            for idx, instance in enumerate(GCLogHandler.config_instances):
                fp.write('-' * 70 + '\nConfig instance %d\n' % idx + '=' * 70 + '\n')
                instance.write(fp)
        except Exception:
            fp.write('-> unable to display configuration!\n')
            clear_current_exception()
    finally:
        if GCLogHandler.config_instances:
            fp.write('\n' + '*' * 70 + '\n')
        if make_enum.enum_list:
            fp.write('\nList of enums\n')
            for enum in make_enum.enum_list:
                fp.write('\t%s\n' % str.join('|', imap(lambda name_value: '%s:%s' % name_value,
                    izip(enum.enum_name_list, enum.enum_value_list))))
            fp.write('\n' + '*' * 70 + '\n')
        fp.write('\n')
        fp.close()
    logging.FileHandler.emit(self, record)
    sys.stderr.write('\nIn case this is caused by a bug, please send the log file:\n' +
        '\t%r\n' % self._fn + 'to [email protected]\n')

def _generateDatasetName(self, key, data):
    if self._discovery:
        return GCProvider._generateDatasetName(self, key, data)
    if 'CMSSW_DATATIER' not in data:
        raise DatasetError('Incompatible data tiers in dataset: %s' % data)
    getPathComponents = lambda path: utils.QM(path, tuple(path.strip('/').split('/')), ())
    userPath = getPathComponents(self._ds_name)

    (primary, processed, tier) = (None, None, None)
    # In case of a child dataset, use the parent infos to construct new path
    for parent in data.get('PARENT_PATH', []):
        if len(userPath) == 3:
            (primary, processed, tier) = userPath
        else:
            try:
                (primary, processed, tier) = getPathComponents(parent)
            except Exception:
                clear_current_exception()
    if (primary is None) and (len(userPath) > 0):
        primary = userPath[0]
        userPath = userPath[1:]

    if len(userPath) == 2:
        (processed, tier) = userPath
    elif len(userPath) == 1:
        (processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
    elif len(userPath) == 0:
        (processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

    rawDS = '/%s/%s/%s' % (primary, processed, tier)
    if None in (primary, processed, tier):
        raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self._ds_name, rawDS))
    return utils.replaceDict(rawDS, data)

def process(self, dn):
    job_info_dict = None
    try:
        job_info_dict = JobInfoProcessor.process(self, dn)
    except JobResultError:
        logger = logging.getLogger('jobs.results')
        logger.warning('Unable to process job information', exc_info=get_current_exception())
        clear_current_exception()
    if job_info_dict:
        job_data_dict = job_info_dict[JobResult.RAW]
        result = {}

        def get_items_with_key(key_prefix):
            return ifilter(lambda key_value: key_value[0].startswith(key_prefix), job_data_dict.items())

        # parse old job info data format for files
        old_fmt_header = [FileInfo.Hash, FileInfo.NameLocal, FileInfo.NameDest, FileInfo.Path]
        for (file_key, file_data) in get_items_with_key('FILE'):
            file_idx = file_key.replace('FILE', '') or '0'
            result[int(file_idx)] = dict(izip(old_fmt_header, file_data.strip('"').split(' ')))

        # parse new job info data format
        for (file_key, file_data) in get_items_with_key('OUTPUT_FILE'):
            (file_idx, file_prop) = file_key.replace('OUTPUT_FILE_', '').split('_')
            if isinstance(file_data, str):
                file_data = file_data.strip('"')
            file_prop = file_prop.lower().replace('dest', 'namedest').replace('local', 'namelocal')
            result.setdefault(int(file_idx), {})[FileInfo.str2enum(file_prop)] = file_data
        return list(result.values())

def run(self):
    if abs(self.last_poll_time - time.time()) > self.max_poll_interval:
        # check migration status
        try:
            migration_details = self.migration_task.migration_request['migration_details']
            migration_request_id = migration_details['migration_request_id']
            request_status = self.migration_task.dbs_client.migration_request_status(
                migration_rqst_id=migration_request_id)
            self.migration_task.logger.debug("%s has migration_status=%s" % (
                self.migration_task, request_status[0]['migration_status']))
            self.last_poll_time = time.time()
        except AttributeError:
            clear_current_exception()  # simulation
            logging.warning("Simulation")
            request_status = [{'migration_status': 2}]
            self.migration_task.logger.debug("%s has migration_status=%s" % (
                self.migration_task, request_status[0]['migration_status']))
            self.last_poll_time = time.time()
        if request_status[0]['migration_status'] == 2:
            # migration okay
            self.migration_task.state.__class__ = MigrationDoneState
        elif request_status[0]['migration_status'] == 9:
            # migration failed
            self.migration_task.state.__class__ = MigrationFailedState

def __init__(self):
    # Collect host / user / installation specific config files
    def _resolve_hostname():
        import socket
        host = socket.gethostname()
        return ignore_exception(Exception, host, lambda: socket.gethostbyaddr(host)[0])

    try:
        hostname = hang_protection(_resolve_hostname, timeout=5)
    except TimeoutException:
        clear_current_exception()
        hostname = None
        logging.getLogger('console').warning('System call to resolve hostname is hanging!')

    def _get_default_config_fn_iter():  # return possible default config files
        if hostname:  # host / domain specific
            for part_idx in irange(hostname.count('.') + 1, -1, -1):
                yield get_path_pkg('../config/%s.conf' % hostname.split('.', part_idx)[-1])
        yield '/etc/grid-control.conf'  # system specific
        yield '~/.grid-control.conf'  # user specific
        yield get_path_pkg('../config/default.conf')  # installation specific
        if os.environ.get('GC_CONFIG'):
            yield '$GC_CONFIG'  # environment specific

    config_fn_list = list(_get_default_config_fn_iter())
    log = logging.getLogger('config.sources.default')
    log.log(logging.DEBUG1, 'Possible default config files: %s', str.join(', ', config_fn_list))
    config_fn_iter = imap(lambda fn: resolve_path(fn, must_exist=False), config_fn_list)
    FileConfigFiller.__init__(self, lfilter(os.path.exists, config_fn_iter), add_search_path=False)

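# Hedged worked example for the lookup above: with hostname 'node1.example.org' the generator
# yields the domain-to-host chain ../config/org.conf, ../config/example.org.conf and
# ../config/node1.example.org.conf, followed by /etc/grid-control.conf, ~/.grid-control.conf,
# the packaged default.conf and - if the environment variable is set - $GC_CONFIG.
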
def getGraph(instance, graph=None, visited=None):
    graph = graph or {}
    visited = visited or set()
    children = []
    for attr in dir(instance):
        child = getattr(instance, attr)
        try:
            children.extend(child)
            children.extend(child.values())
        except Exception:
            children.append(child)
    for child in children:
        try:
            if 'grid_control' not in child.__module__:
                continue
            if child.__class__.__name__ in ['instancemethod', 'function', 'type']:
                continue
            if child in (None, True, False):
                continue
            graph.setdefault(instance, []).append(child)
            if child not in visited:
                visited.add(child)
                getGraph(child, graph, visited)
        except Exception:
            clear_current_exception()
    return graph

def _submit_get_jobs_throttled(self, job_len_submit):
    # Verification heuristic - check whether enough jobs have succeeded before submitting more
    job_len_active = self.job_db.get_job_len(ClassSelector(JobClass.PROCESSING))
    job_len_success = self.job_db.get_job_len(ClassSelector(JobClass.SUCCESS))
    job_len_done = self.job_db.get_job_len(ClassSelector(JobClass.PROCESSED))
    job_len_total = job_len_done + job_len_active
    verify_idx = bisect.bisect_left(self._verify_chunk_list, job_len_total)
    try:
        success_ratio = job_len_success * 1.0 / self._verify_chunk_list[verify_idx]
        goal = self._verify_chunk_list[verify_idx] * self._verify_threshold_list[verify_idx]
        if self._verify_chunk_list[verify_idx] - job_len_done + job_len_success < goal:
            if not self._unreachable_goal_flag:
                self._log.log_time(logging.WARNING,
                    'All remaining jobs are vetoed by an unachievable verification goal!')
                self._log.log_time(logging.INFO, 'Current goal: %d successful jobs out of %d',
                    goal, self._verify_chunk_list[verify_idx])
                self._unreachable_goal_flag = True
            return 0
        if success_ratio < self._verify_threshold_list[verify_idx]:
            return min(job_len_submit, self._verify_chunk_list[verify_idx] - job_len_total)
        else:
            return min(job_len_submit, self._verify_chunk_list[verify_idx + 1] - job_len_total)
    except IndexError:
        clear_current_exception()
        self._log.log_time(logging.DEBUG, 'All verification chunks passed')
        self._log.log_time(logging.DEBUG, 'Verification submission throttle disabled')
        self._verify = False
        return job_len_submit

def _script_thread(self, script, task, jobnum=None, job_obj=None, add_dict=None):
    # Get both task and job config / state dicts
    try:
        tmp = {}
        if job_obj is not None:
            for key, value in job_obj.get_dict().items():
                tmp[key.upper()] = value
        tmp['GC_WORKDIR'] = self._path_work
        if jobnum is not None:
            tmp.update(task.get_job_dict(jobnum))
        tmp.update(add_dict or {})
        env = dict(os.environ)
        for key, value in tmp.items():
            if not key.startswith('GC_'):
                key = 'GC_' + key
            env[key] = str(value)
        script = task.substitute_variables('monitoring script', script, jobnum, tmp)
        if not self._silent:
            proc = LocalProcess(*shlex.split(script), **{'env_dict': env})
            proc_output = proc.get_output(timeout=self._script_timeout)
            if proc_output.strip():
                self._log.info(proc_output.strip())
        else:
            os.system(script)
    except Exception:
        self._log.exception('Error while running user script')
        clear_current_exception()

def kill(self):
    try:
        os.kill(self.proc.pid, signal.SIGTERM)
    except OSError:
        if get_current_exception().errno != errno.ESRCH:  # errno.ESRCH: no such process (already dead)
            raise
        clear_current_exception()

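# Standalone sketch of the same idiom with only the standard library: ignore "no such process"
# when signalling a child that may already have exited, but re-raise every other OSError.
import errno, os, signal

def _terminate_quietly(pid):
    try:
        os.kill(pid, signal.SIGTERM)
    except OSError as exc:
        if exc.errno != errno.ESRCH:  # ESRCH means the process is already gone
            raise
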
def log_error(self, target, brief=False, **kwargs):  # Can also log content of additional files via kwargs
    now = time.time()
    entry = '%s.%s' % (time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime(now)),
        ('%.5f' % (now - int(now)))[2:])
    self._logger.log_time(logging.WARNING, '%s failed with code %d', self.nice_cmd, self.wait())
    if not brief:
        self._logger.log_time(logging.WARNING, '\n%s', self.get_error())
    try:
        tar = tarfile.TarFile.open(target, 'a')
        data = {'exit_code': self.wait(), 'exec': self.cmd, 'args': self.args}
        files = [VirtualFile(os.path.join(entry, 'info'), DictFormat().format(data))]
        kwargs.update({'stdout': self.get_output(), 'stderr': self.get_error()})
        for key, value in kwargs.items():
            try:
                content = open(value, 'r').readlines()
            except Exception:
                clear_current_exception()
                content = [value]
            files.append(VirtualFile(os.path.join(entry, key), content))
        for file_obj in files:
            info, handle = file_obj.get_tar_info()
            tar.addfile(info, handle)
            handle.close()
        tar.close()
    except Exception:
        raise GCError('Unable to log errors of external process "%s" to "%s"' % (self.nice_cmd, target))
    self._logger.info('All logfiles were moved to %s', target)

def __init__(self, **kwargs):
    ProcessHandler.__init__(self, **kwargs)
    ssh_default_args = ' -vvv -o BatchMode=yes -o ForwardX11=no'
    self._shell_cmd = resolve_install_path('ssh') + ssh_default_args
    self._copy_cmd = resolve_install_path('scp') + ssh_default_args + ' -r'
    self._ssh_link_id = 0
    self._ssh_link_args = ''
    self._ssh_link_timestamp = 0
    self._ssh_link_fail_count = 0
    self._ssh_link_master_proc = None
    try:
        self._remote_host = kwargs['remote_host']
    except Exception:
        raise ConfigError('Request to initialize SSH-Type RemoteProcessHandler without remote host.')
    try:
        self._ssh_link_base = os.path.abspath(kwargs['sshLink'])
        # older ssh/gsissh puts a maximum length limit on control paths, use a different one
        if len(self._ssh_link_base) >= 107:
            self._ssh_link_base = os.path.expanduser('~/.ssh/%s' % os.path.basename(self._ssh_link_base))
        self._ssh_link = self._ssh_link_base
        _ssh_link_secure(self._ssh_link, init_dn=True)
        self._get_ssh_link()
    except KeyError:
        clear_current_exception()
        self._ssh_link = False
    # test connection once
    proc_test = self.logged_execute('exit')
    if proc_test.wait() != 0:
        raise CondorProcessError('Failed to validate remote connection.', proc_test)

def _read_jobs(self, job_limit):
    job_map = {}
    max_job_len = 0
    if os.path.exists(self._db_fn):
        try:
            tar = zipfile.ZipFile(self._db_fn, 'r', zipfile.ZIP_DEFLATED)
            tar.testzip()
        except Exception:  # Try to recover job archive
            clear_current_exception()
            self._log.warning('Job database is corrupted - starting recovery')
            self._recover_jobs()
            self._log.info('Recover completed!')
        activity = Activity('Reading job transactions')
        max_job_len = len(tar.namelist())
        map_jobnum2tarfn = {}
        for idx, tar_info_fn in enumerate(tar.namelist()):
            (jobnum, tid) = tuple(imap(lambda s: int(s[1:]), tar_info_fn.split('_', 1)))
            if tid < map_jobnum2tarfn.get(jobnum, 0):
                continue
            try:
                data = self._fmt.parse(tar.open(tar_info_fn).read())
            except Exception:
                clear_current_exception()
                continue
            job_map[jobnum] = self._create_job_obj(tar_info_fn, data)
            map_jobnum2tarfn[jobnum] = tid
            if idx % 100 == 0:
                activity.update('Reading job transactions %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
        activity.finish()
    self._serial = max_job_len
    return job_map

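# Hedged note on the member naming assumed by the parser above: archive entries appear to follow
# a 'J<jobnum>_T<transaction>' pattern, e.g. 'J42_T7' -> (jobnum=42, tid=7); only the entry with
# the highest transaction id per job number ends up in job_map.
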
def create_dbs3_json_blocks(opts, dataset_blocks):
    dbs3_proto_block_iter = create_dbs3_proto_blocks(opts, dataset_blocks)
    for (block, block_dump, block_size, dataset_type) in dbs3_proto_block_iter:
        dataset = block[DataProvider.Dataset]
        try:
            primary_dataset, processed_dataset, data_tier = dataset[1:].split('/')
        except Exception:
            raise DatasetError('Dataset name %s is not a valid DBS name!' % dataset)

        # add primary dataset information
        block_dump['primds'] = {'primary_ds_type': dataset_type, 'primary_ds_name': primary_dataset}

        # add dataset information
        block_dump['dataset'] = {
            'dataset': dataset, 'processed_ds_name': processed_dataset, 'data_tier_name': data_tier,
            'physics_group_name': None, 'dataset_access_type': 'VALID',
            'xtcrosssection': None,  # TODO: Add to metadata from FrameWorkJobReport, if possible!
        }

        # add block information
        site_db = CRIC()
        try:
            origin_site_name = site_db.se_to_cms_name(block[DataProvider.Locations][0])[0]
        except IndexError:
            clear_current_exception()
            origin_site_name = 'UNKNOWN'
        block_dump['block'] = {'block_name': DataProvider.get_block_id(block), 'block_size': block_size,
            'file_count': len(block[DataProvider.FileList]), 'origin_site_name': origin_site_name}
        if opts.do_close_blocks:
            block_dump['block']['open_for_writing'] = 0
        else:
            block_dump['block']['open_for_writing'] = 1

        # add acquisition_era, CRAB is important because of checks within DBS 3
        block_dump['acquisition_era'] = {'acquisition_era_name': 'CRAB', 'start_date': 0}
        # add processing_era
        block_dump['processing_era'] = {'processing_version': 1, 'description': 'grid-control'}

        yield validate_dbs3_json('blockBulk', block_dump)

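# Hedged worked example of the name split above: for the hypothetical dataset path
# '/PrimaryDS/ProcessedDS-v1/USER', dataset[1:].split('/') yields
# ['PrimaryDS', 'ProcessedDS-v1', 'USER'] for (primary_dataset, processed_dataset, data_tier).
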
def _read_jobs(self, job_limit):
    ensure_dir_exists(self._path_db, 'job database directory', JobError)
    candidates = []
    for job_fn in fnmatch.filter(os.listdir(self._path_db), 'job_*.txt'):
        try:  # 2xsplit is faster than regex
            jobnum = int(job_fn.split(".")[0].split("_")[1])
        except Exception:
            clear_current_exception()
            continue
        candidates.append((jobnum, job_fn))

    (job_map, max_job_len) = ({}, len(candidates))
    activity = Activity('Reading job infos')
    idx = 0
    for (jobnum, job_fn) in sorted(candidates):
        idx += 1
        if jobnum >= job_limit >= 0:
            self._log.info('Stopped reading job infos at job #%d out of %d available job files, ' +
                'since the limit of %d jobs is reached', jobnum, len(candidates), job_limit)
            break
        try:
            job_fn_full = os.path.join(self._path_db, job_fn)
            data = self._fmt.parse(SafeFile(job_fn_full).iter_close())
            job_obj = self._create_job_obj(job_fn_full, data)
        except Exception:
            raise JobError('Unable to process job file %r' % job_fn_full)
        job_map[jobnum] = job_obj
        activity.update('Reading job infos %d [%d%%]' % (idx, (100.0 * idx) / max_job_len))
    activity.finish()
    return job_map

def execute(self, wms_id_list):  # yields list of (wms_id, job_status, job_info)
    exc = ExceptionCollector()
    for wms_id in wms_id_list:
        try:
            job_info = filter_dict(dict(self._status_fun(wms_id)),
                value_filter=lambda v: v not in ['', '0'])
            job_info[CheckInfo.RAW_STATUS] = job_info.pop('status', '').lower()
            if 'destination' in job_info:
                try:
                    dest_info = job_info['destination'].split('/', 1)
                    job_info[CheckInfo.SITE] = dest_info[0].strip()
                    job_info[CheckInfo.QUEUE] = dest_info[1].strip()
                except Exception:
                    clear_current_exception()
            yield (wms_id, self._status_map.get(job_info[CheckInfo.RAW_STATUS], Job.UNKNOWN), job_info)
        except Exception:
            exc.collect()
            if abort():
                break
    exc.raise_any(BackendError('Encountered errors while checking job status'))

def _getSubmitFileMap(self, task, jobNumList):
    """
    Get listed files for submission

    Returns:
    taskFiles       iterable as (descr, gcPath, scheddPath)
                    files shared by all jobs
    jobsFileMap     map of jobNum to iterable as (descr, gcPath, scheddPath)
                    files per individual job
    """
    taskFiles = []

    def mapSBFiles(descr, path, base):
        return (descr, path, os.path.join(self.getStagingDir(taskID=task.taskID), base))
    taskFiles.extend(ismap(mapSBFiles, self.parentPool._getSandboxFilesIn(task)))
    proxyFile = ()
    try:
        for authFile in self.parentPool.proxy.getauthFiles():
            proxyFile = ('User Proxy', authFile,
                os.path.join(self.getStagingDir(taskID=task.taskID), os.path.basename(authFile)))
    except Exception:
        clear_current_exception()
    jobsFileMap = {}
    for jobNum in jobNumList:
        jcFull, jcBase = self.getJobCfgPath(jobNum)
        jobsFileMap[jobNum] = ('Job Config %d' % jobNum, jcFull,
            os.path.join(self.getStagingDir(taskID=task.taskID), jcBase))
    return taskFiles, proxyFile, jobsFileMap

def _get_internal(self, desc, obj2str, str2obj, def2obj, option, default_obj,
        interactive=True, interactive_msg=None, interactive_msg_append_default=True, **kwargs):
    # interactive mode only overrides default values from the code
    uii = UserInputInterface()
    if interactive_msg and self.is_interactive(option, interactive):
        prompt = interactive_msg
        if interactive_msg_append_default and not unspecified(default_obj):
            prompt += (' [%s]' % self._get_default_str(default_obj, def2obj, obj2str))
        while True:
            handler = signal.signal(signal.SIGINT, signal.SIG_DFL)
            try:
                user_input = uii.prompt_text('%s: ' % prompt)
            except Exception:
                sys.exit(os.EX_DATAERR)
            signal.signal(signal.SIGINT, handler)
            if user_input != '':
                try:
                    default_obj = str2obj(user_input)
                except Exception:
                    clear_current_exception()
                    self._log.warning('Unable to parse %s: %s\n', desc, user_input)
                    continue
            break
    return TypedConfigInterface._get_internal(self, desc, obj2str, str2obj, def2obj,
        option, default_obj, **kwargs)

def logging_defaults():
    formatter_verbose = GCFormatter(ex_context=2, ex_vars=1, ex_fstack=1, ex_tree=2)

    root_logger = clean_logger()
    root_logger.manager.loggerDict.clear()
    root_logger.setLevel(logging.DEFAULT)
    root_handler = register_handler(root_logger, StdoutStreamHandler(), formatter_verbose)

    # Setup logger used for abort messages
    abort_logger = clean_logger('abort')
    abort_logger.propagate = False
    abort_handler = register_handler(abort_logger, StderrStreamHandler(), formatter_verbose)

    # Output verbose exception information into dedicated GC log (in gc / tmp / user directory) if possible
    try:
        register_handler(abort_logger, GCLogHandler(get_debug_file_candidates(), mode='w'), formatter_verbose)
        formatter_quiet = GCFormatter(ex_context=0, ex_vars=0, ex_fstack=0, ex_tree=1)
        abort_handler.setFormatter(formatter_quiet)
        root_handler.setFormatter(formatter_quiet)
    except Exception:  # otherwise use verbose settings for default output
        clear_current_exception()

    # External libraries
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Adding log_process_result to Logging class
    def log_process(self, proc, level=logging.WARNING, files=None, msg=None):
        msg = msg or 'Process %(call)s finished with exit code %(proc_status)s'
        status = proc.status(timeout=0)
        record = self.makeRecord(self.name, level, '<process>', 0, msg, tuple(), None)
        record.proc = proc
        record.call = proc.get_call()
        record.proc_status = status
        record.files = files or {}
        record.msg = record.msg % record.__dict__
        self.handle(record)
    logging.Logger.log_process = log_process

    # Adding log with time prefix to Logging class
    def log_time(self, level, msg, *args, **kwargs):
        if self.isEnabledFor(level):
            tmp = self.findCaller()
            record = self.makeRecord(self.name, level, tmp[0], tmp[1], msg, args,
                kwargs.pop('exc_info', None))
            record.print_time = True
            self.handle(record)
    logging.Logger.log_time = log_time

def __new__(cls, config, stream, register_callback=False):
    if is_dumb_terminal(stream):
        return ActivityMonitor.create_instance('TimedActivityMonitor', config, stream, register_callback)
    try:  # try to pick up multi line activity stream
        return ActivityMonitor.create_instance('MultiActivityMonitor', config, stream, register_callback)
    except Exception:  # fall back to standard terminal activity stream
        clear_current_exception()
        return ActivityMonitor.create_instance('SingleActivityMonitor', config, stream, register_callback)

def kill(self, sig=signal.SIGTERM):
    if not self._event_finished.is_set():
        try:
            os.kill(self._pid, sig)
        except OSError:
            if get_current_exception().errno != errno.ESRCH:  # errno.ESRCH: no such process (already dead)
                raise
            clear_current_exception()

def remove_files(args):
    for item in args:
        try:
            if os.path.isdir(item):
                os.rmdir(item)
            else:
                os.unlink(item)
        except Exception:
            clear_current_exception()

def process_dbs3_json_blocks(opts, block_dump_iter):
    log = logging.getLogger('dbs3-migration')
    log.setLevel(logging.DEBUG)

    # dry run without import - just store block dumps in temp dir
    if opts.do_import:
        return dump_dbs3_json(opts.tempdir, block_dump_iter)
    # set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.target_instance)
    dbs3_source_client = DBS3LiteClient(url=opts.source_instance)
    dbs3_migration_queue = DBS3MigrationQueue()
    dbs3_migration_file = os.path.join(opts.tempdir, 'dbs3_migration.pkl')

    # migrate parents and register datasets with dbs3
    for block_dump in block_dump_iter:
        if not opts.continue_migration:
            # initiate the dbs3 to dbs3 migration of parent blocks
            log.debug('Checking parentage for block: %s', block_dump['block']['block_name'])
            unique_parent_lfns = set(imap(lambda parent: parent['parent_logical_file_name'],
                block_dump['file_parent_list']))
            unique_blocks = set()
            for parent_lfn in unique_parent_lfns:
                for block in dbs3_source_client.get_dbs_block_list(logical_file_name=parent_lfn):
                    unique_blocks.add(block['block_name'])
            for parent_block in unique_blocks:
                if dbs3_target_client.get_dbs_block_list(block_name=parent_block):
                    log.debug('Block %s is already at destination', parent_block)
                    continue
                migration_task = MigrationTask(block_name=parent_block,
                    migration_url=opts.dbsSource, dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued:
                    log.exception('Already queued')
                    clear_current_exception()
            dbs3_migration_queue.save_to_disk(dbs3_migration_file)
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(dbs3_migration_file)
            except IOError:
                log.exception('Probably, there is no DBS 3 migration for this dataset ongoing')
                raise
        # wait for all parent blocks migrated to dbs3
        do_migration(dbs3_migration_queue)
        # insert block into dbs3
        dbs3_target_client.insert_dbs_block_dump(block_dump)

def _get_job_selector_and_task(config, job_selector_str, require_task):
    if not require_task:
        try:  # try to build job selector without task
            return (None, JobSelector.create(job_selector_str))
        except Exception:
            if not except_nested(TaskNeededException, get_current_exception()):
                raise
            clear_current_exception()
    task = gc_create_workflow(config).task
    return (task, JobSelector.create(job_selector_str, task=task))

def release(self):
    if self._fd:
        fcntl.flock(self._fd, fcntl.LOCK_UN)
        self._fd.close()
        self._fd = None
    try:
        if os.path.exists(self._lockfile):
            os.unlink(self._lockfile)
    except Exception:
        clear_current_exception()

def unpack_wildcard_tar(log, output_dn):
    if os.path.exists(output_dn):
        if 'GC_WC.tar.gz' in os.listdir(output_dn):
            wildcard_tar = os.path.join(output_dn, 'GC_WC.tar.gz')
            try:
                tarfile.TarFile.open(wildcard_tar, 'r:gz').extractall(output_dn)
                os.unlink(wildcard_tar)
            except Exception:
                log.error('Can\'t unpack output files contained in %s', wildcard_tar)
                clear_current_exception()

def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
    job_info_path = os.path.join(item, 'job.info')
    try:
        job_info_dict = DictFormat('=').parse(open(job_info_path))
        if job_info_dict.get('exitcode') == 0:
            obj_dict['JOBINFO'] = job_info_dict
            yield (item, metadata_dict, entries, location_list, obj_dict)
    except Exception:
        self._log.log(logging.INFO2, 'Unable to parse job info file %r', job_info_path)
        clear_current_exception()

def __init__(self, filename, delimeter='=', lowerCaseKey=True):
    dict.__init__(self)
    self.fmt = DictFormat(delimeter)
    self.filename = filename
    keyParser = {None: QM(lowerCaseKey, lambda k: parseType(k.lower()), parseType)}
    try:
        self.update(self.fmt.parse(open(filename), keyParser=keyParser))
    except Exception:
        clear_current_exception()
    self.olddict = self.items()

def _query_das(self, query):
    if self._dataset_instance not in ('', 'prod/global'):
        query += ' instance=%s' % self._dataset_instance
    (start, sleep) = (time.time(), 0.4)
    while time.time() - start < 60:
        try:
            return self._gjrc.get(params={'input': query})['data']
        except DASRetry:
            clear_current_exception()
            time.sleep(sleep)
            sleep += 0.4

def _processed_dataset_validation(item):
    try:
        grid_control_cms.Lexicon.procdataset(item)
        return item
    except AssertionError:
        clear_current_exception()
    try:
        grid_control_cms.Lexicon.userprocdataset(item)
        return item
    except AssertionError:
        raise

def _parse(self, proc):
    for line in ifilter(identity, proc.stdout.iter(self._timeout)):
        if 'error' in line.lower():
            raise BackendError('Unable to parse status line %s' % repr(line))
        tmp = line.split()
        try:
            wms_id = str(int(tmp[0]))
        except Exception:
            clear_current_exception()
            continue
        yield {CheckInfo.WMSID: wms_id, CheckInfo.RAW_STATUS: tmp[2], CheckInfo.QUEUE: tmp[1]}

def __init__(self, rlock=False):
    if rlock:  # signal handlers using events need to use rlock
        lock = threading.RLock()
    else:
        lock = threading.Lock()
    self._cond = threading.Condition(lock)
    try:
        self._cond_notify_all = self._cond.notify_all
    except Exception:
        clear_current_exception()
        self._cond_notify_all = self._cond.notifyAll
    self._flag = False

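# Hedged sketch (an assumption, not the original class) of how the notify_all/notifyAll shim
# above would typically be used by a set() method: wake all waiters regardless of whether the
# running interpreter exposes the Python 2 or Python 3 spelling.
def set(self):
    self._cond.acquire()
    try:
        self._flag = True
        self._cond_notify_all()
    finally:
        self._cond.release()
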
def run(self):
    # submit task to DBS 3 migration
    try:
        self.migration_task.migration_request = self.migration_task.dbs_client.migration_request_submit(
            self.migration_task.payload())
    except AttributeError:
        clear_current_exception()  # simulation
        self.migration_task.logger.info("%s has been queued for migration!" % self.migration_task)
    else:
        self.migration_task.logger.info("%s has been queued for migration!" % self.migration_task)
    self.migration_task.state = MigrationSubmittedState(self.migration_task)

def iter(self):
    while True:
        try:
            line = self.proc.fromchild.readline()
        except Exception:
            clear_current_exception()
            abort(True)
            break
        if not line:
            break
        self.stdout.append(line)
        yield line

def __init__(self, config, name):
    glite_path = os.environ.get('GLITE_WMS_LOCATION', os.environ.get('GLITE_LOCATION', ''))
    stored_sys_path = list(sys.path)
    for dn in ['lib', 'lib64', os.path.join('lib', 'python'), os.path.join('lib64', 'python')]:
        sys.path.append(os.path.join(glite_path, dn))

    try:  # gLite 3.2
        import wmsui_api
        glite_state_name_list = wmsui_api.states_names

        def _get_status_direct(wms_id):
            try:  # new parameter json
                job_status = wmsui_api.getStatus(wmsui_api.getJobIdfromList(None, [wms_id])[0], 0)
            except Exception:
                clear_current_exception()
                job_status = wmsui_api.getStatus(wmsui_api.getJobIdfromList([wms_id])[0], 0)
            return lmap(lambda name: (name.lower(),
                job_status.getAttribute(glite_state_name_list.index(name))), glite_state_name_list)
    except Exception:  # gLite 3.1
        clear_current_exception()
        try:
            from glite_wmsui_LbWrapper import Status
            wrapper_status = Status()
            job_status = __import__('Job').JobStatus(wrapper_status)

            def _get_status_direct(wms_id):
                wrapper_status.getStatus(wms_id, 0)
                err, api_msg = wrapper_status.get_error()
                if err:
                    raise BackendError(api_msg)
                info = wrapper_status.loadStatus()
                return lzip(imap(str.lower, job_status.states_names), info[0:job_status.ATTR_MAX])
        except Exception:
            clear_current_exception()
            _get_status_direct = None
    sys.path = stored_sys_path

    check_executor = None
    if _get_status_direct:
        check_executor = GliteWMSDirectCheckJobs(config, _get_status_direct)
    GliteWMS.__init__(self, config, name, check_executor=check_executor)

def __init__(self, fn_candidates, mode='a', *args, **kwargs):
    (self._fn, self._mode) = (None, mode)
    for fn_candidate in fn_candidates:
        try:
            fn_candidate = os.path.abspath(os.path.normpath(os.path.expanduser(fn_candidate)))
            logging.FileHandler.__init__(self, fn_candidate, 'a', *args, **kwargs)
            self._fn = fn_candidate
            break
        except Exception:
            clear_current_exception()
    if self._fn is None:
        raise Exception('Unable to find writeable debug log path!')

def _close_nested_tar(self, outer_tar, nested_tar):
    # Function to close all contained outer_tar objects
    if nested_tar:
        nested_tar.close()
        try:  # Python 3.2 does not close the wrapping gzip file object
            nested_tar.fileobj.close()  # if an external file object is given
        except Exception:
            clear_current_exception()
        nested_tar.nested_tar_fp.seek(0)
        nested_tar_info = tarfile.TarInfo(nested_tar.nested_fn)
        nested_tar_info.size = len(nested_tar.nested_tar_fp.getvalue())
        outer_tar.addfile(nested_tar_info, nested_tar.nested_tar_fp)

def _closeSubTar(self, tar, subTarTuple):
    if subTarTuple:
        (subTarFile, subTarFileObj, subTarFileName) = subTarTuple
        subTarFile.close()
        try:  # Python 3.2 does not close the wrapping gzip file object if an external file object is given
            subTarFile.fileobj.close()
        except Exception:
            clear_current_exception()
        subTarFileObj.seek(0)
        subTarFileInfo = tarfile.TarInfo(subTarFileName)
        subTarFileInfo.size = len(subTarFileObj.getvalue())
        tar.addfile(subTarFileInfo, subTarFileObj)

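# Standalone sketch of the nested-tar pattern used by the two methods above, written with only
# the standard library: build an inner .tar.gz in memory and append it to an outer tar archive
# as a regular member. The helper name and arguments are illustrative.
import io
import tarfile

def append_nested_targz(outer_tar, member_name, payload_name, payload_bytes):
    inner_fp = io.BytesIO()
    inner_tar = tarfile.open(fileobj=inner_fp, mode='w:gz')
    payload_info = tarfile.TarInfo(payload_name)
    payload_info.size = len(payload_bytes)
    inner_tar.addfile(payload_info, io.BytesIO(payload_bytes))
    inner_tar.close()  # recent Python versions also close the gzip wrapper here
    inner_fp.seek(0)
    member_info = tarfile.TarInfo(member_name)
    member_info.size = len(inner_fp.getvalue())
    outer_tar.addfile(member_info, inner_fp)
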