def _get_possible_merge_categories(self, map_cat2desc):
    # Get dictionary with categories that will get merged when removing a variable
    def _eq_dict(dict_a, dict_b, key):
        # Merge parameters to reach category goal - NP hard problem, so be greedy and quick!
        dict_a = dict(dict_a)
        dict_b = dict(dict_b)
        dict_a.pop(key)
        dict_b.pop(key)
        return dict_a == dict_b

    var_key_result = {}
    cat_key_search_dict = {}
    for cat_key in map_cat2desc:
        for var_key in map_cat2desc[cat_key]:
            if var_key not in cat_key_search_dict:
                cat_key_search = set(map_cat2desc.keys())
            else:
                cat_key_search = cat_key_search_dict[var_key]
            if cat_key_search:
                matches = lfilter(lambda ck: _eq_dict(map_cat2desc[cat_key], map_cat2desc[ck], var_key), cat_key_search)
                if matches:
                    cat_key_search_dict[var_key] = cat_key_search.difference(set(matches))
                    var_key_result.setdefault(var_key, []).append(matches)
    return var_key_result

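# Illustrative sketch (not part of the original module): demonstrates the greedy merge idea
# used by _get_possible_merge_categories above with plain dictionaries. Two categories can be
# merged by dropping a variable key exactly when their descriptions agree on all other keys.
# The category contents below are hypothetical example values.
def _example_merge_categories():
    def _eq_dict(dict_a, dict_b, key):
        dict_a = dict(dict_a)
        dict_b = dict(dict_b)
        dict_a.pop(key)
        dict_b.pop(key)
        return dict_a == dict_b

    map_cat2desc = {
        1: {'DATASET': 'A', 'ENERGY': '13TeV'},
        2: {'DATASET': 'A', 'ENERGY': '8TeV'},
        3: {'DATASET': 'B', 'ENERGY': '13TeV'},
    }
    # Categories 1 and 2 only differ in 'ENERGY', so removing that key would merge them:
    assert _eq_dict(map_cat2desc[1], map_cat2desc[2], 'ENERGY')
    # Categories 1 and 3 still differ in 'DATASET' after removing 'ENERGY':
    assert not _eq_dict(map_cat2desc[1], map_cat2desc[3], 'ENERGY')
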
def _resync(self):
    if self._data_provider:
        activity = Activity('Performing resync of datasource %r' % self._name)
        # Get old and new dataset information
        ds_old = DataProvider.loadFromFile(self._getDataPath('cache.dat')).getBlocks(show_stats = False)
        self._data_provider.clearCache()
        ds_new = self._data_provider.getBlocks(show_stats = False)
        self._data_provider.saveToFile(self._getDataPath('cache-new.dat'), ds_new)

        # Use old splitting information to synchronize with new dataset infos
        old_maxN = self._data_splitter.getMaxJobs()
        jobChanges = self._data_splitter.resyncMapping(self._getDataPath('map-new.tar'), ds_old, ds_new)
        activity.finish()
        if jobChanges is not None:
            # Move current splitting to backup and use the new splitting from now on
            def backupRename(old, cur, new):
                if self._keepOld:
                    os.rename(self._getDataPath(cur), self._getDataPath(old))
                os.rename(self._getDataPath(new), self._getDataPath(cur))
            backupRename('map-old-%d.tar' % time.time(), 'map.tar', 'map-new.tar')
            backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
            self._data_splitter.importPartitions(self._getDataPath('map.tar'))
            self._maxN = self._data_splitter.getMaxJobs()
            self._log.debug('Dataset resync finished: %d -> %d partitions', old_maxN, self._maxN)
            return (set(jobChanges[0]), set(jobChanges[1]), old_maxN != self._maxN)

def getCMSFiles(self, blockPath):
    lumiDict = {}
    if self.selectedLumis:  # Central lumi query
        lumiDict = self.getCMSLumisImpl(blockPath)
        lumiDict = QM(lumiDict, lumiDict, {})
    for (fileInfo, listLumi) in self.getCMSFilesImpl(blockPath, self.onlyValid, self.selectedLumis):
        if self.selectedLumis:
            if not listLumi:
                listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])
            def acceptLumi():
                for (run, lumiList) in listLumi:
                    for lumi in lumiList:
                        if selectLumi((run, lumi), self.selectedLumis):
                            return True
            if not acceptLumi():
                continue
            if self.includeLumi:
                (listLumiExt_Run, listLumiExt_Lumi) = ([], [])
                for (run, lumi_list) in sorted(listLumi):
                    for lumi in lumi_list:
                        listLumiExt_Run.append(run)
                        listLumiExt_Lumi.append(lumi)
                fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
            else:
                fileInfo[DataProvider.Metadata] = [list(sorted(set(map(lambda (run, lumi_list): run, listLumi))))]
        yield fileInfo

def _resync_psrc(self):
    activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
    # Get old and new dataset information
    provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
    block_list_old = provider_old.get_block_list_cached(show_stats=False)
    self._provider.clear_cache()
    block_list_new = self._provider.get_block_list_cached(show_stats=False)
    self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

    # Use old splitting information to synchronize with new dataset infos
    partition_len_old = self.get_parameter_len()
    partition_changes = self._resync_partitions(
        self._get_data_path('map-new.tar'), block_list_old, block_list_new)
    activity.finish()
    if partition_changes is not None:
        # Move current splitting to backup and use the new splitting from now on
        def _rename_with_backup(new, cur, old):
            if self._keep_old:
                os.rename(self._get_data_path(cur), self._get_data_path(old))
            os.rename(self._get_data_path(new), self._get_data_path(cur))
        _rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
        _rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
        self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
        self._log.debug('Dataset resync finished: %d -> %d partitions', partition_len_old, self._len)
        (pnum_list_redo, pnum_list_disable) = partition_changes
        return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)

def getSubmissionJobs(self, maxsample, static = {'showBlocker': True}):
    # Get list of submittable jobs
    readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
    retryOK = readyList
    defaultJob = Job()
    if self.maxRetry >= 0:
        retryOK = filter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self.maxRetry, readyList)
    modOK = filter(self._task.canSubmit, readyList)
    jobList = set.intersection(set(retryOK), set(modOK))

    if static['showBlocker'] and len(readyList) > 0 and len(jobList) == 0:  # No submission but ready jobs
        err = []
        err += utils.QM(len(retryOK) > 0 and len(modOK) == 0, [], ['have hit their maximum number of retries'])
        err += utils.QM(len(retryOK) == 0 and len(modOK) > 0, [], ['are vetoed by the task module'])
        utils.vprint('All remaining jobs %s!' % str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err), -1, True)
    static['showBlocker'] = not (len(readyList) > 0 and len(jobList) == 0)

    # Determine number of jobs to submit
    submit = len(jobList)
    if self.inQueue > 0:
        submit = min(submit, self.inQueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
    if self.inFlight > 0:
        submit = min(submit, self.inFlight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
    if self.continuous:
        submit = min(submit, maxsample)
    submit = max(submit, 0)

    if self.doShuffle:
        return self.sample(jobList, submit)
    else:
        return sorted(jobList)[:submit]

def _getSubmissionJobs(self, maxsample):
    # Get list of submittable jobs
    readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
    retryOK = readyList
    defaultJob = Job()
    if self._job_retries >= 0:
        retryOK = lfilter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self._job_retries, readyList)
    modOK = lfilter(self._task.canSubmit, readyList)
    jobList = set.intersection(set(retryOK), set(modOK))

    if self._showBlocker and readyList and not jobList:  # No submission but ready jobs
        err = []
        err += utils.QM((len(retryOK) > 0) and (len(modOK) == 0), [], ['have hit their maximum number of retries'])
        err += utils.QM((len(retryOK) == 0) and (len(modOK) > 0), [], ['are vetoed by the task module'])
        self._log_user_time.warning('All remaining jobs %s!',
            str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err))
    self._showBlocker = not (len(readyList) > 0 and len(jobList) == 0)

    # Determine number of jobs to submit
    submit = len(jobList)
    if self._njobs_inqueue > 0:
        submit = min(submit, self._njobs_inqueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
    if self._njobs_inflight > 0:
        submit = min(submit, self._njobs_inflight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
    if self._continuous and (maxsample > 0):
        submit = min(submit, maxsample)
    submit = max(submit, 0)

    if self._do_shuffle:
        return self._sample(jobList, submit)
    return sorted(jobList)[:submit]

def __init__(self, config, datasource_name):
    BaseConsistencyProcessor.__init__(self, config, datasource_name)
    self._check_url = config.get_enum(self._get_dproc_opt('check unique url'),
        DatasetUniqueMode, DatasetUniqueMode.abort)
    self._check_block = config.get_enum(self._get_dproc_opt('check unique block'),
        DatasetUniqueMode, DatasetUniqueMode.abort)
    (self._recorded_url, self._recorded_block) = (set(), set())

def _resync_adapter(self, pa_old, pa_new, result_redo, result_disable, size_change):
    (map_jobnum2pnum, pspi_list_added, pspi_list_missing) = _diff_pspi_list(pa_old, pa_new,
        result_redo, result_disable)
    # Reorder and reconstruct parameter space with the following layout:
    # NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file)
    # <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD
    if pspi_list_added:
        _extend_map_jobnum2pnum(map_jobnum2pnum, pa_old.get_job_len(), pspi_list_added)
    if pspi_list_missing:
        # extend the parameter source by placeholders for the missing parameter space points
        psrc_missing = _create_placeholder_psrc(pa_old, pa_new,
            map_jobnum2pnum, pspi_list_missing, result_disable)
        self._psrc = ParameterSource.create_instance('ChainParameterSource',
            self._psrc_raw, psrc_missing)
    self._map_jobnum2pnum = map_jobnum2pnum  # Update Job2PID map
    # Write resynced state
    self._write_jobnum2pnum(self._path_jobnum2pnum + '.tmp')
    ParameterSource.get_class('GCDumpParameterSource').write(self._path_params + '.tmp',
        self.get_job_len(), self.get_job_metadata(), self.iter_jobs())
    os.rename(self._path_jobnum2pnum + '.tmp', self._path_jobnum2pnum)
    os.rename(self._path_params + '.tmp', self._path_params)

    result_redo = result_redo.difference(result_disable)
    if result_redo or result_disable:
        map_pnum2jobnum = reverse_dict(self._map_jobnum2pnum)

        def _translate_pnum(pnum):
            return map_pnum2jobnum.get(pnum, pnum)
        result_redo = set(imap(_translate_pnum, result_redo))
        result_disable = set(imap(_translate_pnum, result_disable))
        return (result_redo, result_disable, size_change)
    return (set(), set(), size_change)

def resync_psrc(self):
    (psrc_redo, psrc_disable, _) = self._psrc.resync_psrc()
    result_redo = set(lfilter(lambda pnum: pnum < self._max_len, psrc_redo))
    result_disable = set(lfilter(lambda pnum: pnum < self._max_len, psrc_disable))
    self._psrc_len = self._psrc.get_parameter_len()
    return (result_redo, result_disable, False)  # size can never change on-the-fly

def process_dbs3_json_blocks(opts, block_dump_iter):
    logger = logging.getLogger('dbs3-migration')
    logger.setLevel(logging.DEBUG)

    # dry run without import - just store block dumps in temp dir
    if opts.do_import:
        return dump_dbs3_json(opts.tempdir, block_dump_iter)
    # set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.target_instance)
    dbs3_source_client = DBS3LiteClient(url=opts.source_instance)
    dbs3_migration_queue = DBS3MigrationQueue()
    dbs3_migration_file = os.path.join(opts.tempdir, 'dbs3_migration.pkl')
    # migrate parents and register datasets with dbs3
    for blockDump in block_dump_iter:
        if not opts.continue_migration:
            # initiate the dbs3 to dbs3 migration of parent blocks
            logger.debug('Checking parentage for block: %s', blockDump['block']['block_name'])
            unique_parent_lfns = set(imap(lambda parent: parent['parent_logical_file_name'],
                blockDump['file_parent_list']))
            unique_blocks = set()
            for parent_lfn in unique_parent_lfns:
                for block in dbs3_source_client.listBlocks(logical_file_name=parent_lfn):
                    unique_blocks.add(block['block_name'])
            for parent_block in unique_blocks:
                if dbs3_target_client.listBlocks(block_name=parent_block):  # block already at destination
                    logger.debug('Block %s is already at destination', parent_block)
                    continue
                migration_task = MigrationTask(block_name=parent_block,
                    migration_url=opts.dbsSource, dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued as aq:
                    logger.debug(aq.message)
            dbs3_migration_queue.save_to_disk(dbs3_migration_file)
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(dbs3_migration_file)
            except IOError:
                logger.exception('Probably, there is no DBS 3 migration for this dataset ongoing')
                raise
        # wait for all parent blocks migrated to dbs3
        do_migration(dbs3_migration_queue)
        # insert block into dbs3
        dbs3_target_client.insertBulkBlock(blockDump)

def _resync(self):  # This function is _VERY_ time critical!
    tmp = self._psrc_raw.resync_psrc()  # First ask about psrc changes
    (result_redo, result_disable, size_change) = (set(tmp[0]), set(tmp[1]), tmp[2])
    psrc_hash_new = self._psrc_raw.get_psrc_hash()
    psrc_hash_changed = self._psrc_hash_stored != psrc_hash_new
    self._psrc_hash_stored = psrc_hash_new
    if not (result_redo or result_disable or size_change or psrc_hash_changed):
        return ParameterSource.get_empty_resync_result()
    ps_old = ParameterSource.create_instance('GCDumpParameterSource', self._path_params)
    pa_old = ParameterAdapter(None, ps_old)
    pa_new = ParameterAdapter(None, self._psrc_raw)
    return self._resync_adapter(pa_old, pa_new, result_redo, result_disable, size_change)

def getKeyMergeResults():
    varKeyResult = {}
    catKeySearchDict = {}
    for catKey in catDescDict:
        for varKey in catDescDict[catKey]:
            if varKey not in catKeySearchDict:
                catKeySearch = set(catDescDict.keys())
            else:
                catKeySearch = catKeySearchDict[varKey]
            if catKeySearch:
                matches = filter(lambda ck: eqDict(catDescDict[catKey], catDescDict[ck], varKey), catKeySearch)
                if matches:
                    catKeySearchDict[varKey] = catKeySearch.difference(set(matches))
                    varKeyResult.setdefault(varKey, []).append(matches)
    return varKeyResult

def process_fwjr(sample, fwjr_xml_dom, map_sample2run_info_dict,
        map_sample2input_events, map_sample2output_events):
    def _get_element_data(node, name):
        return node.getElementsByTagName(name)[0].childNodes[0].data

    # Collect run lumi information
    for run_node in fwjr_xml_dom.getElementsByTagName('Run'):
        for lumi_node in run_node.getElementsByTagName('LumiSection'):
            run = int(run_node.getAttribute('ID'))
            lumi = int(lumi_node.getAttribute('ID'))
            map_sample2run_info_dict.setdefault(sample, {}).setdefault(run, set()).add(lumi)
    # Collect output file information
    for output_file_node in fwjr_xml_dom.getElementsByTagName('File'):
        pfn = _get_element_data(output_file_node, 'PFN')
        if pfn not in map_sample2output_events.setdefault(sample, {}):
            map_sample2output_events[sample][pfn] = 0
        map_sample2output_events[sample][pfn] += int(_get_element_data(output_file_node, 'TotalEvents'))
    # Collect input file information
    for input_file_node in fwjr_xml_dom.getElementsByTagName('InputFile'):
        if sample not in map_sample2input_events:
            map_sample2input_events[sample] = 0
        map_sample2input_events[sample] += int(_get_element_data(input_file_node, 'EventsRead'))

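# Usage sketch (illustrative, not from the original source): feeds a minimal, hand-written
# framework job report fragment through process_fwjr above. The XML content and the sample
# name are hypothetical; only the tag and attribute names used by process_fwjr are relied on.
def _example_process_fwjr():
    import xml.dom.minidom
    fwjr_xml = """
    <FrameworkJobReport>
      <Run ID="1"><LumiSection ID="7"/><LumiSection ID="8"/></Run>
      <InputFile><EventsRead>250</EventsRead></InputFile>
      <File><PFN>output.root</PFN><TotalEvents>120</TotalEvents></File>
    </FrameworkJobReport>
    """
    (run_info, input_events, output_events) = ({}, {}, {})
    process_fwjr('sample_A', xml.dom.minidom.parseString(fwjr_xml),
        run_info, input_events, output_events)
    assert run_info == {'sample_A': {1: set([7, 8])}}
    assert input_events == {'sample_A': 250}
    assert output_events == {'sample_A': {'output.root': 120}}
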
def _get_sandbox_file_list(self, task, sm_list):
    # Prepare all input files
    dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
    dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
        lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
    task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
        *imap(lambda x: x.get_task_dict(), [task] + sm_list))
    task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
        'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
    task_config_str_list = DictFormat(escape_strings=True).format(
        task_config_dict, format='export %s%s%s\n')
    vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
        self._remote_event_handler.get_mon_env_dict().keys()))
    vn_alias_dict.update(task.get_var_alias_map())
    vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

    # Resolve wildcards in task input files
    def _get_task_fn_list():
        for fpi in task.get_sb_in_fpi_list():
            matched = glob.glob(fpi.path_abs)
            if matched != []:
                for match in matched:
                    yield match
            else:
                yield fpi.path_abs
    return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(),
        [VirtualFile('_config.sh', sorted(task_config_str_list)),
        VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])

def _checkJobList(self, wms, jobList):
    if self._defect_tries:
        nDefect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
        waitList = self._sample(self._defect_counter,
            nDefect - max(1, int(nDefect / 2 ** self._defect_raster)))
        jobList = lfilter(lambda x: x not in waitList, jobList)

    (change, timeoutList, reported) = JobManager._checkJobList(self, wms, jobList)
    for jobNum in reported:
        self._defect_counter.pop(jobNum, None)

    if self._defect_tries and (change is not None):
        self._defect_raster = utils.QM(reported, 1, self._defect_raster + 1)  # make 'raster' iteratively smaller
        for jobNum in ifilter(lambda x: x not in reported, jobList):
            self._defect_counter[jobNum] = self._defect_counter.get(jobNum, 0) + 1
        kickList = lfilter(lambda jobNum: self._defect_counter[jobNum] >= self._defect_tries,
            self._defect_counter)
        for jobNum in set(kickList + utils.QM((len(reported) == 0) and (len(jobList) == 1), jobList, [])):
            timeoutList.append(jobNum)
            self._defect_counter.pop(jobNum)
    return (change, timeoutList, reported)

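# Numeric sketch (illustrative, not from the original source) of how the defect backoff in
# _checkJobList above shrinks the number of rechecked jobs: with nDefect jobs in the defect
# counter, only max(1, int(nDefect / 2**raster)) of them are rechecked per cycle, while the
# rest stay on the waiting list; the raster grows while nothing is reported back.
def _example_defect_backoff(nDefect=8):
    for raster in (1, 2, 3, 4):
        rechecked = max(1, int(nDefect / 2 ** raster))
        waiting = nDefect - rechecked
        print('raster=%d: recheck %d job(s), %d wait' % (raster, rechecked, waiting))
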
def collapse_psp_list(psp_list, tracked_list, opts):
    psp_dict = {}
    psp_dict_nicks = {}
    header_list = [('COLLATE_JOBS', '# of jobs')]
    if 'DATASETSPLIT' in tracked_list:
        tracked_list.remove('DATASETSPLIT')
        if opts.collapse == 1:
            tracked_list.append('DATASETNICK')
            header_list.append(('DATASETNICK', 'DATASETNICK'))
        elif opts.collapse == 2:
            header_list.append(('COLLATE_NICK', '# of nicks'))
    for pset in psp_list:
        if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
            pset.pop('DATASETSPLIT')
        nickname = None
        if ('DATASETNICK' in pset) and (opts.collapse == 2):
            nickname = pset.pop('DATASETNICK')
        hash_str = md5_hex(repr(lmap(lambda key: pset.get(str(key)), tracked_list)))
        psp_dict.setdefault(hash_str, []).append(pset)
        psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

    def _do_collate(hash_str):
        psp = psp_dict[hash_str][0]
        psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
        psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
        return psp
    psp_list = sorted(imap(_do_collate, psp_dict), key=lambda x: tuple(imap(str, x.values())))
    return (header_list, psp_list)

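# Standalone sketch (illustrative, not from the original source) of the grouping idea behind
# collapse_psp_list above: parameter sets that agree on all tracked keys get the same hash and
# are counted as one collapsed row. hashlib.md5 stands in for md5_hex here; the parameter
# values are hypothetical.
def _example_collapse_grouping():
    import hashlib
    psp_list = [
        {'DATASETNICK': 'nick1', 'SEED': 1},
        {'DATASETNICK': 'nick1', 'SEED': 2},
        {'DATASETNICK': 'nick2', 'SEED': 3},
    ]
    tracked_list = ['DATASETNICK']
    groups = {}
    for pset in psp_list:
        key = repr([pset.get(str(k)) for k in tracked_list])
        hash_str = hashlib.md5(key.encode()).hexdigest()
        groups.setdefault(hash_str, []).append(pset)
    # Two parameter sets share the 'nick1' group, one forms the 'nick2' group
    assert sorted(len(group) for group in groups.values()) == [1, 2]
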
def logging_setup(config):
    if config.getBool('debug mode', False, onChange=None):
        config.set('level', 'NOTSET', '?=')
        config.set('detail lower limit', 'NOTSET')
        config.set('detail upper limit', 'NOTSET')
        config.set('abort handler', 'stdout debug_file', '?=')
        config.setInt('abort code context', 2)
        config.setInt('abort variables', 2)
        config.setInt('abort file stack', 2)
        config.setInt('abort tree', 2)
    display_logger = config.getBool('display logger', False, onChange=None)

    # Find logger names in options
    logger_names = set()
    for option in config.getOptions():
        if option in ['debug mode', 'display logger']:
            pass
        elif option.count(' ') == 0:
            logger_names.add('')
        else:
            logger_names.add(option.split(' ')[0].strip())
    logger_names = sorted(logger_names)
    logger_names.reverse()
    for logger_name in logger_names:
        logging_create_handlers(config, logger_name)

    if display_logger:
        dump_log_setup(logging.WARNING)

def main(opts, args):
    config = get_dataset_config(opts, args)

    provider = config.getPlugin('dataset', cls = DataProvider)
    blocks = provider.getBlocks()
    if len(blocks) == 0:
        raise DatasetError('No blocks!')

    datasets = set(imap(itemgetter(DataProvider.Dataset), blocks))
    if len(datasets) > 1 or opts.info:
        headerbase = [(DataProvider.Dataset, 'Dataset')]
    else:
        print('Dataset: %s' % blocks[0][DataProvider.Dataset])
        headerbase = []

    if opts.list_datasets:
        list_datasets(blocks)
    if opts.list_blocks:
        list_blocks(blocks, headerbase)
    if opts.list_files:
        list_files(datasets, blocks)
    if opts.list_storage:
        list_storage(blocks, headerbase)
    if opts.metadata and not opts.save:
        list_metadata(datasets, blocks)
    if opts.block_metadata and not opts.save:
        list_block_metadata(datasets, blocks)
    if opts.config_entry:
        list_config_entries(opts, blocks, provider)
    if opts.info:
        list_infos(blocks)
    if opts.save:
        save_dataset(opts, provider)

def doTransfer(self, listDescSourceTarget):
    for (desc, source, target) in listDescSourceTarget:
        if not self.smPaths:
            raise ConfigError("%s can't be transferred because '%s path wasn't set" % (desc, self.smOptPrefix))
        for idx, sePath in enumerate(set(self.smPaths)):
            utils.vprint('Copy %s to SE %d ' % (desc, idx + 1), -1, newline=False)
            sys.stdout.flush()
            proc = se_copy(source, os.path.join(sePath, target), self.smForce)
            if proc.status(timeout=5 * 60, terminate=True) == 0:
                utils.vprint('finished', -1)
            else:
                utils.vprint('failed', -1)
                utils.eprint(proc.stderr.read(timeout=0))
                utils.eprint('Unable to copy %s! You can try to copy it manually.' % desc)
                if not utils.getUserBool('Is %s (%s) available on SE %s?' % (desc, source, sePath), False):
                    raise StorageError('%s is missing on SE %s!' % (desc, sePath))

def create_dbs3_proto_blocks(opts, dataset_blocks):
    for dataset in dataset_blocks:
        missing_info_blocks = []
        dataset_types = set()
        for block in dataset_blocks[dataset]:
            block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
            (block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
            if len(block_dataset_types) > 1:
                raise Exception('Data and MC files are mixed in block %s#%s' % (
                    dataset, block[DataProvider.BlockName]))
            elif len(block_dataset_types) == 1:
                yield (block, block_dump, block_size, block_dataset_types.pop())
            else:
                missing_info_blocks.append((block, block_dump, block_size))
            dataset_types.update(block_dataset_types)  # collect dataset types in this dataset for blocks with missing type information

        if missing_info_blocks:
            if len(dataset_types) > 1:
                raise Exception('Data and MC files are mixed in dataset %s! '
                    'Unable to determine dataset type for blocks without type info' % dataset)
            elif len(dataset_types) == 0:
                if not opts.datatype:
                    raise Exception('Please supply dataset type via --datatype!')
                dataset_type = opts.datatype
            else:
                dataset_type = dataset_types.pop()
            for (block, block_dump, block_size) in missing_info_blocks:
                yield (block, block_dump, block_size, dataset_type)

def _interact_with_child(self, pid, fd_parent_stdin, fd_parent_stdout, fd_parent_stderr):
    thread_in = self._start_watcher('stdin', False, pid, self._handle_input,
        fd_parent_stdin, self._buffer_stdin, self._event_shutdown)
    thread_out = self._start_watcher('stdout', False, pid, self._handle_output,
        fd_parent_stdout, self._buffer_stdout, self._event_shutdown)
    thread_err = self._start_watcher('stderr', False, pid, self._handle_output,
        fd_parent_stderr, self._buffer_stderr, self._event_shutdown)
    while self._status is None:
        # blocking (with spurious wakeups!) - OSError=unable to wait for child - status=False => OS_ABORT
        (result_pid, status) = ignore_exception(OSError, (pid, False), os.waitpid, pid, 0)
        if result_pid == pid:
            self._status = status
    self._time_finished = time.time()
    self._event_shutdown.set()  # start shutdown of handlers and wait for it to finish
    self._buffer_stdin.finish()  # wakeup process input handler
    thread_in.join()
    thread_out.join()
    thread_err.join()
    for fd_open in set([fd_parent_stdin, fd_parent_stdout, fd_parent_stderr]):
        os.close(fd_open)  # fd_parent_stdin == fd_parent_stdout for pty
    self._buffer_stdout.finish()  # wakeup pending output buffer waits
    self._buffer_stderr.finish()
    self._event_finished.set()

def _display_setup(self, dataset_fn, head):
    if os.path.exists(dataset_fn):
        nick_name_set = set()
        for block in DataProvider.load_from_file(dataset_fn).get_block_list_cached(show_stats=False):
            nick_name_set.add(block[DataProvider.Nickname])
        self._log.info('Mapping between nickname and other settings:')
        report = []

        def _get_dataset_lookup_psrc(psrc):
            is_lookup_cls = isinstance(psrc, ParameterSource.get_class('LookupBaseParameterSource'))
            return is_lookup_cls and ('DATASETNICK' in psrc.get_parameter_deps())
        ps_lookup = lfilter(_get_dataset_lookup_psrc, self._source.get_used_psrc_list())
        for nick in sorted(nick_name_set):
            tmp = {'DATASETNICK': nick}
            for src in ps_lookup:
                src.fill_parameter_content(None, tmp)
            tmp[1] = str.join(', ', imap(os.path.basename,
                self._nm_cfg.lookup(nick, '', is_selector=False)))
            tmp[2] = str_lumi_nice(self._nm_lumi.lookup(nick, '', is_selector=False))
            report.append(tmp)
        ConsoleTable.create(head, report, 'cl')

def do_transfer(self, desc_source_target_list):
    for (desc, source, target) in desc_source_target_list:
        if not self._storage_paths:
            raise ConfigError("%s can't be transferred because '%s path wasn't set" % (
                desc, self._storage_channel))
        for idx, se_path in enumerate(set(self._storage_paths)):
            activity = Activity('Copy %s to SE %d ' % (desc, idx + 1))
            proc = se_copy(source, os.path.join(se_path, target), self._storage_force)
            proc.status(timeout=5 * 60, terminate=True)
            activity.finish()
            if proc.status(timeout=0) == 0:
                self._log.info('Copy %s to SE %d finished', desc, idx + 1)
            else:
                self._log.info('Copy %s to SE %d failed', desc, idx + 1)
                self._log.log_process(proc)
                self._log.critical('Unable to copy %s! You can try to copy it manually.', desc)
                msg = 'Is %s (%s) available on SE %s?' % (desc, source, se_path)
                if not UserInputInterface().prompt_bool(msg, False):
                    raise StorageError('%s is missing on SE %s!' % (desc, se_path))

def _displaySetup(self, dsPath, head):
    if os.path.exists(dsPath):
        nickNames = set()
        for block in DataProvider.loadFromFile(dsPath).getBlocks():
            nickNames.add(block[DataProvider.Nickname])
        log = logging.getLogger('user')
        log.info('Mapping between nickname and other settings:')
        report = []
        (ps_basic, ps_nested) = self._pfactory.getLookupSources()
        if ps_nested:
            log.info('This list doesn\'t show "nickname constants" with multiple values!')
        for nick in sorted(nickNames):
            tmp = {'DATASETNICK': nick}
            for src in ps_basic:
                src.fillParameterInfo(None, tmp)
            tmp[1] = str.join(', ', imap(os.path.basename,
                self._nmCfg.lookup(nick, '', is_selector=False)))
            tmp[2] = formatLumiNice(self._nmLumi.lookup(nick, '', is_selector=False))
            report.append(tmp)
        utils.printTabular(head, report, 'cl')

def _get_graph(instance, graph=None, visited=None):
    graph = graph or {}
    children = _get_instance_children(instance)
    visited = visited or set()

    for child in children:
        child_module = ''
        if hasattr(child, '__module__'):
            child_module = child.__module__ or ''
        child_name = ''
        if hasattr(child, '__name__'):
            child_name = child.__name__ or ''
        child_class_name = child.__class__.__name__ or ''
        if 'grid_control' not in child_module:
            continue
        if 'testsuite' in child_name:
            continue
        if not issubclass(child.__class__, Plugin):
            continue
        if child_class_name in ['instancemethod', 'function', 'type', 'method-wrapper']:
            continue
        if child in (None, True, False):
            continue
        graph.setdefault(instance, []).append(child)
        if child not in visited:
            visited.add(child)
            _get_graph(child, graph, visited)

    return (graph, list(visited))

def _get_workflow_graph(workflow):
    (graph, node_list) = _get_graph(workflow)

    # Process nodes
    node_str_list = []
    map_node2name = {}
    map_node2color = {}
    for node in sorted(node_list, key=lambda x: x.__class__.__name__):
        node_props = {
            'label': '"%s"' % _get_node_label(node),
            'fillcolor': '"%s"' % _get_node_color(node, map_node2color),
            'style': '"filled"',
        }
        if node == workflow:
            node_props['root'] = 'True'
        node_prop_str = str.join('; ', imap(lambda key: '%s = %s' % (key, node_props[key]), node_props))
        node_str_list.append('%s [%s];\n' % (_get_node_name(node, map_node2name), node_prop_str))

    # Process edges
    edge_str_list = []
    for entry in sorted(graph, key=lambda x: x.__class__.__name__):
        for child in sorted(set(graph[entry]), key=lambda x: x.__class__.__name__):
            edge_str_list.append('%s -> %s;\n' % (_get_node_name(entry, map_node2name),
                _get_node_name(child, map_node2name)))

    cluster_str_list = []
    dot_header = 'digraph mygraph {\nmargin=0;\nedge [len=2];\noverlap=compress;splines=True;\n'
    dot_format_string_list = [dot_header] + node_str_list + cluster_str_list + edge_str_list + ['}\n']
    return str.join('', dot_format_string_list)

def _interact_with_child(self, pid, fd_parent_stdin, fd_parent_stdout, fd_parent_stderr):
    thread_in = self._start_thread('stdin', False, pid, self._handle_input,
        fd_parent_stdin, self._buffer_stdin, self._event_shutdown)
    thread_out = self._start_thread('stdout', False, pid, self._handle_output,
        fd_parent_stdout, self._buffer_stdout, self._event_shutdown)
    thread_err = self._start_thread('stderr', False, pid, self._handle_output,
        fd_parent_stderr, self._buffer_stderr, self._event_shutdown)
    while self._status is None:
        try:
            (result_pid, status) = os.waitpid(pid, 0)  # blocking (with spurious wakeups!)
        except OSError:  # unable to wait for child
            (result_pid, status) = (pid, False)  # False == 'OS_ABORT'
        if result_pid == pid:
            self._status = status
    self._time_ended = time.time()
    self._event_shutdown.set()  # start shutdown of handlers and wait for it to finish
    self._buffer_stdin.finish()  # wakeup process input handler
    thread_in.join()
    thread_out.join()
    thread_err.join()
    for fd in set([fd_parent_stdin, fd_parent_stdout, fd_parent_stderr]):
        os.close(fd)  # fd_parent_stdin == fd_parent_stdout for pty
    self._buffer_stdout.finish()  # wakeup pending output buffer waits
    self._buffer_stderr.finish()
    self._event_finished.set()

def create_dbs3_proto_blocks(opts, dataset_blocks):
    for dataset in dataset_blocks:
        missing_info_blocks = []
        dataset_types = set()
        for block in dataset_blocks[dataset]:
            block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
            (block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
            if len(block_dataset_types) > 1:
                raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
            elif len(block_dataset_types) == 1:
                yield (block, block_dump, block_size, block_dataset_types.pop())
            else:
                missing_info_blocks.append((block, block_dump, block_size))
            dataset_types.update(block_dataset_types)  # collect dataset types in this dataset for blocks with missing type information

        if missing_info_blocks:
            if len(dataset_types) > 1:
                raise Exception('Data and MC files are mixed in dataset %s! '
                    'Unable to determine dataset type for blocks without type info' % dataset)
            elif len(dataset_types) == 0:
                if not opts.datatype:
                    raise Exception('Please supply dataset type via --datatype!')
                dataset_type = opts.datatype
            else:
                dataset_type = dataset_types.pop()
            for (block, block_dump, block_size) in missing_info_blocks:
                yield (block, block_dump, block_size, dataset_type)

def _check_get_jobnum_list(self, task, wms, jobnum_list):
    if self._defect_tries:
        num_defect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
        num_wait = num_defect - max(1, int(num_defect / 2 ** self._defect_raster))
        jobnum_list_wait = self._sample(self._defect_counter, num_wait)
        jobnum_list = lfilter(lambda jobnum: jobnum not in jobnum_list_wait, jobnum_list)

    (change, jobnum_list_timeout, reported) = JobManager._check_get_jobnum_list(
        self, task, wms, jobnum_list)
    for jobnum in reported:
        self._defect_counter.pop(jobnum, None)

    if self._defect_tries and (change is not None):
        # make 'raster' iteratively smaller
        self._defect_raster += 1
        if reported:
            self._defect_raster = 1
        for jobnum in ifilter(lambda x: x not in reported, jobnum_list):
            self._defect_counter[jobnum] = self._defect_counter.get(jobnum, 0) + 1
        jobnum_list_kick = lfilter(lambda jobnum: self._defect_counter[jobnum] >= self._defect_tries,
            self._defect_counter)
        if (len(reported) == 0) and (len(jobnum_list) == 1):
            jobnum_list_kick.extend(jobnum_list)
        for jobnum in set(jobnum_list_kick):
            jobnum_list_timeout.append(jobnum)
            self._defect_counter.pop(jobnum)
    return (change, jobnum_list_timeout, reported)

def _displaySetup(self, dsPath, head):
    if os.path.exists(dsPath):
        nickNames = set()
        for block in DataProvider.loadFromFile(dsPath).getBlocks():
            nickNames.add(block[DataProvider.Nickname])
        utils.vprint('Mapping between nickname and other settings:\n', -1)
        report = []
        for nick in sorted(nickNames):
            lumi_filter_str = formatLumi(self._nmLumi.lookup(nick, '', is_selector=False))
            if len(lumi_filter_str) > 4:
                nice_lumi_filter = '%s ... %s (%d entries)' % (lumi_filter_str[0],
                    lumi_filter_str[-1], len(lumi_filter_str))
            else:
                nice_lumi_filter = str.join(', ', lumi_filter_str)
            config_files = self._nmCfg.lookup(nick, '', is_selector=False)
            tmp = {0: nick, 1: str.join(', ', imap(os.path.basename, config_files)),
                2: nice_lumi_filter}
            lookupvars = {'DATASETNICK': nick}
            for src in self._pm.lookupSources:
                src.fillParameterInfo(None, lookupvars)
            tmp.update(lookupvars)
            report.append(tmp)
        utils.printTabular(head, report, 'cl')
        utils.vprint(level=-1)

def __init__(self, config, name):
    self._name = name  # needed for changeView calls before the constructor
    head = [('DATASETNICK', 'Nickname')]

    # Mapping between nickname and config files:
    self._nmCfg = config.getLookup('nickname config', {}, defaultMatcher = 'regex',
        parser = lambda x: lmap(str.strip, x.split(',')), strfun = lambda x: str.join(',', x))
    if not self._nmCfg.empty():
        allConfigFiles = sorted(set(ichain(self._nmCfg.get_values())))
        config.set('config file', str.join('\n', allConfigFiles))
        head.append((1, 'Config file'))
    elif config.get('config file', ''):
        raise ConfigError("Please use 'nickname config' instead of 'config file'")

    # Mapping between nickname and constants - only display - work is handled by the 'normal' parameter factory
    nmCName = config.getList('nickname constants', [], onChange = None)
    param_config = config.changeView(viewClass = 'TaggedConfigView',
        setClasses = None, setNames = None, addSections = ['parameters'])
    param_config.set('constants', str.join(' ', nmCName), '+=')
    for cName in nmCName:
        param_config.set(cName + ' matcher', 'regex')
        param_config.set(cName + ' lookup', 'DATASETNICK')
        head.append((cName, cName))

    # Mapping between nickname and lumi filter - only display - work is handled by the 'normal' lumi filter
    config.set('lumi filter matcher', 'regex')
    if 'nickname lumi filter' in config.getOptions():
        config.set('lumi filter', strDictLong(config.getDict('nickname lumi filter', {}, onChange = None)))
    self._nmLumi = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = None)
    if not self._nmLumi.empty():
        head.append((2, 'Lumi filter'))

    CMSSW.__init__(self, config, name)
    self._displaySetup(config.getWorkPath('datacache.dat'), head)

def get_workflow_graph(workflow):
    graph = getGraph(workflow)
    classCluster = {}
    for entry in graph:
        classCluster.setdefault(getNodeParent(entry.__class__), []).append(entry)
    clusters = ''
    globalNodes = []
    colors = {}
    for (cluster_id, classClusterEntries) in enumerate(classCluster.values()):
        if len(classClusterEntries) == 1:
            globalNodes.append(classClusterEntries[0])
        clusters += 'subgraph cluster_%d {' % cluster_id
        for node in classClusterEntries:
            clusters += '%s [label="%s", fillcolor="%s", style="filled"];\n' % (
                getNodeName(node), getNodeLabel(node), getNodeColor(node, colors))
        clusters += '}\n'

    edgeStr = ''
    for entry in sorted(graph, key=lambda x: x.__class__.__name__):
        for child in sorted(set(graph[entry]), key=lambda x: x.__class__.__name__):
            edgeStr += '%s -> %s;\n' % (getNodeName(entry), getNodeName(child))
    header = 'digraph mygraph {\nmargin=0;\noverlap=scale;splines=True;\n'
    footer = '}\n'
    return header + clusters + edgeStr + footer

def doTransfer(self, listDescSourceTarget): for (desc, source, target) in listDescSourceTarget: if not self.smPaths: raise ConfigError( "%s can't be transferred because '%s path wasn't set" % (desc, self.smOptPrefix)) for idx, sePath in enumerate(set(self.smPaths)): activity = Activity('Copy %s to SE %d ' % (desc, idx + 1)) proc = se_copy(source, os.path.join(sePath, target), self.smForce) proc.status(timeout=5 * 60, terminate=True) activity.finish() if proc.status(timeout=0) == 0: self._log.info('Copy %s to SE %d finished', desc, idx + 1) else: self._log.info('Copy %s to SE %d failed', desc, idx + 1) self._log.critical(proc.stderr.read(timeout=0)) self._log.critical( 'Unable to copy %s! You can try to copy it manually.', desc) if not utils.getUserBool( 'Is %s (%s) available on SE %s?' % (desc, source, sePath), False): raise StorageError('%s is missing on SE %s!' % (desc, sePath))
def getEntries(self, path, metadata, events, seList, objStore):
    datacachePath = os.path.join(objStore.get('GC_WORKDIR', ''), 'datacache.dat')
    source = utils.QM((self._source == '') and os.path.exists(datacachePath), datacachePath, self._source)
    if source and (source not in self._lfnMap):
        pSource = DataProvider.createInstance('ListProvider', createConfig(), source)
        for (n, fl) in imap(lambda b: (b[DataProvider.Dataset], b[DataProvider.FileList]), pSource.getBlocks()):
            self._lfnMap.setdefault(source, {}).update(dict(imap(
                lambda fi: (self.lfnTrans(fi[DataProvider.URL]), n), fl)))
    pList = set()
    for key in ifilter(lambda k: k in metadata, self._parentKeys):
        pList.update(imap(lambda pPath: self._lfnMap.get(source, {}).get(self.lfnTrans(pPath)), metadata[key]))
    metadata['PARENT_PATH'] = lfilter(identity, pList)
    yield (path, metadata, events, seList, objStore)

def uniqueListLR(inList):  # (left to right)
    tmpSet, result = (set(), [])
    # Duplicated items are removed from the right [a,b,a] -> [a,b]
    for x in inList:
        if x not in tmpSet:
            result.append(x)
            tmpSet.add(x)
    return result

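# Quick check (illustrative values) for uniqueListLR above: only the leftmost occurrence of
# each item survives, and the original order is preserved.
assert uniqueListLR(['a', 'b', 'a', 'c', 'b']) == ['a', 'b', 'c']
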
def getGraph(instance, graph=None, visited=None):
    graph = graph or {}
    visited = visited or set()
    children = []
    for attr in dir(instance):
        child = getattr(instance, attr)
        try:
            children.extend(child)
            children.extend(child.values())
        except Exception:
            children.append(child)

    for child in children:
        try:
            if 'grid_control' not in child.__module__:
                continue
            if child.__class__.__name__ in ['instancemethod', 'function', 'type']:
                continue
            graph.setdefault(instance, []).append(child)
            if child not in visited:
                visited.add(child)
                getGraph(child, graph, visited)
        except Exception:
            pass
    return graph

def logging_create_handlers(config, logger_name):
    LogLevelEnum = makeEnum(lmap(lambda level: logging.getLevelName(level).upper(), irange(51)))

    logger = logging.getLogger(logger_name.lower())
    # Set logging level
    logger.setLevel(config.getEnum(logger_name + ' level', LogLevelEnum, logger.level, onChange = None))
    # Set propagate status
    logger.propagate = config.getBool(logger_name + ' propagate', bool(logger.propagate), onChange = None)
    # Setup handlers
    if logger_name + ' handler' in config.getOptions():
        # remove any standard handlers:
        for handler in list(logger.handlers):
            logger.removeHandler(handler)
        handler_list = config.getList(logger_name + ' handler', [], onChange = None)
        for handler_str in set(handler_list):  # add only unique output handlers
            if handler_str == 'stdout':
                handler = StdoutStreamHandler()
            elif handler_str == 'stderr':
                handler = StderrStreamHandler()
            elif handler_str == 'file':
                handler = logging.FileHandler(config.get(logger_name + ' file', onChange = None), 'w')
            elif handler_str == 'debug_file':
                handler = GCLogHandler(config.get(logger_name + ' debug file', onChange = None), 'w')
            else:
                raise Exception('Unknown handler %s for logger %s' % (handler_str, logger_name))
            logger.addHandler(logging_configure_handler(config, logger_name, handler_str, handler))

def getGraph(instance, graph = None, visited = None):
    graph = graph or {}
    visited = visited or set()
    children = []
    for attr in dir(instance):
        child = getattr(instance, attr)
        try:
            children.extend(child)
            children.extend(child.values())
        except Exception:
            children.append(child)

    for child in children:
        try:
            if 'grid_control' not in child.__module__:
                continue
            if child.__class__.__name__ in ['instancemethod', 'function', 'type']:
                continue
            if child in (None, True, False):
                continue
            graph.setdefault(instance, []).append(child)
            if child not in visited:
                visited.add(child)
                getGraph(child, graph, visited)
        except Exception:
            clear_current_exception()
    return graph

def display(self):
    (catStateDict, catDescDict, _) = CategoryBaseReport._getCategoryStateSummary(self)
    infos = []
    head = set()
    stateCat = {Job.SUCCESS: 'SUCCESS', Job.FAILED: 'FAILED',
        Job.RUNNING: 'RUNNING', Job.DONE: 'RUNNING'}
    for catKey in catDescDict:
        tmp = dict(catDescDict[catKey])
        head.update(tmp.keys())
        for stateKey in catStateDict[catKey]:
            state = stateCat.get(stateKey, 'WAITING')
            tmp[state] = tmp.get(state, 0) + catStateDict[catKey][stateKey]
        infos.append(tmp)

    stateCatList = ['WAITING', 'RUNNING', 'FAILED', 'SUCCESS']
    utils.printTabular(lmap(lambda x: (x, x), sorted(head) + stateCatList), infos,
        'c' * len(head), fmt=dict.fromkeys(stateCatList, lambda x: '%7d' % parseStr(x, int, 0)))

def process_dbs3_json_blocks(opts, block_dump_iter):
    log = logging.getLogger('dbs3-migration')
    log.setLevel(logging.DEBUG)

    # dry run without import - just store block dumps in temp dir
    if opts.do_import:
        return dump_dbs3_json(opts.tempdir, block_dump_iter)
    # set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.target_instance)
    dbs3_source_client = DBS3LiteClient(url=opts.source_instance)
    dbs3_migration_queue = DBS3MigrationQueue()
    dbs3_migration_file = os.path.join(opts.tempdir, 'dbs3_migration.pkl')
    # migrate parents and register datasets with dbs3
    for block_dump in block_dump_iter:
        if not opts.continue_migration:
            # initiate the dbs3 to dbs3 migration of parent blocks
            log.debug('Checking parentage for block: %s', block_dump['block']['block_name'])
            unique_parent_lfns = set(imap(lambda parent: parent['parent_logical_file_name'],
                block_dump['file_parent_list']))
            unique_blocks = set()
            for parent_lfn in unique_parent_lfns:
                for block in dbs3_source_client.get_dbs_block_list(logical_file_name=parent_lfn):
                    unique_blocks.add(block['block_name'])
            for parent_block in unique_blocks:
                if dbs3_target_client.get_dbs_block_list(block_name=parent_block):
                    log.debug('Block %s is already at destination', parent_block)
                    continue
                migration_task = MigrationTask(block_name=parent_block,
                    migration_url=opts.dbsSource, dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued:
                    log.exception('Already queued')
                    clear_current_exception()
            dbs3_migration_queue.save_to_disk(dbs3_migration_file)
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(dbs3_migration_file)
            except IOError:
                log.exception('Probably, there is no DBS 3 migration for this dataset ongoing')
                raise
        # wait for all parent blocks migrated to dbs3
        do_migration(dbs3_migration_queue)
        # insert block into dbs3
        dbs3_target_client.insert_dbs_block_dump(block_dump)

def process(self, pNum, splitInfo, result):
    for idx, mkey in enumerate(splitInfo.get(DataSplitter.MetadataHeader, [])):
        if mkey in self._metadata:
            tmp = set(imap(lambda x: x[idx], splitInfo[DataSplitter.Metadata]))
            if len(tmp) == 1:
                result[mkey] = tmp.pop()

def report():
    for nick in sorted(set(self.nmCfg.keys() + self.nmConst.keys() + self.nmLumi.keys())):
        tmp = {
            0: nick,
            1: str.join(", ", map(os.path.basename, self.nmCfg.get(nick, ""))),
            2: self.displayLumi(self.nmLumi.get(nick, "")),
        }
        yield utils.mergeDicts([tmp, self.nmConst.get(nick, {})])

def getNodes(self):
    (result, active) = (set(), False)
    for group in utils.LoggedProcess(self.configExec, '-shgrpl').iter():
        result.add(group.strip())
        for host in utils.LoggedProcess(self.configExec, '-shgrp_resolved %s' % group).iter():
            result.update(host.split())
    if len(result) > 0:
        return list(result)

def execute(self, wmsIDs):  # yields list of (wmsID, job_status, job_info)
    checked_ids = set()
    for (wmsID, job_status, job_info) in self._executor.execute(wmsIDs):
        checked_ids.add(wmsID)
        yield (wmsID, job_status, job_info)
    if self._executor.get_status() == CheckStatus.OK:
        for wmsID in wmsIDs:
            if wmsID not in checked_ids:
                yield (wmsID, self._missing_state, {})

def show_report(self, job_db, jobnum_list):
    (header_list, job_env_dict_list, vn_set) = ([], [], set())
    for jobnum in jobnum_list:
        job_env_dict = self._task.get_job_dict(jobnum)
        vn_set.update(job_env_dict)
        job_env_dict.update(self._task.get_transient_variables())
        job_env_dict_list.append(job_env_dict)
    header_list.extend(imap(lambda key: (key, '<%s>' % key), self._task.get_transient_variables()))
    self._show_table(sorted(header_list + lzip(vn_set, vn_set)), job_env_dict_list)

def cms_name_to_se(self, cms_name):
    cms_name_regex = re.compile(cms_name.replace('*', '.*').replace('%', '.*'))

    def _select_psn_site(site):
        return site['type'] == 'psn' and cms_name_regex.match(site['alias'])
    psn_site_names = ifilter(_select_psn_site, self._query('site-names'))
    site_aliases = set(imap(lambda x: x['alias'], psn_site_names))

    def _select_se(resource):
        return (resource['type'] == 'SE') and (resource['alias'] in site_aliases)
    return lmap(lambda x: x['fqdn'], ifilter(_select_se, self._query('site-resources')))

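# Standalone sketch (illustrative, not from the original source) of the pattern translation
# done at the top of cms_name_to_se above: CMS site patterns use '*' and '%' as wildcards,
# both of which are mapped to the regex '.*'. The site names below are just example values.
def _example_cms_name_pattern():
    import re
    cms_name = 'T2_DE_*'
    cms_name_regex = re.compile(cms_name.replace('*', '.*').replace('%', '.*'))
    assert cms_name_regex.match('T2_DE_DESY')
    assert cms_name_regex.match('T2_DE_RWTH')
    assert not cms_name_regex.match('T1_DE_KIT')
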
def _cleanup_running(self):
    # clean running activity list
    running_thread_names = set(imap(get_thread_name, threading.enumerate()))
    for thread_name in list(Activity.running_by_thread_name):
        if thread_name not in running_thread_names:
            finished_activities = Activity.running_by_thread_name.get(thread_name, [])
            while finished_activities:
                finished_activities[-1].finish()
            Activity.running_by_thread_name.pop(thread_name, None)

def list_parameters(opts, psource):
    (result, needGCParam) = get_parameters(opts, psource)
    enabledOutput = opts.output.split(',')
    output = lfilter(lambda k: not opts.output or k in enabledOutput, psource.getJobKeys())
    stored = lfilter(lambda k: k.untracked == False, output)
    untracked = lfilter(lambda k: k.untracked == True, output)

    if opts.collapse > 0:
        result_old = result
        result = {}
        result_nicks = {}
        head = [('COLLATE_JOBS', '# of jobs')]
        if 'DATASETSPLIT' in stored:
            stored.remove('DATASETSPLIT')
            if opts.collapse == 1:
                stored.append('DATASETNICK')
                head.append(('DATASETNICK', 'DATASETNICK'))
            elif opts.collapse == 2:
                head.append(('COLLATE_NICK', '# of nicks'))
        for pset in result_old:
            if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
                pset.pop('DATASETSPLIT')
            nickname = None
            if ('DATASETNICK' in pset) and (opts.collapse == 2):
                nickname = pset.pop('DATASETNICK')
            h = md5_hex(repr(lmap(lambda key: pset.get(str(key)), stored)))
            result.setdefault(h, []).append(pset)
            result_nicks.setdefault(h, set()).add(nickname)

        def doCollate(h):
            tmp = result[h][0]
            tmp['COLLATE_JOBS'] = len(result[h])
            tmp['COLLATE_NICK'] = len(result_nicks[h])
            return tmp
        result = lmap(doCollate, result)
    else:
        head = [('GC_JOB_ID', '#')]
        if needGCParam:
            head.append(('GC_PARAM', 'GC_PARAM'))
    if opts.active:
        head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
    if opts.visible:
        stored = opts.visible.split(',')
    head.extend(sorted(izip(stored, stored)))
    if opts.untracked:
        head.extend(sorted(imap(lambda n: (n, '(%s)' % n),
            ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'], untracked))))
    utils.vprint('')
    utils.printTabular(head, result)

def execute(self, wms_id_list):  # yields list of (wms_id, job_status, job_info)
    checked_ids = set()
    for (wms_id, job_status, job_info) in self._executor.execute(wms_id_list):
        checked_ids.add(wms_id)
        yield (wms_id, job_status, job_info)
    if self._executor.get_status() == CheckStatus.OK:
        for wms_id in wms_id_list:
            if wms_id not in checked_ids:
                yield (wms_id, self._missing_state, {})

def get_dataset_name_list(self):
    if self._cache_dataset is None:
        self._cache_dataset = set()
        exc = ExceptionCollector()
        for provider in self._provider_list:
            try:
                self._cache_dataset.update(provider.get_dataset_name_list())
            except Exception:
                exc.collect()
        exc.raise_any(DatasetError('Could not retrieve all datasets!'))
    return list(self._cache_dataset)

def _processFI(self, block, idxRuns, idxLumi):
    for fi in block[DataProvider.FileList]:
        if (not self._lumi_filter.empty()) and not self._acceptLumi(block, fi, idxRuns, idxLumi):
            continue
        if (self._lumi_keep == LumiKeep.Run) and (idxLumi is not None):
            if idxRuns is not None:
                fi[DataProvider.Metadata][idxRuns] = list(set(fi[DataProvider.Metadata][idxRuns]))
            fi[DataProvider.Metadata].pop(idxLumi)
        elif self._lumi_keep == LumiKeep.none:
            removeRunLumi(fi[DataProvider.Metadata], idxRuns, idxLumi)
        yield fi

def process(self, pNum, splitInfo, result):
    for idx, mkey in enumerate(splitInfo.get(DataSplitter.MetadataHeader, [])):
        if mkey in self._metadata:
            def getMetadataProtected(x):
                if idx < len(x):
                    return x[idx]
            tmp = set(imap(getMetadataProtected, splitInfo[DataSplitter.Metadata]))
            if len(tmp) == 1:
                value = tmp.pop()
                if value is not None:
                    result[mkey] = value
