Example #1
	def _get_possible_merge_categories(self, map_cat2desc):
		# Get dictionary with categories that will get merged when removing a variable
		def _eq_dict(dict_a, dict_b, key):
			# Merge parameters to reach category goal - NP hard problem, so be greedy and quick!
			dict_a = dict(dict_a)
			dict_b = dict(dict_b)
			dict_a.pop(key)
			dict_b.pop(key)
			return dict_a == dict_b

		var_key_result = {}
		cat_key_search_dict = {}
		for cat_key in map_cat2desc:
			for var_key in map_cat2desc[cat_key]:
				if var_key not in cat_key_search_dict:
					cat_key_search = set(map_cat2desc.keys())
				else:
					cat_key_search = cat_key_search_dict[var_key]
				if cat_key_search:
					matches = lfilter(lambda ck: _eq_dict(map_cat2desc[cat_key],
						map_cat2desc[ck], var_key), cat_key_search)
					if matches:
						cat_key_search_dict[var_key] = cat_key_search.difference(set(matches))
						var_key_result.setdefault(var_key, []).append(matches)
		return var_key_result
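For illustration, a minimal standalone sketch of the idea behind Example #1 (hypothetical names, no dependency on the surrounding class or lfilter; it checks all category pairs instead of the greedy search used above): removing a variable key merges two categories exactly when their descriptions agree on every other key.

def get_possible_merge_categories(map_cat2desc):
	def _eq_without(desc_a, desc_b, key):
		(desc_a, desc_b) = (dict(desc_a), dict(desc_b))
		desc_a.pop(key, None)
		desc_b.pop(key, None)
		return desc_a == desc_b

	result = {}
	for cat_a in map_cat2desc:
		for cat_b in map_cat2desc:
			if cat_a < cat_b:
				for var_key in map_cat2desc[cat_a]:
					if _eq_without(map_cat2desc[cat_a], map_cat2desc[cat_b], var_key):
						result.setdefault(var_key, set()).update([cat_a, cat_b])
	return result

map_cat2desc = {1: {'NICK': 'A', 'SE': 'X'}, 2: {'NICK': 'B', 'SE': 'X'}, 3: {'NICK': 'A', 'SE': 'Y'}}
print(get_possible_merge_categories(map_cat2desc))  # {'NICK': {1, 2}, 'SE': {1, 3}}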
Example #2
	def _resync(self):
		if self._data_provider:
			activity = Activity('Performing resync of datasource %r' % self._name)
			# Get old and new dataset information
			ds_old = DataProvider.loadFromFile(self._getDataPath('cache.dat')).getBlocks(show_stats = False)
			self._data_provider.clearCache()
			ds_new = self._data_provider.getBlocks(show_stats = False)
			self._data_provider.saveToFile(self._getDataPath('cache-new.dat'), ds_new)

			# Use old splitting information to synchronize with new dataset infos
			old_maxN = self._data_splitter.getMaxJobs()
			jobChanges = self._data_splitter.resyncMapping(self._getDataPath('map-new.tar'), ds_old, ds_new)
			activity.finish()
			if jobChanges is not None:
				# Move current splitting to backup and use the new splitting from now on
				def backupRename(old, cur, new):
					if self._keepOld:
						os.rename(self._getDataPath(cur), self._getDataPath(old))
					os.rename(self._getDataPath(new), self._getDataPath(cur))
				backupRename(  'map-old-%d.tar' % time.time(),   'map.tar',   'map-new.tar')
				backupRename('cache-old-%d.dat' % time.time(), 'cache.dat', 'cache-new.dat')
				self._data_splitter.importPartitions(self._getDataPath('map.tar'))
				self._maxN = self._data_splitter.getMaxJobs()
				self._log.debug('Dataset resync finished: %d -> %d partitions', old_maxN, self._maxN)
				return (set(jobChanges[0]), set(jobChanges[1]), old_maxN != self._maxN)
Example #3
	def getCMSFiles(self, blockPath):
		lumiDict = {}
		if self.selectedLumis: # Central lumi query
			lumiDict = self.getCMSLumisImpl(blockPath)
			lumiDict = QM(lumiDict, lumiDict, {})
		for (fileInfo, listLumi) in self.getCMSFilesImpl(blockPath, self.onlyValid, self.selectedLumis):
			if self.selectedLumis:
				if not listLumi:
					listLumi = lumiDict.get(fileInfo[DataProvider.URL], [])
				def acceptLumi():
					for (run, lumiList) in listLumi:
						for lumi in lumiList:
							if selectLumi((run, lumi), self.selectedLumis):
								return True
				if not acceptLumi():
					continue
				if self.includeLumi:
					(listLumiExt_Run, listLumiExt_Lumi) = ([], [])
					for (run, lumi_list) in sorted(listLumi):
						for lumi in lumi_list:
							listLumiExt_Run.append(run)
							listLumiExt_Lumi.append(lumi)
					fileInfo[DataProvider.Metadata] = [listLumiExt_Run, listLumiExt_Lumi]
				else:
					fileInfo[DataProvider.Metadata] = [sorted(set(map(lambda run_lumi: run_lumi[0], listLumi)))]
			yield fileInfo
Example #4
	def _resync_psrc(self):
		activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
		# Get old and new dataset information
		provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
		block_list_old = provider_old.get_block_list_cached(show_stats=False)
		self._provider.clear_cache()
		block_list_new = self._provider.get_block_list_cached(show_stats=False)
		self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

		# Use old splitting information to synchronize with new dataset infos
		partition_len_old = self.get_parameter_len()
		partition_changes = self._resync_partitions(
			self._get_data_path('map-new.tar'), block_list_old, block_list_new)
		activity.finish()
		if partition_changes is not None:
			# Move current splitting to backup and use the new splitting from now on
			def _rename_with_backup(new, cur, old):
				if self._keep_old:
					os.rename(self._get_data_path(cur), self._get_data_path(old))
				os.rename(self._get_data_path(new), self._get_data_path(cur))
			_rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
			_rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
			self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
			self._log.debug('Dataset resync finished: %d -> %d partitions', partition_len_old, self._len)
			(pnum_list_redo, pnum_list_disable) = partition_changes
			return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
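The rename-with-backup step shared by both resync variants above follows a small pattern; here is a self-contained sketch (hypothetical file names in a temporary directory, not the original helper): the freshly written file replaces the current one, optionally keeping the previous version under a timestamped name.

import os
import tempfile
import time

def rename_with_backup(new, cur, old, keep_old=True):
	if keep_old and os.path.exists(cur):
		os.rename(cur, old)
	os.rename(new, cur)

base = tempfile.mkdtemp()
for fn in ('cache.dat', 'cache-new.dat'):
	open(os.path.join(base, fn), 'w').close()
rename_with_backup(os.path.join(base, 'cache-new.dat'), os.path.join(base, 'cache.dat'),
	os.path.join(base, 'cache-old-%d.dat' % time.time()))
print(sorted(os.listdir(base)))  # ['cache-old-<timestamp>.dat', 'cache.dat']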
Example #5
	def getSubmissionJobs(self, maxsample, static = {'showBlocker': True}):
		# Get list of submittable jobs
		readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
		retryOK = readyList
		defaultJob = Job()
		if self.maxRetry >= 0:
			retryOK = filter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self.maxRetry, readyList)
		modOK = filter(self._task.canSubmit, readyList)
		jobList = set.intersection(set(retryOK), set(modOK))

		if static['showBlocker'] and len(readyList) > 0 and len(jobList) == 0: # No submission but ready jobs
			err = []
			err += utils.QM(len(retryOK) > 0 and len(modOK) == 0, [], ['have hit their maximum number of retries'])
			err += utils.QM(len(retryOK) == 0 and len(modOK) > 0, [], ['are vetoed by the task module'])
			utils.vprint('All remaining jobs %s!' % str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err), -1, True)
		static['showBlocker'] = not (len(readyList) > 0 and len(jobList) == 0)

		# Determine number of jobs to submit
		submit = len(jobList)
		if self.inQueue > 0:
			submit = min(submit, self.inQueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
		if self.inFlight > 0:
			submit = min(submit, self.inFlight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
		if self.continuous:
			submit = min(submit, maxsample)
		submit = max(submit, 0)

		if self.doShuffle:
			return self.sample(jobList, submit)
		else:
			return sorted(jobList)[:submit]
Example #6
	def _getSubmissionJobs(self, maxsample):
		# Get list of submittable jobs
		readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
		retryOK = readyList
		defaultJob = Job()
		if self._job_retries >= 0:
			retryOK = lfilter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self._job_retries, readyList)
		modOK = lfilter(self._task.canSubmit, readyList)
		jobList = set.intersection(set(retryOK), set(modOK))

		if self._showBlocker and readyList and not jobList: # No submission but ready jobs
			err = []
			err += utils.QM((len(retryOK) > 0) and (len(modOK) == 0), [], ['have hit their maximum number of retries'])
			err += utils.QM((len(retryOK) == 0) and (len(modOK) > 0), [], ['are vetoed by the task module'])
			self._log_user_time.warning('All remaining jobs %s!', str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err))
		self._showBlocker = not (len(readyList) > 0 and len(jobList) == 0)

		# Determine number of jobs to submit
		submit = len(jobList)
		if self._njobs_inqueue > 0:
			submit = min(submit, self._njobs_inqueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
		if self._njobs_inflight > 0:
			submit = min(submit, self._njobs_inflight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
		if self._continuous and (maxsample > 0):
			submit = min(submit, maxsample)
		submit = max(submit, 0)

		if self._do_shuffle:
			return self._sample(jobList, submit)
		return sorted(jobList)[:submit]
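The submission cap computed in both versions of getSubmissionJobs can be illustrated with made-up numbers; this is a hedged standalone sketch of the arithmetic, not the original method:

def get_submit_count(n_submittable, njobs_inqueue, n_queued, njobs_inflight, n_running,
		maxsample=0, continuous=False):
	submit = n_submittable
	if njobs_inqueue > 0:
		submit = min(submit, njobs_inqueue - n_queued)
	if njobs_inflight > 0:
		submit = min(submit, njobs_inflight - n_running)
	if continuous and (maxsample > 0):
		submit = min(submit, maxsample)
	return max(submit, 0)

print(get_submit_count(100, njobs_inqueue=10, n_queued=4, njobs_inflight=50, n_running=48))  # 2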
Example #7
	def __init__(self, config, datasource_name):
		BaseConsistencyProcessor.__init__(self, config, datasource_name)
		self._check_url = config.get_enum(self._get_dproc_opt('check unique url'),
			DatasetUniqueMode, DatasetUniqueMode.abort)
		self._check_block = config.get_enum(self._get_dproc_opt('check unique block'),
			DatasetUniqueMode, DatasetUniqueMode.abort)
		(self._recorded_url, self._recorded_block) = (set(), set())
Example #8
	def _resync_adapter(self, pa_old, pa_new, result_redo, result_disable, size_change):
		(map_jobnum2pnum, pspi_list_added, pspi_list_missing) = _diff_pspi_list(pa_old, pa_new,
			result_redo, result_disable)
		# Reorder and reconstruct parameter space with the following layout:
		# NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file)
		# <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD
		if pspi_list_added:
			_extend_map_jobnum2pnum(map_jobnum2pnum, pa_old.get_job_len(), pspi_list_added)
		if pspi_list_missing:
			# extend the parameter source by placeholders for the missing parameter space points
			psrc_missing = _create_placeholder_psrc(pa_old, pa_new,
				map_jobnum2pnum, pspi_list_missing, result_disable)
			self._psrc = ParameterSource.create_instance('ChainParameterSource',
				self._psrc_raw, psrc_missing)

		self._map_jobnum2pnum = map_jobnum2pnum  # Update Job2PID map
		# Write resynced state
		self._write_jobnum2pnum(self._path_jobnum2pnum + '.tmp')
		ParameterSource.get_class('GCDumpParameterSource').write(self._path_params + '.tmp',
			self.get_job_len(), self.get_job_metadata(), self.iter_jobs())
		os.rename(self._path_jobnum2pnum + '.tmp', self._path_jobnum2pnum)
		os.rename(self._path_params + '.tmp', self._path_params)

		result_redo = result_redo.difference(result_disable)
		if result_redo or result_disable:
			map_pnum2jobnum = reverse_dict(self._map_jobnum2pnum)

			def _translate_pnum(pnum):
				return map_pnum2jobnum.get(pnum, pnum)
			result_redo = set(imap(_translate_pnum, result_redo))
			result_disable = set(imap(_translate_pnum, result_disable))
			return (result_redo, result_disable, size_change)
		return (set(), set(), size_change)
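A minimal sketch (made-up data, local stand-in for the reverse_dict helper) of the pnum-to-jobnum translation at the end of Example #8: redo/disable results arrive as parameter numbers and are mapped back to job numbers via the reversed jobnum-to-pnum map, with unmapped pnums passing through unchanged.

def reverse_dict(mapping):
	return dict((value, key) for (key, value) in mapping.items())

map_jobnum2pnum = {0: 0, 1: 2, 2: 1}
map_pnum2jobnum = reverse_dict(map_jobnum2pnum)
result_redo = set([1, 3])  # pnum 3 is not mapped to any job and passes through unchanged
print(set(map_pnum2jobnum.get(pnum, pnum) for pnum in result_redo))  # {2, 3}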
Example #9
 def resync_psrc(self):
     (psrc_redo, psrc_disable, _) = self._psrc.resync_psrc()
     result_redo = set(lfilter(lambda pnum: pnum < self._max_len,
                               psrc_redo))
     result_disable = set(
         lfilter(lambda pnum: pnum < self._max_len, psrc_disable))
     self._psrc_len = self._psrc.get_parameter_len()
     return (result_redo, result_disable, False
             )  # size can never change on-the-fly
Example #10
 def __init__(self, config, datasource_name):
     BaseConsistencyProcessor.__init__(self, config, datasource_name)
     self._check_url = config.get_enum(
         self._get_dproc_opt('check unique url'), DatasetUniqueMode,
         DatasetUniqueMode.abort)
     self._check_block = config.get_enum(
         self._get_dproc_opt('check unique block'), DatasetUniqueMode,
         DatasetUniqueMode.abort)
     (self._recorded_url, self._recorded_block) = (set(), set())
Example #11
def process_dbs3_json_blocks(opts, block_dump_iter):
    logger = logging.getLogger('dbs3-migration')
    logger.setLevel(logging.DEBUG)

    # dry run without import - just store block dumps in temp dir
    if opts.do_import:
        return dump_dbs3_json(opts.tempdir, block_dump_iter)
    # set-up dbs clients
    dbs3_target_client = DBS3LiteClient(url=opts.target_instance)
    dbs3_source_client = DBS3LiteClient(url=opts.source_instance)
    dbs3_migration_queue = DBS3MigrationQueue()
    dbs3_migration_file = os.path.join(opts.tempdir, 'dbs3_migration.pkl')

    # migrate parents and register datasets with dbs3
    for blockDump in block_dump_iter:
        if not opts.continue_migration:
            # initiate the dbs3 to dbs3 migration of parent blocks
            logger.debug('Checking parentage for block: %s',
                         blockDump['block']['block_name'])
            unique_parent_lfns = set(
                imap(lambda parent: parent['parent_logical_file_name'],
                     blockDump['file_parent_list']))
            unique_blocks = set()
            for parent_lfn in unique_parent_lfns:
                for block in dbs3_source_client.listBlocks(
                        logical_file_name=parent_lfn):
                    unique_blocks.add(block['block_name'])
            for parent_block in unique_blocks:
                if dbs3_target_client.listBlocks(
                        block_name=parent_block
                ):  # block already at destination
                    logger.debug('Block %s is already at destination',
                                 parent_block)
                    continue
                migration_task = MigrationTask(block_name=parent_block,
                                               migration_url=opts.dbsSource,
                                               dbs_client=dbs3_target_client)
                try:
                    dbs3_migration_queue.add_migration_task(migration_task)
                except AlreadyQueued as aq:
                    logger.debug(aq.message)
            dbs3_migration_queue.save_to_disk(dbs3_migration_file)
        else:
            try:
                dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(
                    dbs3_migration_file)
            except IOError:
                logger.exception(
                    'There is probably no ongoing DBS 3 migration for this dataset'
                )
                raise

        # wait for all parent blocks migrated to dbs3
        do_migration(dbs3_migration_queue)
        # insert block into dbs3
        dbs3_target_client.insertBulkBlock(blockDump)
Example #12
	def _resync(self):  # This function is _VERY_ time critical!
		tmp = self._psrc_raw.resync_psrc()  # First ask about psrc changes
		(result_redo, result_disable, size_change) = (set(tmp[0]), set(tmp[1]), tmp[2])
		psrc_hash_new = self._psrc_raw.get_psrc_hash()
		psrc_hash_changed = self._psrc_hash_stored != psrc_hash_new
		self._psrc_hash_stored = psrc_hash_new
		if not (result_redo or result_disable or size_change or psrc_hash_changed):
			return ParameterSource.get_empty_resync_result()

		ps_old = ParameterSource.create_instance('GCDumpParameterSource', self._path_params)
		pa_old = ParameterAdapter(None, ps_old)
		pa_new = ParameterAdapter(None, self._psrc_raw)
		return self._resync_adapter(pa_old, pa_new, result_redo, result_disable, size_change)
Example #13
		def getKeyMergeResults():
			varKeyResult = {}
			catKeySearchDict = {}
			for catKey in catDescDict:
				for varKey in catDescDict[catKey]:
					if varKey not in catKeySearchDict:
						catKeySearch = set(catDescDict.keys())
					else:
						catKeySearch = catKeySearchDict[varKey]
					if catKeySearch:
						matches = filter(lambda ck: eqDict(catDescDict[catKey], catDescDict[ck], varKey), catKeySearch)
						if matches:
							catKeySearchDict[varKey] = catKeySearch.difference(set(matches))
							varKeyResult.setdefault(varKey, []).append(matches)
			return varKeyResult
Example #14
def process_fwjr(sample, fwjr_xml_dom, map_sample2run_info_dict,
                 map_sample2input_events, map_sample2output_events):
    def _get_element_data(node, name):
        return node.getElementsByTagName(name)[0].childNodes[0].data

    # Collect run lumi information
    for run_node in fwjr_xml_dom.getElementsByTagName('Run'):
        for lumi_node in run_node.getElementsByTagName('LumiSection'):
            run = int(run_node.getAttribute('ID'))
            lumi = int(lumi_node.getAttribute('ID'))
            map_sample2run_info_dict.setdefault(sample,
                                                {}).setdefault(run,
                                                               set()).add(lumi)
    # Collect output file information
    for output_file_node in fwjr_xml_dom.getElementsByTagName('File'):
        pfn = _get_element_data(output_file_node, 'PFN')
        if pfn not in map_sample2output_events.setdefault(sample, {}):
            map_sample2output_events[sample][pfn] = 0
        map_sample2output_events[sample][pfn] += int(
            _get_element_data(output_file_node, 'TotalEvents'))
    # Collect input file information
    for input_file_node in fwjr_xml_dom.getElementsByTagName('InputFile'):
        if sample not in map_sample2input_events:
            map_sample2input_events[sample] = 0
        map_sample2input_events[sample] += int(
            _get_element_data(input_file_node, 'EventsRead'))
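The _get_element_data helper in process_fwjr simply takes the text of the first matching element; a self-contained sketch with a hypothetical XML fragment:

from xml.dom import minidom

def get_element_data(node, name):
	return node.getElementsByTagName(name)[0].childNodes[0].data

fwjr_xml_dom = minidom.parseString('<File><PFN>output.root</PFN><TotalEvents>100</TotalEvents></File>')
print(get_element_data(fwjr_xml_dom, 'PFN'), int(get_element_data(fwjr_xml_dom, 'TotalEvents')))
# output.root 100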
Example #15
	def _get_sandbox_file_list(self, task, sm_list):
		# Prepare all input files
		dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
		dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
			lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
		task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
			*imap(lambda x: x.get_task_dict(), [task] + sm_list))
		task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
			'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
		task_config_str_list = DictFormat(escape_strings=True).format(
			task_config_dict, format='export %s%s%s\n')
		vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
			self._remote_event_handler.get_mon_env_dict().keys()))
		vn_alias_dict.update(task.get_var_alias_map())
		vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

		# Resolve wildcards in task input files
		def _get_task_fn_list():
			for fpi in task.get_sb_in_fpi_list():
				matched = glob.glob(fpi.path_abs)
				if matched != []:
					for match in matched:
						yield match
				else:
					yield fpi.path_abs
		return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(), [
			VirtualFile('_config.sh', sorted(task_config_str_list)),
			VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])
Example #16
    def _checkJobList(self, wms, jobList):
        if self._defect_tries:
            nDefect = len(self._defect_counter
                          )  # Waiting list gets larger in case reported == []
            waitList = self._sample(
                self._defect_counter,
                nDefect - max(1, int(nDefect / 2**self._defect_raster)))
            jobList = lfilter(lambda x: x not in waitList, jobList)

        (change, timeoutList,
         reported) = JobManager._checkJobList(self, wms, jobList)
        for jobNum in reported:
            self._defect_counter.pop(jobNum, None)

        if self._defect_tries and (change is not None):
            self._defect_raster = utils.QM(
                reported, 1,
                self._defect_raster + 1)  # make 'raster' iteratively smaller
            for jobNum in ifilter(lambda x: x not in reported, jobList):
                self._defect_counter[jobNum] = self._defect_counter.get(
                    jobNum, 0) + 1
            kickList = lfilter(
                lambda jobNum: self._defect_counter[jobNum] >= self.
                _defect_tries, self._defect_counter)
            for jobNum in set(kickList + utils.QM(
                (len(reported) == 0) and (len(jobList) == 1), jobList, [])):
                timeoutList.append(jobNum)
                self._defect_counter.pop(jobNum)

        return (change, timeoutList, reported)
Example #17
	def _get_sandbox_file_list(self, task, sm_list):
		# Prepare all input files
		dep_list = set(ichain(imap(lambda x: x.get_dependency_list(), [task] + sm_list)))
		dep_fn_list = lmap(lambda dep: resolve_path('env.%s.sh' % dep,
			lmap(lambda pkg: get_path_share('', pkg=pkg), os.listdir(get_path_pkg()))), dep_list)
		task_config_dict = dict_union(self._remote_event_handler.get_mon_env_dict(),
			*imap(lambda x: x.get_task_dict(), [task] + sm_list))
		task_config_dict.update({'GC_DEPFILES': str.join(' ', dep_list),
			'GC_USERNAME': self._token.get_user_name(), 'GC_WMS_NAME': self._name})
		task_config_str_list = DictFormat(escape_strings=True).format(
			task_config_dict, format='export %s%s%s\n')
		vn_alias_dict = dict(izip(self._remote_event_handler.get_mon_env_dict().keys(),
			self._remote_event_handler.get_mon_env_dict().keys()))
		vn_alias_dict.update(task.get_var_alias_map())
		vn_alias_str_list = DictFormat(delimeter=' ').format(vn_alias_dict, format='%s%s%s\n')

		# Resolve wildcards in task input files
		def _get_task_fn_list():
			for fpi in task.get_sb_in_fpi_list():
				matched = glob.glob(fpi.path_abs)
				if matched != []:
					for match in matched:
						yield match
				else:
					yield fpi.path_abs
		return lchain([self._remote_event_handler.get_file_list(), dep_fn_list, _get_task_fn_list(), [
			VirtualFile('_config.sh', sorted(task_config_str_list)),
			VirtualFile('_varmap.dat', sorted(vn_alias_str_list))]])
Example #18
def collapse_psp_list(psp_list, tracked_list, opts):
    psp_dict = {}
    psp_dict_nicks = {}
    header_list = [('COLLATE_JOBS', '# of jobs')]
    if 'DATASETSPLIT' in tracked_list:
        tracked_list.remove('DATASETSPLIT')
        if opts.collapse == 1:
            tracked_list.append('DATASETNICK')
            header_list.append(('DATASETNICK', 'DATASETNICK'))
        elif opts.collapse == 2:
            header_list.append(('COLLATE_NICK', '# of nicks'))
    for pset in psp_list:
        if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
            pset.pop('DATASETSPLIT')
        nickname = None
        if ('DATASETNICK' in pset) and (opts.collapse == 2):
            nickname = pset.pop('DATASETNICK')
        hash_str = md5_hex(
            repr(lmap(lambda key: pset.get(str(key)), tracked_list)))
        psp_dict.setdefault(hash_str, []).append(pset)
        psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

    def _do_collate(hash_str):
        psp = psp_dict[hash_str][0]
        psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
        psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
        return psp

    psp_list = sorted(imap(_do_collate, psp_dict),
                      key=lambda x: tuple(imap(str, x.values())))
    return (header_list, psp_list)
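The collapse in Example #18 groups parameter space points by a hash over the tracked keys and reports group sizes instead of individual points; a simplified standalone sketch (hypothetical parameter sets, COLLATE_NICK omitted):

import hashlib

def collapse(psp_list, tracked_list):
	group_map = {}
	for pset in psp_list:
		hash_str = hashlib.md5(repr([pset.get(str(key)) for key in tracked_list]).encode()).hexdigest()
		group_map.setdefault(hash_str, []).append(pset)
	for group in group_map.values():
		psp = dict(group[0])
		psp['COLLATE_JOBS'] = len(group)
		yield psp

psp_list = [{'X': 1, 'SEED': 7}, {'X': 1, 'SEED': 8}, {'X': 2, 'SEED': 9}]
for psp in sorted(collapse(psp_list, ['X']), key=lambda psp: psp['X']):
	print(psp['X'], psp['COLLATE_JOBS'])  # two points collapse onto X=1, one onto X=2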
Example #19
def logging_setup(config):
    if config.getBool('debug mode', False, onChange=None):
        config.set('level', 'NOTSET', '?=')
        config.set('detail lower limit', 'NOTSET')
        config.set('detail upper limit', 'NOTSET')
        config.set('abort handler', 'stdout debug_file', '?=')
        config.setInt('abort code context', 2)
        config.setInt('abort variables', 2)
        config.setInt('abort file stack', 2)
        config.setInt('abort tree', 2)
    display_logger = config.getBool('display logger', False, onChange=None)

    # Find logger names in options
    logger_names = set()
    for option in config.getOptions():
        if option in ['debug mode', 'display logger']:
            pass
        elif option.count(' ') == 0:
            logger_names.add('')
        else:
            logger_names.add(option.split(' ')[0].strip())
    logger_names = sorted(logger_names)
    logger_names.reverse()
    for logger_name in logger_names:
        logging_create_handlers(config, logger_name)

    if display_logger:
        dump_log_setup(logging.WARNING)
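How logger names are derived from the config options in logging_setup, as a sketch with hypothetical option names: single-word options configure the root logger (''), multi-word options are attributed to the logger named by their first word, and the reversed sort sets up child loggers before the root.

option_list = ['level', 'debug mode', 'wms level', 'wms propagate', 'display logger']
logger_names = set()
for option in option_list:
	if option in ['debug mode', 'display logger']:
		pass
	elif option.count(' ') == 0:
		logger_names.add('')
	else:
		logger_names.add(option.split(' ')[0].strip())
print(sorted(logger_names, reverse=True))  # ['wms', '']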
Example #20
def main(opts, args):
	config = get_dataset_config(opts, args)

	provider = config.getPlugin('dataset', cls = DataProvider)
	blocks = provider.getBlocks()
	if len(blocks) == 0:
		raise DatasetError('No blocks!')

	datasets = set(imap(itemgetter(DataProvider.Dataset), blocks))
	if len(datasets) > 1 or opts.info:
		headerbase = [(DataProvider.Dataset, 'Dataset')]
	else:
		print('Dataset: %s' % blocks[0][DataProvider.Dataset])
		headerbase = []

	if opts.list_datasets:
		list_datasets(blocks)
	if opts.list_blocks:
		list_blocks(blocks, headerbase)
	if opts.list_files:
		list_files(datasets, blocks)
	if opts.list_storage:
		list_storage(blocks, headerbase)
	if opts.metadata and not opts.save:
		list_metadata(datasets, blocks)
	if opts.block_metadata and not opts.save:
		list_block_metadata(datasets, blocks)
	if opts.config_entry:
		list_config_entries(opts, blocks, provider)
	if opts.info:
		list_infos(blocks)
	if opts.save:
		save_dataset(opts, provider)
Example #21
 def doTransfer(self, listDescSourceTarget):
     for (desc, source, target) in listDescSourceTarget:
         if not self.smPaths:
             raise ConfigError(
                 "%s can't be transferred because '%s path wasn't set" %
                 (desc, self.smOptPrefix))
         for idx, sePath in enumerate(set(self.smPaths)):
             utils.vprint('Copy %s to SE %d ' % (desc, idx + 1),
                          -1,
                          newline=False)
             sys.stdout.flush()
             proc = se_copy(source, os.path.join(sePath, target),
                            self.smForce)
             if proc.status(timeout=5 * 60, terminate=True) == 0:
                 utils.vprint('finished', -1)
             else:
                 utils.vprint('failed', -1)
                 utils.eprint(proc.stderr.read(timeout=0))
                 utils.eprint(
                     'Unable to copy %s! You can try to copy it manually.' %
                     desc)
                 if not utils.getUserBool(
                         'Is %s (%s) available on SE %s?' %
                     (desc, source, sePath), False):
                     raise StorageError('%s is missing on SE %s!' %
                                        (desc, sePath))
Example #22
def logging_setup(config):
	if config.getBool('debug mode', False, onChange = None):
		config.set('level', 'NOTSET', '?=')
		config.set('detail lower limit', 'NOTSET')
		config.set('detail upper limit', 'NOTSET')
		config.set('abort handler', 'stdout debug_file', '?=')
		config.setInt('abort code context', 2)
		config.setInt('abort variables', 2)
		config.setInt('abort file stack', 2)
		config.setInt('abort tree', 2)
	display_logger = config.getBool('display logger', False, onChange = None)

	# Find logger names in options
	logger_names = set()
	for option in config.getOptions():
		if option in ['debug mode', 'display logger']:
			pass
		elif option.count(' ') == 0:
			logger_names.add('')
		else:
			logger_names.add(option.split(' ')[0].strip())
	logger_names = sorted(logger_names)
	logger_names.reverse()
	for logger_name in logger_names:
		logging_create_handlers(config, logger_name)

	if display_logger:
		dump_log_setup(logging.WARNING)
Example #23
def create_dbs3_proto_blocks(opts, dataset_blocks):
	for dataset in dataset_blocks:
		missing_info_blocks = []
		dataset_types = set()
		for block in dataset_blocks[dataset]:
			block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
			(block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
			if len(block_dataset_types) > 1:
				raise Exception('Data and MC files are mixed in block %s#%s' % (dataset, block[DataProvider.BlockName]))
			elif len(block_dataset_types) == 1:
				yield (block, block_dump, block_size, block_dataset_types.pop())
			else:
				missing_info_blocks.append((block, block_dump, block_size))
			dataset_types.update(block_dataset_types) # collect dataset types in this dataset for blocks with missing type information

		if missing_info_blocks:
			if len(dataset_types) > 1:
				raise Exception('Data and MC files are mixed in dataset %s! Unable to determine dataset type for blocks without type info' % dataset)
			elif len(dataset_types) == 0:
				if not opts.datatype:
					raise Exception('Please supply dataset type via --datatype!')
				dataset_type = opts.datatype
			else:
				dataset_type = dataset_types.pop()
			for (block, block_dump, block_size) in missing_info_blocks:
				yield (block, block_dump, block_size, dataset_type)
Example #24
 def _interact_with_child(self, pid, fd_parent_stdin, fd_parent_stdout,
                          fd_parent_stderr):
     thread_in = self._start_watcher('stdin', False, pid,
                                     self._handle_input, fd_parent_stdin,
                                     self._buffer_stdin,
                                     self._event_shutdown)
     thread_out = self._start_watcher('stdout', False, pid,
                                      self._handle_output, fd_parent_stdout,
                                      self._buffer_stdout,
                                      self._event_shutdown)
     thread_err = self._start_watcher('stderr', False, pid,
                                      self._handle_output, fd_parent_stderr,
                                      self._buffer_stderr,
                                      self._event_shutdown)
     while self._status is None:
         # blocking (with spurious wakeups!) - OSError=unable to wait for child - status=False => OS_ABORT
         (result_pid, status) = ignore_exception(OSError, (pid, False),
                                                 os.waitpid, pid, 0)
         if result_pid == pid:
             self._status = status
     self._time_finished = time.time()
     self._event_shutdown.set(
     )  # start shutdown of handlers and wait for it to finish
     self._buffer_stdin.finish()  # wakeup process input handler
     thread_in.join()
     thread_out.join()
     thread_err.join()
     for fd_open in set(
         [fd_parent_stdin, fd_parent_stdout, fd_parent_stderr]):
         os.close(fd_open)  # fd_parent_stdin == fd_parent_stdout for pty
     self._buffer_stdout.finish()  # wakeup pending output buffer waits
     self._buffer_stderr.finish()
     self._event_finished.set()
Example #25
    def _display_setup(self, dataset_fn, head):
        if os.path.exists(dataset_fn):
            nick_name_set = set()
            for block in DataProvider.load_from_file(
                    dataset_fn).get_block_list_cached(show_stats=False):
                nick_name_set.add(block[DataProvider.Nickname])
            self._log.info('Mapping between nickname and other settings:')
            report = []

            def _get_dataset_lookup_psrc(psrc):
                is_lookup_cls = isinstance(
                    psrc,
                    ParameterSource.get_class('LookupBaseParameterSource'))
                return is_lookup_cls and ('DATASETNICK'
                                          in psrc.get_parameter_deps())

            ps_lookup = lfilter(_get_dataset_lookup_psrc,
                                self._source.get_used_psrc_list())
            for nick in sorted(nick_name_set):
                tmp = {'DATASETNICK': nick}
                for src in ps_lookup:
                    src.fill_parameter_content(None, tmp)
                tmp[1] = str.join(
                    ', ',
                    imap(os.path.basename,
                         self._nm_cfg.lookup(nick, '', is_selector=False)))
                tmp[2] = str_lumi_nice(
                    self._nm_lumi.lookup(nick, '', is_selector=False))
                report.append(tmp)
            ConsoleTable.create(head, report, 'cl')
Example #26
 def do_transfer(self, desc_source_target_list):
     for (desc, source, target) in desc_source_target_list:
         if not self._storage_paths:
             raise ConfigError(
                 "%s can't be transferred because '%s path wasn't set" %
                 (desc, self._storage_channel))
         for idx, se_path in enumerate(set(self._storage_paths)):
             activity = Activity('Copy %s to SE %d ' % (desc, idx + 1))
             proc = se_copy(source, os.path.join(se_path, target),
                            self._storage_force)
             proc.status(timeout=5 * 60, terminate=True)
             activity.finish()
             if proc.status(timeout=0) == 0:
                 self._log.info('Copy %s to SE %d finished', desc, idx + 1)
             else:
                 self._log.info('Copy %s to SE %d failed', desc, idx + 1)
                 self._log.log_process(proc)
                 self._log.critical(
                     'Unable to copy %s! You can try to copy it manually.',
                     desc)
                 msg = 'Is %s (%s) available on SE %s?' % (desc, source,
                                                           se_path)
                 if not UserInputInterface().prompt_bool(msg, False):
                     raise StorageError('%s is missing on SE %s!' %
                                        (desc, se_path))
Example #27
 def _displaySetup(self, dsPath, head):
     if os.path.exists(dsPath):
         nickNames = set()
         for block in DataProvider.loadFromFile(dsPath).getBlocks():
             nickNames.add(block[DataProvider.Nickname])
         log = logging.getLogger('user')
         log.info('Mapping between nickname and other settings:')
         report = []
         (ps_basic, ps_nested) = self._pfactory.getLookupSources()
         if ps_nested:
             log.info(
                 'This list doesn\'t show "nickname constants" with multiple values!'
             )
         for nick in sorted(nickNames):
             tmp = {'DATASETNICK': nick}
             for src in ps_basic:
                 src.fillParameterInfo(None, tmp)
             tmp[1] = str.join(
                 ', ',
                 imap(os.path.basename,
                      self._nmCfg.lookup(nick, '', is_selector=False)))
             tmp[2] = formatLumiNice(
                 self._nmLumi.lookup(nick, '', is_selector=False))
             report.append(tmp)
         utils.printTabular(head, report, 'cl')
Example #28
def collapse_psp_list(psp_list, tracked_list, opts):
	psp_dict = {}
	psp_dict_nicks = {}
	header_list = [('COLLATE_JOBS', '# of jobs')]
	if 'DATASETSPLIT' in tracked_list:
		tracked_list.remove('DATASETSPLIT')
		if opts.collapse == 1:
			tracked_list.append('DATASETNICK')
			header_list.append(('DATASETNICK', 'DATASETNICK'))
		elif opts.collapse == 2:
			header_list.append(('COLLATE_NICK', '# of nicks'))
	for pset in psp_list:
		if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
			pset.pop('DATASETSPLIT')
		nickname = None
		if ('DATASETNICK' in pset) and (opts.collapse == 2):
			nickname = pset.pop('DATASETNICK')
		hash_str = md5_hex(repr(lmap(lambda key: pset.get(str(key)), tracked_list)))
		psp_dict.setdefault(hash_str, []).append(pset)
		psp_dict_nicks.setdefault(hash_str, set()).add(nickname)

	def _do_collate(hash_str):
		psp = psp_dict[hash_str][0]
		psp['COLLATE_JOBS'] = len(psp_dict[hash_str])
		psp['COLLATE_NICK'] = len(psp_dict_nicks[hash_str])
		return psp
	psp_list = sorted(imap(_do_collate, psp_dict), key=lambda x: tuple(imap(str, x.values())))
	return (header_list, psp_list)
Example #29
def _get_graph(instance, graph=None, visited=None):
	graph = graph or {}
	children = _get_instance_children(instance)

	visited = visited or set()
	for child in children:
		child_module = ''
		if hasattr(child, '__module__'):
			child_module = child.__module__ or ''
		child_name = ''
		if hasattr(child, '__name__'):
			child_name = child.__name__ or ''
		child_class_name = child.__class__.__name__ or ''

		if 'grid_control' not in child_module:
			continue
		if 'testsuite' in child_name:
			continue
		if not issubclass(child.__class__, Plugin):
			continue
		if child_class_name in ['instancemethod', 'function', 'type', 'method-wrapper']:
			continue
		if child in (None, True, False):
			continue
		graph.setdefault(instance, []).append(child)
		if child not in visited:
			visited.add(child)
			_get_graph(child, graph, visited)

	return (graph, list(visited))
Example #30
def _get_workflow_graph(workflow):
	(graph, node_list) = _get_graph(workflow)

	# Process nodes
	node_str_list = []
	map_node2name = {}
	map_node2color = {}
	for node in sorted(node_list, key=lambda x: x.__class__.__name__):
		node_props = {
			'label': '"%s"' % _get_node_label(node),
			'fillcolor': '"%s"' % _get_node_color(node, map_node2color),
			'style': '"filled"',
		}
		if node == workflow:
			node_props['root'] = 'True'
		node_prop_str = str.join('; ', imap(lambda key: '%s = %s' % (key, node_props[key]), node_props))
		node_str_list.append('%s [%s];\n' % (_get_node_name(node, map_node2name), node_prop_str))

	# Process edges
	edge_str_list = []
	for entry in sorted(graph, key=lambda x: x.__class__.__name__):
		for child in sorted(set(graph[entry]), key=lambda x: x.__class__.__name__):
			edge_str_list.append('%s -> %s;\n' % (_get_node_name(entry, map_node2name),
				_get_node_name(child, map_node2name)))

	cluster_str_list = []

	dot_header = 'digraph mygraph {\nmargin=0;\nedge [len=2];\noverlap=compress;splines=True;\n'
	dot_format_string_list = [dot_header] + node_str_list + cluster_str_list + edge_str_list + ['}\n']
	return str.join('', dot_format_string_list)
Example #31
def _get_graph(instance, graph=None, visited=None):
    graph = graph or {}
    children = _get_instance_children(instance)

    visited = visited or set()
    for child in children:
        child_module = ''
        if hasattr(child, '__module__'):
            child_module = child.__module__ or ''
        child_name = ''
        if hasattr(child, '__name__'):
            child_name = child.__name__ or ''
        child_class_name = child.__class__.__name__ or ''

        if 'grid_control' not in child_module:
            continue
        if 'testsuite' in child_name:
            continue
        if not issubclass(child.__class__, Plugin):
            continue
        if child_class_name in [
                'instancemethod', 'function', 'type', 'method-wrapper'
        ]:
            continue
        if child in (None, True, False):
            continue
        graph.setdefault(instance, []).append(child)
        if child not in visited:
            visited.add(child)
            _get_graph(child, graph, visited)

    return (graph, list(visited))
Example #32
 def _interact_with_child(self, pid, fd_parent_stdin, fd_parent_stdout,
                          fd_parent_stderr):
     thread_in = self._start_thread('stdin', False, pid, self._handle_input,
                                    fd_parent_stdin, self._buffer_stdin,
                                    self._event_shutdown)
     thread_out = self._start_thread('stdout', False, pid,
                                     self._handle_output, fd_parent_stdout,
                                     self._buffer_stdout,
                                     self._event_shutdown)
     thread_err = self._start_thread('stderr', False, pid,
                                     self._handle_output, fd_parent_stderr,
                                     self._buffer_stderr,
                                     self._event_shutdown)
     while self._status is None:
         try:
             (result_pid,
              status) = os.waitpid(pid,
                                   0)  # blocking (with spurious wakeups!)
         except OSError:  # unable to wait for child
             (result_pid, status) = (pid, False)  # False == 'OS_ABORT'
         if result_pid == pid:
             self._status = status
     self._time_ended = time.time()
     self._event_shutdown.set(
     )  # start shutdown of handlers and wait for it to finish
     self._buffer_stdin.finish()  # wakeup process input handler
     thread_in.join()
     thread_out.join()
     thread_err.join()
     for fd in set([fd_parent_stdin, fd_parent_stdout, fd_parent_stderr
                    ]):  # fd_parent_stdin == fd_parent_stdout for pty
         os.close(fd)
     self._buffer_stdout.finish()  # wakeup pending output buffer waits
     self._buffer_stderr.finish()
     self._event_finished.set()
Example #33
def create_dbs3_proto_blocks(opts, dataset_blocks):
	for dataset in dataset_blocks:
		missing_info_blocks = []
		dataset_types = set()
		for block in dataset_blocks[dataset]:
			block_dump = {'dataset_conf_list': [], 'files': [], 'file_conf_list': [], 'file_parent_list': []}
			(block_size, block_dataset_types) = create_dbs3_json_files(opts, block, block_dump)
			if len(block_dataset_types) > 1:
				raise Exception('Data and MC files are mixed in block %s' % DataProvider.bName(block))
			elif len(block_dataset_types) == 1:
				yield (block, block_dump, block_size, block_dataset_types.pop())
			else:
				missing_info_blocks.append((block, block_dump, block_size))
			dataset_types.update(block_dataset_types) # collect dataset types in this dataset for blocks with missing type information

		if missing_info_blocks:
			if len(dataset_types) > 1:
				raise Exception('Data and MC files are mixed in dataset %s! Unable to determine dataset type for blocks without type info' % dataset)
			elif len(dataset_types) == 0:
				if not opts.datatype:
					raise Exception('Please supply dataset type via --datatype!')
				dataset_type = opts.datatype
			else:
				dataset_type = dataset_types.pop()
			for (block, block_dump, block_size) in missing_info_blocks:
				yield (block, block_dump, block_size, dataset_type)
Example #34
    def _check_get_jobnum_list(self, task, wms, jobnum_list):
        if self._defect_tries:
            num_defect = len(
                self._defect_counter
            )  # Waiting list gets larger in case reported == []
            num_wait = num_defect - max(
                1, int(num_defect / 2**self._defect_raster))
            jobnum_list_wait = self._sample(self._defect_counter, num_wait)
            jobnum_list = lfilter(
                lambda jobnum: jobnum not in jobnum_list_wait, jobnum_list)

        (change, jobnum_list_timeout,
         reported) = JobManager._check_get_jobnum_list(self, task, wms,
                                                       jobnum_list)
        for jobnum in reported:
            self._defect_counter.pop(jobnum, None)

        if self._defect_tries and (change is not None):
            # make 'raster' iteratively smaller
            self._defect_raster += 1
            if reported:
                self._defect_raster = 1
            for jobnum in ifilter(lambda x: x not in reported, jobnum_list):
                self._defect_counter[jobnum] = self._defect_counter.get(
                    jobnum, 0) + 1
            jobnum_list_kick = lfilter(
                lambda jobnum: self._defect_counter[jobnum] >= self.
                _defect_tries, self._defect_counter)
            if (len(reported) == 0) and (len(jobnum_list) == 1):
                jobnum_list_kick.extend(jobnum_list)
            for jobnum in set(jobnum_list_kick):
                jobnum_list_timeout.append(jobnum)
                self._defect_counter.pop(jobnum)

        return (change, jobnum_list_timeout, reported)
Example #35
 def _displaySetup(self, dsPath, head):
     if os.path.exists(dsPath):
         nickNames = set()
         for block in DataProvider.loadFromFile(dsPath).getBlocks():
             nickNames.add(block[DataProvider.Nickname])
         utils.vprint('Mapping between nickname and other settings:\n', -1)
         report = []
         for nick in sorted(nickNames):
             lumi_filter_str = formatLumi(
                 self._nmLumi.lookup(nick, '', is_selector=False))
             if len(lumi_filter_str) > 4:
                 nice_lumi_filter = '%s ... %s (%d entries)' % (
                     lumi_filter_str[0], lumi_filter_str[-1],
                     len(lumi_filter_str))
             else:
                 nice_lumi_filter = str.join(', ', lumi_filter_str)
             config_files = self._nmCfg.lookup(nick, '', is_selector=False)
             tmp = {
                 0: nick,
                 1: str.join(', ', imap(os.path.basename, config_files)),
                 2: nice_lumi_filter
             }
             lookupvars = {'DATASETNICK': nick}
             for src in self._pm.lookupSources:
                 src.fillParameterInfo(None, lookupvars)
             tmp.update(lookupvars)
             report.append(tmp)
         utils.printTabular(head, report, 'cl')
         utils.vprint(level=-1)
Example #36
	def __init__(self, config, name):
		self._name = name # needed for changeView calls before the constructor
		head = [('DATASETNICK', 'Nickname')]

		# Mapping between nickname and config files:
		self._nmCfg = config.getLookup('nickname config', {}, defaultMatcher = 'regex',
			parser = lambda x: lmap(str.strip, x.split(',')), strfun = lambda x: str.join(',', x))
		if not self._nmCfg.empty():
			allConfigFiles = sorted(set(ichain(self._nmCfg.get_values())))
			config.set('config file', str.join('\n', allConfigFiles))
			head.append((1, 'Config file'))
		elif config.get('config file', ''):
			raise ConfigError("Please use 'nickname config' instead of 'config file'")

		# Mapping between nickname and constants - only display - work is handled by the 'normal' parameter factory
		nmCName = config.getList('nickname constants', [], onChange = None)
		param_config = config.changeView(viewClass = 'TaggedConfigView', setClasses = None, setNames = None, addSections = ['parameters'])
		param_config.set('constants', str.join(' ', nmCName), '+=')
		for cName in nmCName:
			param_config.set(cName + ' matcher', 'regex')
			param_config.set(cName + ' lookup', 'DATASETNICK')
			head.append((cName, cName))

		# Mapping between nickname and lumi filter - only display - work is handled by the 'normal' lumi filter
		config.set('lumi filter matcher', 'regex')
		if 'nickname lumi filter' in config.getOptions():
			config.set('lumi filter', strDictLong(config.getDict('nickname lumi filter', {}, onChange = None)))
		self._nmLumi = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = None)
		if not self._nmLumi.empty():
			head.append((2, 'Lumi filter'))

		CMSSW.__init__(self, config, name)
		self._displaySetup(config.getWorkPath('datacache.dat'), head)
Example #37
def get_workflow_graph(workflow):
    graph = getGraph(workflow)
    classCluster = {}
    for entry in graph:
        classCluster.setdefault(getNodeParent(entry.__class__),
                                []).append(entry)
    clusters = ''

    globalNodes = []
    colors = {}
    for (cluster_id, classClusterEntries) in enumerate(classCluster.values()):
        if len(classClusterEntries) == 1:
            globalNodes.append(classClusterEntries[0])
        clusters += 'subgraph cluster_%d {' % cluster_id
        for node in classClusterEntries:
            clusters += '%s [label="%s", fillcolor="%s", style="filled"];\n' % (
                getNodeName(node), getNodeLabel(node),
                getNodeColor(node, colors))
        clusters += '}\n'

    edgeStr = ''
    for entry in sorted(graph, key=lambda x: x.__class__.__name__):
        for child in sorted(set(graph[entry]),
                            key=lambda x: x.__class__.__name__):
            edgeStr += '%s -> %s;\n' % (getNodeName(entry), getNodeName(child))
    header = 'digraph mygraph {\nmargin=0;\noverlap=scale;splines=True;\n'
    footer = '}\n'
    return header + clusters + edgeStr + footer
Example #38
 def doTransfer(self, listDescSourceTarget):
     for (desc, source, target) in listDescSourceTarget:
         if not self.smPaths:
             raise ConfigError(
                 "%s can't be transferred because '%s path wasn't set" %
                 (desc, self.smOptPrefix))
         for idx, sePath in enumerate(set(self.smPaths)):
             activity = Activity('Copy %s to SE %d ' % (desc, idx + 1))
             proc = se_copy(source, os.path.join(sePath, target),
                            self.smForce)
             proc.status(timeout=5 * 60, terminate=True)
             activity.finish()
             if proc.status(timeout=0) == 0:
                 self._log.info('Copy %s to SE %d finished', desc, idx + 1)
             else:
                 self._log.info('Copy %s to SE %d failed', desc, idx + 1)
                 self._log.critical(proc.stderr.read(timeout=0))
                 self._log.critical(
                     'Unable to copy %s! You can try to copy it manually.',
                     desc)
                 if not utils.getUserBool(
                         'Is %s (%s) available on SE %s?' %
                     (desc, source, sePath), False):
                     raise StorageError('%s is missing on SE %s!' %
                                        (desc, sePath))
Example #39
 def getEntries(self, path, metadata, events, seList, objStore):
     datacachePath = os.path.join(objStore.get('GC_WORKDIR', ''),
                                  'datacache.dat')
     source = utils.QM((self._source == '')
                       and os.path.exists(datacachePath), datacachePath,
                       self._source)
     if source and (source not in self._lfnMap):
         pSource = DataProvider.createInstance('ListProvider',
                                               createConfig(), source)
         for (n, fl) in imap(
                 lambda b:
             (b[DataProvider.Dataset], b[DataProvider.FileList]),
                 pSource.getBlocks()):
             self._lfnMap.setdefault(source, {}).update(
                 dict(
                     imap(
                         lambda fi:
                         (self.lfnTrans(fi[DataProvider.URL]), n), fl)))
     pList = set()
     for key in ifilter(lambda k: k in metadata, self._parentKeys):
         pList.update(
             imap(
                 lambda pPath: self._lfnMap.get(source, {}).get(
                     self.lfnTrans(pPath)), metadata[key]))
     metadata['PARENT_PATH'] = lfilter(identity, pList)
     yield (path, metadata, events, seList, objStore)
Example #40
def uniqueListLR(inList): # (left to right)
	tmpSet, result = (set(), []) # Duplicated items are removed from the right [a,b,a] -> [a,b]
	for x in inList:
		if x not in tmpSet:
			result.append(x)
			tmpSet.add(x)
	return result
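A one-line usage note for uniqueListLR (calling the function defined directly above):

print(uniqueListLR(['a', 'b', 'a', 'c', 'b']))  # ['a', 'b', 'c'] - the first occurrence wins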
Example #41
def getGraph(instance, graph=None, visited=None):
    graph = graph or {}
    visited = visited or set()
    children = []
    for attr in dir(instance):
        child = getattr(instance, attr)
        try:
            children.extend(child)
            children.extend(child.values())
        except Exception:
            children.append(child)
    for child in children:
        try:
            if 'grid_control' not in child.__module__:
                continue
            if child.__class__.__name__ in [
                    'instancemethod', 'function', 'type'
            ]:
                continue
            graph.setdefault(instance, []).append(child)
            if child not in visited:
                visited.add(child)
                getGraph(child, graph, visited)
        except Exception:
            pass
    return graph
Example #42
def logging_create_handlers(config, logger_name):
	LogLevelEnum = makeEnum(lmap(lambda level: logging.getLevelName(level).upper(), irange(51)))

	logger = logging.getLogger(logger_name.lower())
	# Set logging level
	logger.setLevel(config.getEnum(logger_name + ' level', LogLevelEnum, logger.level, onChange = None))
	# Set propagate status
	logger.propagate = config.getBool(logger_name + ' propagate', bool(logger.propagate), onChange = None)
	# Setup handlers
	if logger_name + ' handler' in config.getOptions():
		# remove any standard handlers:
		for handler in list(logger.handlers):
			logger.removeHandler(handler)
		handler_list = config.getList(logger_name + ' handler', [], onChange = None)
		for handler_str in set(handler_list): # add only unique output handlers
			if handler_str == 'stdout':
				handler = StdoutStreamHandler()
			elif handler_str == 'stderr':
				handler = StderrStreamHandler()
			elif handler_str == 'file':
				handler = logging.FileHandler(config.get(logger_name + ' file', onChange = None), 'w')
			elif handler_str == 'debug_file':
				handler = GCLogHandler(config.get(logger_name + ' debug file', onChange = None), 'w')
			else:
				raise Exception('Unknown handler %s for logger %s' % (handler_str, logger_name))
			logger.addHandler(logging_configure_handler(config, logger_name, handler_str, handler))
Example #43
def getGraph(instance, graph = None, visited = None):
	graph = graph or {}
	visited = visited or set()
	children = []
	for attr in dir(instance):
		child = getattr(instance, attr)
		try:
			children.extend(child)
			children.extend(child.values())
		except Exception:
			children.append(child)
	for child in children:
		try:
			if 'grid_control' not in child.__module__:
				continue
			if child.__class__.__name__ in ['instancemethod', 'function', 'type']:
				continue
			if child in (None, True, False):
				continue
			graph.setdefault(instance, []).append(child)
			if child not in visited:
				visited.add(child)
				getGraph(child, graph, visited)
		except Exception:
			clear_current_exception()
	return graph
Example #44
    def display(self):
        (catStateDict, catDescDict,
         _) = CategoryBaseReport._getCategoryStateSummary(self)
        infos = []
        head = set()
        stateCat = {
            Job.SUCCESS: 'SUCCESS',
            Job.FAILED: 'FAILED',
            Job.RUNNING: 'RUNNING',
            Job.DONE: 'RUNNING'
        }
        for catKey in catDescDict:
            tmp = dict(catDescDict[catKey])
            head.update(tmp.keys())
            for stateKey in catStateDict[catKey]:
                state = stateCat.get(stateKey, 'WAITING')
                tmp[state] = tmp.get(state, 0) + catStateDict[catKey][stateKey]
            infos.append(tmp)

        stateCatList = ['WAITING', 'RUNNING', 'FAILED', 'SUCCESS']
        utils.printTabular(lmap(lambda x: (x, x),
                                sorted(head) + stateCatList),
                           infos,
                           'c' * len(head),
                           fmt=dict.fromkeys(
                               stateCatList,
                               lambda x: '%7d' % parseStr(x, int, 0)))
Example #45
	def _check_get_jobnum_list(self, task, wms, jobnum_list):
		if self._defect_tries:
			num_defect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
			num_wait = num_defect - max(1, int(num_defect / 2 ** self._defect_raster))
			jobnum_list_wait = self._sample(self._defect_counter, num_wait)
			jobnum_list = lfilter(lambda jobnum: jobnum not in jobnum_list_wait, jobnum_list)

		(change, jobnum_list_timeout, reported) = JobManager._check_get_jobnum_list(
			self, task, wms, jobnum_list)
		for jobnum in reported:
			self._defect_counter.pop(jobnum, None)

		if self._defect_tries and (change is not None):
			# make 'raster' iteratively smaller
			self._defect_raster += 1
			if reported:
				self._defect_raster = 1
			for jobnum in ifilter(lambda x: x not in reported, jobnum_list):
				self._defect_counter[jobnum] = self._defect_counter.get(jobnum, 0) + 1
			jobnum_list_kick = lfilter(lambda jobnum: self._defect_counter[jobnum] >= self._defect_tries,
				self._defect_counter)
			if (len(reported) == 0) and (len(jobnum_list) == 1):
				jobnum_list_kick.extend(jobnum_list)
			for jobnum in set(jobnum_list_kick):
				jobnum_list_timeout.append(jobnum)
				self._defect_counter.pop(jobnum)

		return (change, jobnum_list_timeout, reported)
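A small numeric illustration (made-up numbers) of the defect raster bookkeeping in Examples #16, #34 and #45: while nothing is reported back, the raster grows, the chunk of suspect jobs re-checked per round (num_defect / 2**raster, at least one job) shrinks, and the waiting list grows accordingly.

num_defect = 16
for defect_raster in (1, 2, 3):
	num_checked = max(1, int(num_defect / 2 ** defect_raster))
	print(defect_raster, num_defect - num_checked)  # waiting list sizes: 8, 12, 14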
Example #46
def process_dbs3_json_blocks(opts, block_dump_iter):
	log = logging.getLogger('dbs3-migration')
	log.setLevel(logging.DEBUG)

	# dry run without import - just store block dumps in temp dir
	if opts.do_import:
		return dump_dbs3_json(opts.tempdir, block_dump_iter)
	# set-up dbs clients
	dbs3_target_client = DBS3LiteClient(url=opts.target_instance)
	dbs3_source_client = DBS3LiteClient(url=opts.source_instance)
	dbs3_migration_queue = DBS3MigrationQueue()
	dbs3_migration_file = os.path.join(opts.tempdir, 'dbs3_migration.pkl')

	# migrate parents and register datasets with dbs3
	for block_dump in block_dump_iter:
		if not opts.continue_migration:
			# initiate the dbs3 to dbs3 migration of parent blocks
			log.debug('Checking parentage for block: %s', block_dump['block']['block_name'])
			unique_parent_lfns = set(imap(lambda parent: parent['parent_logical_file_name'],
				block_dump['file_parent_list']))
			unique_blocks = set()
			for parent_lfn in unique_parent_lfns:
				for block in dbs3_source_client.get_dbs_block_list(logical_file_name=parent_lfn):
					unique_blocks.add(block['block_name'])
			for parent_block in unique_blocks:
				if dbs3_target_client.get_dbs_block_list(block_name=parent_block):
					log.debug('Block %s is already at destination', parent_block)
					continue
				migration_task = MigrationTask(block_name=parent_block,
					migration_url=opts.dbsSource, dbs_client=dbs3_target_client)
				try:
					dbs3_migration_queue.add_migration_task(migration_task)
				except AlreadyQueued:
					log.exception('Already queued')
					clear_current_exception()
			dbs3_migration_queue.save_to_disk(dbs3_migration_file)
		else:
			try:
				dbs3_migration_queue = DBS3MigrationQueue.read_from_disk(dbs3_migration_file)
			except IOError:
				log.exception('There is probably no ongoing DBS 3 migration for this dataset')
				raise

		# wait for all parent blocks migrated to dbs3
		do_migration(dbs3_migration_queue)
		# insert block into dbs3
		dbs3_target_client.insert_dbs_block_dump(block_dump)
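The deduplication behind the 'except AlreadyQueued' branch can be sketched roughly as follows; SimpleMigrationQueue is hypothetical and only stands in for DBS3MigrationQueue, which additionally persists its state and triggers the server-side migration.

class AlreadyQueued(Exception):  # stand-in for the exception imported above
	pass

class SimpleMigrationQueue(object):
	def __init__(self):
		self._queued_block_names = set()
		self._tasks = []

	def add_migration_task(self, task):
		# refuse to queue the same parent block twice
		if task['block_name'] in self._queued_block_names:
			raise AlreadyQueued(task['block_name'])
		self._queued_block_names.add(task['block_name'])
		self._tasks.append(task)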
Example #47
0
 def process(self, pNum, splitInfo, result):
     for idx, mkey in enumerate(
             splitInfo.get(DataSplitter.MetadataHeader, [])):
         if mkey in self._metadata:
             tmp = set(
                 imap(lambda x: x[idx], splitInfo[DataSplitter.Metadata]))
             if len(tmp) == 1:
                 result[mkey] = tmp.pop()
Example #48
0
 def report():
     for nick in sorted(set(self.nmCfg.keys() + self.nmConst.keys() + self.nmLumi.keys())):
         tmp = {
             0: nick,
             1: str.join(", ", map(os.path.basename, self.nmCfg.get(nick, ""))),
             2: self.displayLumi(self.nmLumi.get(nick, "")),
         }
         yield utils.mergeDicts([tmp, self.nmConst.get(nick, {})])
Example #49
0
	def getNodes(self):
		(result, active) = (set(), False)
		for group in utils.LoggedProcess(self.configExec, '-shgrpl').iter():
			result.add(group.strip())
			for host in utils.LoggedProcess(self.configExec, '-shgrp_resolved %s' % group).iter():
				result.update(host.split())
		if len(result) > 0:
			return list(result)
Example #50
0
	def execute(self, wmsIDs): # yields list of (wmsID, job_status, job_info)
		checked_ids = set()
		for (wmsID, job_status, job_info) in self._executor.execute(wmsIDs):
			checked_ids.add(wmsID)
			yield (wmsID, job_status, job_info)
		if self._executor.get_status() == CheckStatus.OK:
			for wmsID in wmsIDs:
				if wmsID not in checked_ids:
					yield (wmsID, self._missing_state, {})
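A reduced, stand-alone form of the fallback in Example #50 (hypothetical names, and the CheckStatus.OK guard is omitted): every requested ID the backend did not report back is yielded with a substitute state.

def check_with_fallback(requested_ids, backend_results, missing_state='MISSING'):
	# backend_results: iterable of (wms_id, job_status) pairs
	checked_ids = set()
	for (wms_id, job_status) in backend_results:
		checked_ids.add(wms_id)
		yield (wms_id, job_status)
	for wms_id in requested_ids:
		if wms_id not in checked_ids:
			yield (wms_id, missing_state)

print(list(check_with_fallback(['a', 'b'], [('a', 'RUNNING')])))
# -> [('a', 'RUNNING'), ('b', 'MISSING')]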
Example #51
0
	def show_report(self, job_db, jobnum_list):
		(header_list, job_env_dict_list, vn_set) = ([], [], set())
		for jobnum in jobnum_list:
			job_env_dict = self._task.get_job_dict(jobnum)
			vn_set.update(job_env_dict)
			job_env_dict.update(self._task.get_transient_variables())
			job_env_dict_list.append(job_env_dict)
		header_list.extend(imap(lambda key: (key, '<%s>' % key), self._task.get_transient_variables()))
		self._show_table(sorted(header_list + lzip(vn_set, vn_set)), job_env_dict_list)
Example #52
0
	def cms_name_to_se(self, cms_name):
		cms_name_regex = re.compile(cms_name.replace('*', '.*').replace('%', '.*'))
		def _select_psn_site(site):
			return site['type'] == 'psn' and cms_name_regex.match(site['alias'])
		psn_site_names = ifilter(_select_psn_site, self._query('site-names'))
		site_aliases = set(imap(lambda x: x['alias'], psn_site_names))
		def _select_se(resource):
			return (resource['type'] == 'SE') and (resource['alias'] in site_aliases)
		return lmap(lambda x: x['fqdn'], ifilter(_select_se, self._query('site-resources')))
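A small sketch of the wildcard handling above: '*' and '%' are both turned into 'match anything' before compiling the pattern (the site names below are made up).

import re

def cms_name_matches(cms_name, alias):
	return re.compile(cms_name.replace('*', '.*').replace('%', '.*')).match(alias) is not None

print(cms_name_matches('T2_DE_%', 'T2_DE_DESY'))  # -> True
print(cms_name_matches('T2_DE_%', 'T1_US_FNAL'))  # -> False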
Example #53
0
	def _cleanup_running(self):
		# clean running activity list
		running_thread_names = set(imap(get_thread_name, threading.enumerate()))
		for thread_name in list(Activity.running_by_thread_name):
			if thread_name not in running_thread_names:
				finished_activities = Activity.running_by_thread_name.get(thread_name, [])
				while finished_activities:
					finished_activities[-1].finish()
				Activity.running_by_thread_name.pop(thread_name, None)
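The pruning in Example #53 boils down to comparing a bookkeeping dict against the names of the threads that are still alive; a stand-alone sketch with made-up data (the real cleanup also calls finish() on each leftover activity):

import threading

running_by_thread_name = {'MainThread': ['current activity'], 'worker-42': ['stale activity']}
alive_thread_names = set(thread.name for thread in threading.enumerate())
for thread_name in list(running_by_thread_name):
	if thread_name not in alive_thread_names:
		running_by_thread_name.pop(thread_name, None)
print(sorted(running_by_thread_name))  # -> ['MainThread']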
Example #54
0
def list_parameters(opts, psource):
    (result, needGCParam) = get_parameters(opts, psource)
    enabledOutput = opts.output.split(',')
    output = lfilter(lambda k: not opts.output or k in enabledOutput,
                     psource.getJobKeys())
    stored = lfilter(lambda k: not k.untracked, output)
    untracked = lfilter(lambda k: k.untracked, output)

    if opts.collapse > 0:
        result_old = result
        result = {}
        result_nicks = {}
        head = [('COLLATE_JOBS', '# of jobs')]
        if 'DATASETSPLIT' in stored:
            stored.remove('DATASETSPLIT')
            if opts.collapse == 1:
                stored.append('DATASETNICK')
                head.append(('DATASETNICK', 'DATASETNICK'))
            elif opts.collapse == 2:
                head.append(('COLLATE_NICK', '# of nicks'))
        for pset in result_old:
            if ('DATASETSPLIT' in pset) and (opts.collapse == 1):
                pset.pop('DATASETSPLIT')
            nickname = None
            if ('DATASETNICK' in pset) and (opts.collapse == 2):
                nickname = pset.pop('DATASETNICK')
            h = md5_hex(repr(lmap(lambda key: pset.get(str(key)), stored)))
            result.setdefault(h, []).append(pset)
            result_nicks.setdefault(h, set()).add(nickname)

        def doCollate(h):
            tmp = result[h][0]
            tmp['COLLATE_JOBS'] = len(result[h])
            tmp['COLLATE_NICK'] = len(result_nicks[h])
            return tmp

        result = lmap(doCollate, result)
    else:
        head = [('GC_JOB_ID', '#')]
        if needGCParam:
            head.append(('GC_PARAM', 'GC_PARAM'))
    if opts.active:
        head.append((ParameterInfo.ACTIVE, 'ACTIVE'))
    if opts.visible:
        stored = opts.visible.split(',')
    head.extend(sorted(izip(stored, stored)))
    if opts.untracked:
        head.extend(
            sorted(
                imap(
                    lambda n: (n, '(%s)' % n),
                    ifilter(lambda n: n not in ['GC_PARAM', 'GC_JOB_ID'],
                            untracked))))
    utils.vprint('')
    utils.printTabular(head, result)
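The collapse step above groups parameter sets by a hash over the values of the selected keys; a trimmed-down, hypothetical version of that grouping:

import hashlib

def collapse(psets, stored_keys):
    buckets = {}
    for pset in psets:
        digest = hashlib.md5(repr([pset.get(key) for key in stored_keys]).encode()).hexdigest()
        buckets.setdefault(digest, []).append(pset)
    # keep one representative per bucket and count how many jobs it stands for
    return [dict(members[0], COLLATE_JOBS=len(members)) for members in buckets.values()]

print(collapse([{'A': 1, 'B': 'x'}, {'A': 1, 'B': 'y'}], stored_keys=['A']))
# -> [{'A': 1, 'B': 'x', 'COLLATE_JOBS': 2}]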
Example #55
0
 def execute(self,
             wms_id_list):  # yields list of (wms_id, job_status, job_info)
     checked_ids = set()
     for (wms_id, job_status,
          job_info) in self._executor.execute(wms_id_list):
         checked_ids.add(wms_id)
         yield (wms_id, job_status, job_info)
     if self._executor.get_status() == CheckStatus.OK:
         for wms_id in wms_id_list:
             if wms_id not in checked_ids:
                 yield (wms_id, self._missing_state, {})
Example #56
0
	def get_dataset_name_list(self):
		if self._cache_dataset is None:
			self._cache_dataset = set()
			exc = ExceptionCollector()
			for provider in self._provider_list:
				try:
					self._cache_dataset.update(provider.get_dataset_name_list())
				except Exception:
					exc.collect()
			exc.raise_any(DatasetError('Could not retrieve all datasets!'))
		return list(self._cache_dataset)
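A reduced form of the ExceptionCollector pattern in Example #56 (hypothetical helper; the real collector re-raises the gathered exceptions with more context): keep asking every provider and only fail after all of them have been tried.

def collect_dataset_names(provider_calls):
	names, errors = set(), []
	for provider_call in provider_calls:
		try:
			names.update(provider_call())
		except Exception as exc:
			errors.append(exc)
	if errors:
		raise RuntimeError('Could not retrieve all datasets! (%d provider(s) failed)' % len(errors))
	return sorted(names)

print(collect_dataset_names([lambda: ['/A/B/C'], lambda: ['/D/E/F']]))
# -> ['/A/B/C', '/D/E/F']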
Example #57
0
	def _processFI(self, block, idxRuns, idxLumi):
		for fi in block[DataProvider.FileList]:
			if (not self._lumi_filter.empty()) and not self._acceptLumi(block, fi, idxRuns, idxLumi):
				continue
			if (self._lumi_keep == LumiKeep.Run) and (idxLumi is not None):
				if idxRuns is not None:
					fi[DataProvider.Metadata][idxRuns] = list(set(fi[DataProvider.Metadata][idxRuns]))
				fi[DataProvider.Metadata].pop(idxLumi)
			elif self._lumi_keep == LumiKeep.none:
				removeRunLumi(fi[DataProvider.Metadata], idxRuns, idxLumi)
			yield fi
	def process(self, pNum, splitInfo, result):
		for idx, mkey in enumerate(splitInfo.get(DataSplitter.MetadataHeader, [])):
			if mkey in self._metadata:
				def getMetadataProtected(x):
					if idx < len(x):
						return x[idx]
				tmp = set(imap(getMetadataProtected, splitInfo[DataSplitter.Metadata]))
				if len(tmp) == 1:
					value = tmp.pop()
					if value is not None:
						result[mkey] = value