Example #1
0
	def _getResyncSource(self, psource_old, psource_new, mapJob2PID, pAdded, pMissing, disableNewPNum):
		# Construct complete parameter space psource with missing parameter entries and intervention state
		# NNNNNNNNNNNNN OOOOOOOOO | source: NEW (==self) and OLD (==from file)
		# <same><added> <missing> | same: both in NEW and OLD, added: only in NEW, missing: only in OLD
		oldMaxJobs = psource_old.getMaxJobs()
		# assign sequential job numbers to the added parameter entries
		sort_inplace(pAdded, key = itemgetter('GC_PARAM'))
		for (idx, entry) in enumerate(pAdded):
			if oldMaxJobs + idx != entry['GC_PARAM']:
				mapJob2PID[oldMaxJobs + idx] = entry['GC_PARAM']

		missingInfos = []
		newMaxJobs = psource_new.getMaxJobs()
		sort_inplace(pMissing, key = itemgetter('GC_PARAM'))
		for (idx, entry) in enumerate(pMissing):
			mapJob2PID[entry['GC_PARAM']] = newMaxJobs + idx
			tmp = psource_old.getJobInfo(newMaxJobs + idx, entry['GC_PARAM'])
			tmp.pop('GC_PARAM')
			if tmp[ParameterInfo.ACTIVE]:
				tmp[ParameterInfo.ACTIVE] = False
				disableNewPNum.add(newMaxJobs + idx)
			missingInfos.append(tmp)

		if missingInfos:
			return self._createAggregatedSource(psource_old, psource_new, missingInfos)
		return self._source
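All of these snippets lean on a sort_inplace helper together with operator.itemgetter to order lists of dict-like records in place. A minimal self-contained sketch of that idiom, assuming sort_inplace is simply a thin wrapper around list.sort (the actual helper may differ):

from operator import itemgetter

def sort_inplace(seq, key=None):
	seq.sort(key=key)  # in-place sort, as the examples assume

entries = [{'GC_PARAM': 12}, {'GC_PARAM': 3}, {'GC_PARAM': 7}]
sort_inplace(entries, key=itemgetter('GC_PARAM'))
print([entry['GC_PARAM'] for entry in entries])  # [3, 7, 12]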
Example #2
0
def get_dataset_info(opts, args, query_blocks=True):
    config = get_dataset_config(opts, args)
    if opts.threads is not None:
        config.set_int('dataprovider thread max', int(opts.threads) or 1)
    provider = config.get_composited_plugin(
        'dataset',
        cls=DataProvider,
        bind_kwargs={'provider_name_default': config.get('dataset provider')},
        default_compositor=':ThreadedMultiDatasetProvider:')
    dataset_list = sorted(provider.get_dataset_name_list())
    if len(dataset_list) == 0:
        raise DatasetError('No datasets matched!')

    # Query blocks only if needed
    query_blocks = False
    for option in opts.__dict__:
        if option.startswith('list_') and (option != 'list_dataset_names') or (
                option == 'save'):
            if getattr(opts, option):
                query_blocks = True

    block_list = None
    if query_blocks:
        block_list = provider.get_block_list_cached(show_stats=False)
        if len(block_list) == 0:
            raise DatasetError('No blocks matched!')
        if opts.ordered:
            sort_inplace(block_list,
                         key=itemgetter(DataProvider.Dataset,
                                        DataProvider.BlockName))
            for block in block_list:
                sort_inplace(block[DataProvider.FileList],
                             key=itemgetter(DataProvider.URL))
    return (provider, dataset_list, block_list)
Example #3
0
    def process(self, blockIter):
        if self._sortDS:
            dsCache = {}
            for block in blockIter:
                dsCache.setdefault(block[DataProvider.Dataset],
                                   []).append(block)

            def ds_generator():
                for ds in sorted(dsCache):
                    if self._sortBlock:
                        sort_inplace(dsCache[ds],
                                     key=itemgetter(DataProvider.BlockName))
                    for block in dsCache[ds]:
                        yield block

            blockIter = ds_generator()
        elif self._sortBlock:
            blockIter = sorted(blockIter,
                               key=itemgetter(DataProvider.BlockName))
        # Yield blocks
        for block in blockIter:
            if self._sortFiles:
                sort_inplace(block[DataProvider.FileList],
                             key=itemgetter(DataProvider.URL))
            if self._sortLocation:
                sort_inplace(block[DataProvider.Locations])
            yield block
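The process() method above first buckets blocks by dataset via setdefault and then replays each bucket in sorted order through a generator. A stripped-down sketch of that grouping pattern, with plain string keys standing in for the DataProvider constants:

from operator import itemgetter

blocks = [
	{'dataset': '/b', 'block': '1'},
	{'dataset': '/a', 'block': '2'},
	{'dataset': '/a', 'block': '1'},
]
ds_cache = {}
for block in blocks:
	ds_cache.setdefault(block['dataset'], []).append(block)

def iter_sorted_blocks():
	for ds in sorted(ds_cache):  # datasets in name order
		for block in sorted(ds_cache[ds], key=itemgetter('block')):  # blocks in name order
			yield block

print([(b['dataset'], b['block']) for b in iter_sorted_blocks()])
# [('/a', '1'), ('/a', '2'), ('/b', '1')]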
Example #4
0
def get_dataset_info(opts, args, query_blocks=True):
	config = get_dataset_config(opts, args)
	provider = config.get_composited_plugin('dataset', cls=DataProvider,
		bind_kwargs={'provider_name_default': config.get('dataset provider')},
		default_compositor=':ThreadedMultiDatasetProvider:')  # -T disables multi-threading further below
	dataset_list = sorted(provider.get_dataset_name_list())
	if len(dataset_list) == 0:
		raise DatasetError('No datasets matched!')

	# Query blocks only if needed
	query_blocks = False
	for option in opts.__dict__:
		if option.startswith('list_') and (option != 'list_dataset_names') or (option == 'save'):
			if getattr(opts, option):
				query_blocks = True

	block_list = None
	if query_blocks:
		block_list = provider.get_block_list_cached(show_stats=False)
		if len(block_list) == 0:
			raise DatasetError('No blocks matched!')
		if opts.ordered:
			sort_inplace(block_list, key=itemgetter(DataProvider.Dataset, DataProvider.BlockName))
			for block in block_list:
				sort_inplace(block[DataProvider.FileList], key=itemgetter(DataProvider.URL))
	return (provider, dataset_list, block_list)
Example #5
0
        def _handle_matching_block(block_list_added, block_list_missing,
                                   block_list_matching, block_old, block_new):
            # Compare different files according to their name - NOT full content
            get_file_key = itemgetter(DataProvider.URL)
            sort_inplace(block_old[DataProvider.FileList], key=get_file_key)
            sort_inplace(block_new[DataProvider.FileList], key=get_file_key)

            def _handle_matching_fi(fi_list_added, fi_list_missing,
                                    fi_list_matched, fi_old, fi_new):
                fi_list_matched.append((fi_old, fi_new))

            (fi_list_added, fi_list_missing,
             fi_list_matched) = get_list_difference(
                 block_old[DataProvider.FileList],
                 block_new[DataProvider.FileList],
                 get_file_key,
                 _handle_matching_fi,
                 is_sorted=True)
            if fi_list_added:  # Create new block for added files in an existing block
                block_added = copy.copy(block_new)
                block_added[DataProvider.FileList] = fi_list_added
                block_added[DataProvider.NEntries] = sum(
                    imap(itemgetter(DataProvider.NEntries), fi_list_added))
                block_list_added.append(block_added)
            block_list_matching.append(
                (block_old, block_new, fi_list_missing, fi_list_matched))
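Sketch of the comparison that _handle_matching_block builds on: both file lists are sorted by the URL key so one merge-style pass can classify entries as added, missing, or matching. The real get_list_difference helper may differ in detail; this only illustrates the assumed idea:

from operator import itemgetter

def diff_sorted(old, new, key, on_match):
	(added, missing, matched) = ([], [], [])
	(idx_old, idx_new) = (0, 0)
	while (idx_old < len(old)) and (idx_new < len(new)):
		(key_old, key_new) = (key(old[idx_old]), key(new[idx_new]))
		if key_old < key_new:  # only in the old list
			missing.append(old[idx_old])
			idx_old += 1
		elif key_new < key_old:  # only in the new list
			added.append(new[idx_new])
			idx_new += 1
		else:  # present in both lists
			on_match(added, missing, matched, old[idx_old], new[idx_new])
			idx_old += 1
			idx_new += 1
	missing.extend(old[idx_old:])
	added.extend(new[idx_new:])
	return (added, missing, matched)

fi_list_old = sorted([{'url': 'a'}, {'url': 'b'}], key=itemgetter('url'))
fi_list_new = sorted([{'url': 'b'}, {'url': 'c'}], key=itemgetter('url'))
print(diff_sorted(fi_list_old, fi_list_new, itemgetter('url'),
	lambda added, missing, matched, fi_old, fi_new: matched.append((fi_old, fi_new))))
# ([{'url': 'c'}], [{'url': 'a'}], [({'url': 'b'}, {'url': 'b'})])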
Example #6
0
def save_dataset(opts, provider):
	print('')
	blocks = provider.getBlocks()
	if opts.ordered:
		sort_inplace(blocks, key = itemgetter(DataProvider.Dataset, DataProvider.BlockName))
		for b in blocks:
			sort_inplace(b[DataProvider.FileList], key = itemgetter(DataProvider.URL))
	DataProvider.saveToFile(opts.save, blocks)
	print('Dataset information saved to ./%s' % opts.save)
Example #7
0
	def _finish_partition(self, block, partition, fi_list=None):
		# Copy infos from block
		for (dp_prop, ds_prop) in self._dp_ds_prop_list:
			if dp_prop in block:
				partition[ds_prop] = block[dp_prop]
		if DataProvider.Metadata in block:
			partition[DataSplitter.MetadataHeader] = block[DataProvider.Metadata]
		# Helper for very simple splitter
		if fi_list:
			partition[DataSplitter.FileList] = lmap(itemgetter(DataProvider.URL), fi_list)
			partition[DataSplitter.NEntries] = sum(imap(itemgetter(DataProvider.NEntries), fi_list))
			if DataProvider.Metadata in block:
				partition[DataSplitter.Metadata] = lmap(itemgetter(DataProvider.Metadata), fi_list)
		return partition
Example #8
0
 def _iter_blocks_by_dataset(self, map_dataset2block_list):
     for dataset_name in sorted(map_dataset2block_list):
         if self._sort_block:
             sort_inplace(map_dataset2block_list[dataset_name],
                          key=itemgetter(DataProvider.BlockName))
         for block in map_dataset2block_list[dataset_name]:
             yield block
Example #9
0
	def __init__(self, block_list_old, block_list_new):
		activity = Activity('Performing resynchronization of dataset')
		block_resync_tuple = DataProvider.resync_blocks(block_list_old, block_list_new)
		(self.block_list_added, self._block_list_missing, self._block_list_matching) = block_resync_tuple
		for block_missing in self._block_list_missing:  # Files in matching blocks are already sorted
			sort_inplace(block_missing[DataProvider.FileList], key=itemgetter(DataProvider.URL))
		activity.finish()
Example #10
0
	def _getBlocksInternal(self):
		# Split files into blocks/datasets via key functions and determine metadata intersection
		(protoBlocks, commonDS, commonB) = ({}, {}, {})
		def getActiveKeys(kUser, kGuard, gIdx):
			return kUser + (kGuard or lchain(imap(lambda x: x.getGuards()[gIdx], self._scanner)))
		keysDS = getActiveKeys(self._ds_keys_user, self._ds_keys_guard, 0)
		keysB = getActiveKeys(self._b_keys_user, self._b_keys_guard, 1)
		for fileInfo in ifilter(itemgetter(0), self._collectFiles()):
			hashDS = self._generateKey(keysDS, md5_hex(repr(self._datasetExpr)) + md5_hex(repr(self._datasetNick)), *fileInfo)
			hashB = self._generateKey(keysB, hashDS + md5_hex(repr(fileInfo[3])), *fileInfo) # [3] == SE list
			if not self._ds_select or (hashDS in self._ds_select):
				if not self._b_select or (hashB in self._b_select):
					fileInfo[1].update({'DS_KEY': hashDS, 'BLOCK_KEY': hashB})
					protoBlocks.setdefault(hashDS, {}).setdefault(hashB, []).append(fileInfo)
					utils.intersectDict(commonDS.setdefault(hashDS, dict(fileInfo[1])), fileInfo[1])
					utils.intersectDict(commonB.setdefault(hashDS, {}).setdefault(hashB, dict(fileInfo[1])), fileInfo[1])

		# Generate names for blocks/datasets using common metadata
		(hashNameDictDS, hashNameDictB) = ({}, {})
		for hashDS in protoBlocks:
			hashNameDictDS[hashDS] = self._generateDatasetName(hashDS, commonDS[hashDS])
			for hashB in protoBlocks[hashDS]:
				hashNameDictB[hashB] = (hashDS, self._generateBlockName(hashB, commonB[hashDS][hashB]))

		self._findCollision('dataset', hashNameDictDS, commonDS, keysDS, lambda name, key: [key])
		self._findCollision('block', hashNameDictB, commonB, keysDS + keysB, lambda name, key: [name[0], key], lambda name: name[1])

		for block in self._buildBlocks(protoBlocks, hashNameDictDS, hashNameDictB):
			yield block
Example #11
0
 def iter_blocks_normed(self):
     activity = Activity('Retrieving %s' % self._dataset_expr)
     try:
         # Validation, Naming:
         for block in self._iter_blocks_raw():
             if not block.get(DataProvider.Dataset):
                 raise DatasetError(
                     'Block does not contain the dataset name!')
             block.setdefault(DataProvider.BlockName, '0')
             block.setdefault(DataProvider.Provider,
                              self.__class__.__name__)
             block.setdefault(DataProvider.Query, self._dataset_expr)
             block.setdefault(DataProvider.Locations, None)
             events = sum(
                 imap(itemgetter(DataProvider.NEntries),
                      block[DataProvider.FileList]))
             block.setdefault(DataProvider.NEntries, events)
             if self._dataset_nick_override:
                 block[DataProvider.Nickname] = self._dataset_nick_override
             elif self._nick_producer:
                 block = self._nick_producer.process_block(block)
                 if not block:
                     raise DatasetError('Nickname producer failed!')
             yield block
     except Exception:
         raise DatasetRetrievalError('Unable to retrieve dataset %s' %
                                     repr(self._dataset_expr))
     activity.finish()
Example #12
0
	def _build_blocks(self, map_key2fm_list, map_key2name, map_key2metadata_dict):
		# Return named dataset
		for key in sorted(map_key2fm_list):
			result = {
				DataProvider.Dataset: map_key2name[key[:1]],
				DataProvider.BlockName: map_key2name[key[:2]],
			}
			fm_list = map_key2fm_list[key]

			# Determine location_list
			location_list = None
			for file_location_list in ifilter(lambda s: s is not None, imap(itemgetter(3), fm_list)):
				location_list = location_list or []
				location_list.extend(file_location_list)
			if location_list is not None:
				result[DataProvider.Locations] = list(UniqueList(location_list))

			# use first file [0] to get the initial metadata_dict [1]
			metadata_name_list = list(fm_list[0][1].keys())
			result[DataProvider.Metadata] = metadata_name_list

			# translate file metadata into data provider file info entries
			def _translate_fm2fi(url, metadata_dict, entries, location_list, obj_dict):
				if entries is None:
					entries = -1
				return {DataProvider.URL: url, DataProvider.NEntries: entries,
					DataProvider.Metadata: lmap(metadata_dict.get, metadata_name_list)}
			result[DataProvider.FileList] = lsmap(_translate_fm2fi, fm_list)
			yield result
Example #13
0
	def _diffParams(self, psource_old, psource_new, mapJob2PID, redoNewPNum, disableNewPNum):
		# Reduces psource output to essential information for diff - faster than keying
		def translatePSource(psource):
			keys_store = sorted(ifilter(lambda k: not k.untracked, psource.getJobKeys()))
			def translateEntry(meta): # Translates parameter setting into hash
				tmp = md5()
				for key in ifilter(lambda k: k in meta, keys_store):
					if str(meta[key]):
						tmp.update(str2bytes(key))
						tmp.update(str2bytes(str(meta[key])))
				return { ParameterInfo.HASH: tmp.hexdigest(), 'GC_PARAM': meta['GC_PARAM'],
					ParameterInfo.ACTIVE: meta[ParameterInfo.ACTIVE] }
			for entry in psource.iterJobs():
				yield translateEntry(entry)

		params_old = list(translatePSource(psource_old))
		params_new = list(translatePSource(psource_new))

		def sameParams(paramsAdded, paramsMissing, paramsSame, oldParam, newParam):
			mapJob2PID[oldParam['GC_PARAM']] = newParam['GC_PARAM']
			if not oldParam[ParameterInfo.ACTIVE] and newParam[ParameterInfo.ACTIVE]:
				redoNewPNum.add(newParam['GC_PARAM'])
			if oldParam[ParameterInfo.ACTIVE] and not newParam[ParameterInfo.ACTIVE]:
				disableNewPNum.add(newParam['GC_PARAM'])
		return utils.DiffLists(params_old, params_new, itemgetter(ParameterInfo.HASH), sameParams)
Example #14
0
    def _processReplicas(self, blockPath, replica_infos):
        def empty_with_warning(*args):
            self._log.warning(*args)
            return []

        def expanded_replica_locations(replica_infos):
            for replica_info in replica_infos:
                for entry in self._replicaLocation(replica_info):
                    yield entry

        if not replica_infos:
            return empty_with_warning(
                'Dataset block %r has no replica information!', blockPath)
        replica_infos_selected = self._phedexFilter.filterList(
            replica_infos, key=itemgetter(0))
        if not replica_infos_selected:
            return empty_with_warning(
                'Dataset block %r is not available at the selected locations!\nAvailable locations: %s',
                blockPath, str.join(', ', self._fmtLocations(replica_infos)))
        if not self._onlyComplete:
            return list(expanded_replica_locations(replica_infos_selected))
        replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2],
                                         replica_infos_selected)
        if not replica_infos_complete:
            return empty_with_warning(
                'Dataset block %r is not completely available at the selected locations!\nAvailable locations: %s',
                blockPath, str.join(', ', self._fmtLocations(replica_infos)))
        return list(expanded_replica_locations(replica_infos_complete))
Example #15
0
	def display(self):
		stateMap = dict(self._stateMap)

		def transform(data, label, level):
			if None in data:
				total = data.pop(None)
				if (len(data) > 1):
					for result in self._get_entry(stateMap, total, ['Total']):
						yield result
					yield '='
			for idx, entry in enumerate(sorted(data)):
				if level == 1:
					for result in self._get_entry(stateMap, data[entry], [entry] + label):
						yield result
				else:
					for result in transform(data[entry], [entry] + label, level - 1):
						yield result
				if idx != len(data) - 1:
					yield '-'
		stats = self._getHierachicalStats()
		displayStates = lmap(itemgetter(1), self._stateMap)
		header = [('', 'Category')] + lzip(displayStates, displayStates)
		printTabular(header, transform(stats, [], len(self._idxList)),
			fmtString = 'l' + 'c'*len(stateMap), fmt = {'': lambda x: str.join(' ', x)})
		return 0
Example #16
0
 def _get_entries_for_url(url):
     fi = _fast_search(block_old[DataProvider.FileList],
                       itemgetter(DataProvider.URL), url)
     if not fi:
         raise Exception('url %s not found in block %s\n%s' %
                         (url, block_old, partition))
     return fi[DataProvider.NEntries]
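_fast_search itself is not shown in these examples; presumably it performs a binary search over a list that is already sorted by the same key (the file lists are sorted by DataProvider.URL elsewhere). A hedged sketch of such a lookup:

from bisect import bisect_left
from operator import itemgetter

def fast_search(sorted_list, key_fun, value):
	keys = [key_fun(item) for item in sorted_list]  # list assumed pre-sorted by this key
	idx = bisect_left(keys, value)
	if (idx < len(keys)) and (keys[idx] == value):
		return sorted_list[idx]
	return None  # no matching entry

file_list = sorted([{'url': 'f1', 'n': 10}, {'url': 'f3', 'n': 30}], key=itemgetter('url'))
print(fast_search(file_list, itemgetter('url'), 'f3'))  # {'url': 'f3', 'n': 30}
print(fast_search(file_list, itemgetter('url'), 'f2'))  # None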
Example #17
0
	def __init__(self, name, container_old, container_cur, parent=None,
			set_sections=unspecified, add_sections=None,
			set_names=unspecified, add_names=None,
			set_tags=unspecified, add_tags=None,
			set_classes=unspecified, add_classes=None, inherit_sections=False):
		parent = parent or self
		if inherit_sections and isinstance(parent, TaggedConfigView):
			add_sections = (parent.get_class_section_list() or []) + (add_sections or [])
		SimpleConfigView.__init__(self, name, container_old, container_cur, parent,
			set_sections=set_sections, add_sections=add_sections)

		self._class_section_list = self._init_variable(parent, '_class_section_list', None,
			set_classes, add_classes, norm_config_locations, lambda x: x.config_section_list)
		self._section_name_list = self._init_variable(parent, '_section_name_list', [],
			set_names, add_names, norm_config_locations)

		def _get_tag_tuple(tag_obj):
			try:
				config_tag_name = tag_obj.config_tag_name.lower()
			except Exception:
				raise APIError('Class %r does not define a valid tag name!' % tag_obj.__class__.__name__)
			return [(config_tag_name, tag_obj.get_object_name().lower())]
		self._section_tag_list = self._init_variable(parent, '_section_tag_list', [],
			set_tags, add_tags, identity, _get_tag_tuple)
		self._section_tag_order = lmap(itemgetter(0), self._section_tag_list)
Example #18
0
	def _addEntry(self, container, section, option, value, source):
		option = option.strip()
		opttype = '='
		if option[-1] in imap(itemgetter(0), ConfigEntry.OptTypeDesc.keys()):
			opttype = option[-1] + '='
			option = option[:-1].strip()
		container.append(ConfigEntry(section.strip(), option, value.strip(), opttype, source))
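_addEntry recognizes operator suffixes by comparing the last character of the option name against the first characters of the known option-type descriptions. A small sketch with a hypothetical stand-in for ConfigEntry.OptTypeDesc:

from operator import itemgetter

opt_type_desc = {'+=': 'append', '^=': 'prepend', '-=': 'remove'}  # hypothetical contents
(option, opttype) = ('parameters +', '=')
if option[-1] in map(itemgetter(0), opt_type_desc.keys()):
	opttype = option[-1] + '='    # becomes '+='
	option = option[:-1].strip()  # becomes 'parameters'
print((option, opttype))  # ('parameters', '+=')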
Example #19
0
    def show_report(self, job_db, jobnum_list):
        state_map = dict(self._state_map)

        def _transform(data, label, level):
            if None in data:
                total = data.pop(None)
                if len(data) > 1:
                    for result in self._get_entry(state_map, total, ['Total']):
                        yield result
                    yield '='
            for idx, entry in enumerate(sorted(data)):
                if level == 1:
                    for result in self._get_entry(state_map, data[entry],
                                                  [entry] + label):
                        yield result
                else:
                    for result in _transform(data[entry], [entry] + label,
                                             level - 1):
                        yield result
                if idx != len(data) - 1:
                    yield '-'

        stats = self._get_hierachical_stats_dict(job_db, jobnum_list)
        displace_states_list = lmap(itemgetter(1), self._state_map)
        header = [('', 'Category')] + lzip(displace_states_list,
                                           displace_states_list)
        self._show_table(header,
                         _transform(stats, [], len(self._idx_list)),
                         align_str='l' + 'c' * len(state_map),
                         fmt_dict={'': lambda x: str.join(' ', x)})
Example #20
0
def main(opts, args):
	config = get_dataset_config(opts, args)

	provider = config.getPlugin('dataset', cls = DataProvider)
	blocks = provider.getBlocks()
	if len(blocks) == 0:
		raise DatasetError('No blocks!')

	datasets = set(imap(itemgetter(DataProvider.Dataset), blocks))
	if len(datasets) > 1 or opts.info:
		headerbase = [(DataProvider.Dataset, 'Dataset')]
	else:
		print('Dataset: %s' % blocks[0][DataProvider.Dataset])
		headerbase = []

	if opts.list_datasets:
		list_datasets(blocks)
	if opts.list_blocks:
		list_blocks(blocks, headerbase)
	if opts.list_files:
		list_files(datasets, blocks)
	if opts.list_storage:
		list_storage(blocks, headerbase)
	if opts.metadata and not opts.save:
		list_metadata(datasets, blocks)
	if opts.block_metadata and not opts.save:
		list_block_metadata(datasets, blocks)
	if opts.config_entry:
		list_config_entries(opts, blocks, provider)
	if opts.info:
		list_infos(blocks)
	if opts.save:
		save_dataset(opts, provider)
Example #21
0
 def ds_generator():
     for ds in sorted(dsCache):
         if self._sortBlock:
             sort_inplace(dsCache[ds],
                          key=itemgetter(DataProvider.BlockName))
         for block in dsCache[ds]:
             yield block
Example #22
0
 def _resync_files(self, splitter, partition_mod, partition_num, size_list,
                   fi_list_missing, fi_list_matched, block_new,
                   metadata_setup_list, partition_list_added):
     # resync a single file in the partition, return next file index to process
     # Select processing mode for job (disable > complete > changed > ignore)
     #   [ie. disable overrides all] using min
     # Result: one of [disable, complete, ignore] (changed -> complete or ignore)
     fi_idx = 0
     metadata_list_current = []
     proc_mode = ResyncMode.ignore
     while fi_idx < len(partition_mod[DataSplitter.FileList]):
         fi_removed = _fast_search(
             fi_list_missing, itemgetter(DataProvider.URL),
             partition_mod[DataSplitter.FileList][fi_idx])
         if fi_removed:
             proc_mode = self._handle_removed_file(proc_mode, fi_idx,
                                                   partition_mod, size_list,
                                                   fi_removed)
         else:
             (proc_mode, fi_idx) = self._handle_changed_file(
                 splitter, proc_mode, fi_idx, partition_mod, partition_num,
                 size_list, block_new, partition_list_added,
                 fi_list_matched, metadata_list_current,
                 metadata_setup_list)
     return (proc_mode, metadata_list_current)
Example #23
0
    def _process_replica_list(self, block_path, replica_infos):
        def _empty_with_warning(error_msg, *args):
            self._log.warning('Dataset block %r ' + error_msg, block_path,
                              *args)
            return []

        def _expanded_replica_locations(replica_infos):
            for replica_info in replica_infos:
                for entry in self._iter_replica_locations(replica_info):
                    yield entry

        if not replica_infos:
            return _empty_with_warning('has no replica information!')
        replica_infos_selected = self._phedex_filter.filter_list(
            replica_infos, key=itemgetter(0))
        if not replica_infos_selected:
            return _empty_with_warning(
                'is not available at the selected locations!\n' +
                'Available locations: %s',
                str.join(', ', self._iter_formatted_locations(replica_infos)))
        if not self._only_complete:
            return list(_expanded_replica_locations(replica_infos_selected))
        replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2],
                                         replica_infos_selected)
        if not replica_infos_complete:
            return _empty_with_warning(
                'is not completely available at the selected locations!\n' +
                'Available locations: %s',
                str.join(', ', self._iter_formatted_locations(replica_infos)))
        return list(_expanded_replica_locations(replica_infos_complete))
Example #24
0
	def process_block(self, block):
		# Check entry consistency
		events = sum(imap(itemgetter(DataProvider.NEntries), block[DataProvider.FileList]))
		if block.setdefault(DataProvider.NEntries, events) != events:
			error_msg = 'Inconsistency in block %s: Number of events doesn\'t match (b:%d != f:%d)'
			error_msg = error_msg % (DataProvider.get_block_id(block), block[DataProvider.NEntries], events)
			self._handle_error(error_msg, self._mode)
		return block
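The check above hinges on dict.setdefault returning the existing value when the key is already present (and only storing the computed sum when it is missing), so block and file counts can be compared in a single expression. A small sketch with plain string keys in place of the DataProvider constants:

from operator import itemgetter

(NENTRIES, FILE_LIST) = ('nentries', 'files')
block = {NENTRIES: 90, FILE_LIST: [{NENTRIES: 40}, {NENTRIES: 60}]}
events = sum(map(itemgetter(NENTRIES), block[FILE_LIST]))  # 100
if block.setdefault(NENTRIES, events) != events:
	print('Inconsistency: block says %d, files sum to %d' % (block[NENTRIES], events))
# prints: Inconsistency: block says 90, files sum to 100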
Example #25
0
	def _get_js_class_infos(self):
		job_class_list = [('AT WMS', JobClass.ATWMS), ('RUNNING', JobClass.RUNNING),
			('FAILING', JobClass.FAILING), ('SUCCESS', JobClass.SUCCESS)]
		state_class_map = {}
		for (job_class_name, job_class) in job_class_list:
			for job_state in job_class.state_list:
				state_class_map[job_state] = job_class_name
		return (state_class_map, lmap(itemgetter(0), job_class_list))
Example #26
0
def _translate_pa2pspi_list(padapter):
	# Reduces parameter adapter output to essential information for diff - faster than keying
	meta_iter = ifilter(lambda k: not k.untracked, padapter.get_job_metadata())
	meta_list = sorted(meta_iter, key=lambda k: k.value)

	for psp in padapter.iter_jobs():  # Translates parameter space point into hash
		psp_item_iter = imap(lambda meta: (meta.value, psp.get(meta.value)), meta_list)
		hash_str = md5_hex(repr(lfilter(itemgetter(1), psp_item_iter)))
		yield (psp[ParameterInfo.ACTIVE], hash_str, psp['GC_PARAM'])
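The hash built by _translate_pa2pspi_list serializes only the non-empty (name, value) pairs of the tracked parameters, so two parameter space points hash equal exactly when their tracked settings agree. A hedged, self-contained sketch of that step (md5_hex is assumed to be a simple hex-digest helper):

import hashlib
from operator import itemgetter

def md5_hex(value):
	return hashlib.md5(value.encode()).hexdigest()  # stand-in for the md5_hex helper

psp = {'GC_PARAM': 7, 'SEED': '123', 'EMPTY': ''}
meta_list = ['EMPTY', 'SEED']  # tracked parameter names, sorted
psp_item_iter = map(lambda name: (name, psp.get(name)), meta_list)
hash_str = md5_hex(repr(list(filter(itemgetter(1), psp_item_iter))))
print(hash_str)  # hash of [('SEED', '123')] - pairs with empty values are dropped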
Example #27
0
 def process(self, block_iter):
     if self._sort_ds:
         map_dataset2block_list = {}
         for block in block_iter:
             map_dataset2block_list.setdefault(block[DataProvider.Dataset],
                                               []).append(block)
         block_iter = self._iter_blocks_by_dataset(map_dataset2block_list)
     elif self._sort_block:
         block_iter = sorted(block_iter,
                             key=itemgetter(DataProvider.BlockName))  # pylint:disable=redefined-variable-type
     # Yield blocks
     for block in block_iter:
         if self._sort_files:
             sort_inplace(block[DataProvider.FileList],
                          key=itemgetter(DataProvider.URL))
         if self._sort_location:
             sort_inplace(block[DataProvider.Locations])
         yield block
Example #28
0
 def _reduce_fn_list(self, block, fn_list_limit_map):
     dataset_name = block[DataProvider.Dataset]
     fn_list_limit = fn_list_limit_map[dataset_name]
     fi_list_removed = block[DataProvider.FileList][fn_list_limit:]
     nentry_removed_iter = imap(itemgetter(DataProvider.NEntries),
                                fi_list_removed)
     block[DataProvider.NEntries] -= sum(nentry_removed_iter)
     block[DataProvider.FileList] = block[
         DataProvider.FileList][:fn_list_limit]
     fn_list_limit_map[dataset_name] -= len(block[DataProvider.FileList])
Example #29
0
 def __init__(self, block_list_old, block_list_new):
     activity = Activity('Performing resynchronization of dataset')
     block_resync_tuple = DataProvider.resync_blocks(
         block_list_old, block_list_new)
     (self.block_list_added, self._block_list_missing,
      self._block_list_matching) = block_resync_tuple
     for block_missing in self._block_list_missing:  # Files in matching blocks are already sorted
         sort_inplace(block_missing[DataProvider.FileList],
                      key=itemgetter(DataProvider.URL))
     activity.finish()
Example #30
0
	def __init__(self, config):
		DataProcessor.__init__(self, config)
		internal_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dataprocessor'])
		internal_config.set('dataset processor', 'NullDataProcessor')
		self._url_filter = config.getFilter(['dataset ignore files', 'dataset ignore urls'], '', negate = True,
			filterParser = lambda value: self._parseFilter(internal_config, value),
			filterStr = lambda value: str.join('\n', value.split()),
			matchKey = itemgetter(DataProvider.URL),
			defaultMatcher = 'blackwhite', defaultFilter = 'weak',
			onChange = DataProcessor.triggerDataResync)
Example #31
0
 def _get_js_class_infos(self):
     job_class_list = [('AT WMS', JobClass.ATWMS),
                       ('RUNNING', JobClass.RUNNING),
                       ('FAILING', JobClass.FAILING),
                       ('SUCCESS', JobClass.SUCCESS)]
     state_class_map = {}
     for (job_class_name, job_class) in job_class_list:
         for job_state in job_class.state_list:
             state_class_map[job_state] = job_class_name
     return (state_class_map, lmap(itemgetter(0), job_class_list))
Example #32
0
	def _add_entry(self, container, section, option, value, source):
		opttype = '='
		try:
			option = option.strip()
			if option[-1] in imap(itemgetter(0), ConfigEntry.map_opt_type2desc.keys()):
				opttype = option[-1] + '='
				option = option[:-1].strip()
			container.append(ConfigEntry(section.strip(), option, value.strip(), opttype, source))
		except Exception:
			raise ConfigError('Unable to register config value [%s] %s %s %s (from %s)' % (
				section, option, opttype, value, source))
Example #33
0
 def process_block(self, block):
     # Check entry consistency
     events = sum(
         imap(itemgetter(DataProvider.NEntries),
              block[DataProvider.FileList]))
     if block.setdefault(DataProvider.NEntries, events) != events:
         error_msg = 'Inconsistency in block %s: Number of events doesn\'t match (b:%d != f:%d)'
         error_msg = error_msg % (DataProvider.get_block_id(block),
                                  block[DataProvider.NEntries], events)
         self._handle_error(error_msg, self._mode)
     return block
Example #34
0
	def process(self, blockIter):
		if self._sortDS:
			dsCache = {}
			for block in blockIter:
				dsCache.setdefault(block[DataProvider.Dataset], []).append(block)
			def ds_generator():
				for ds in sorted(dsCache):
					if self._sortBlock:
						sort_inplace(dsCache[ds], key = itemgetter(DataProvider.BlockName))
					for block in dsCache[ds]:
						yield block
			blockIter = ds_generator()
		elif self._sortBlock:
			blockIter = sorted(blockIter, key = itemgetter(DataProvider.BlockName))
		# Yield blocks
		for block in blockIter:
			if self._sortFiles:
				sort_inplace(block[DataProvider.FileList], key = itemgetter(DataProvider.URL))
			if self._sortLocation:
				sort_inplace(block[DataProvider.Locations])
			yield block
Example #35
0
    def _getBlocksInternal(self):
        # Split files into blocks/datasets via key functions and determine metadata intersection
        (protoBlocks, commonDS, commonB) = ({}, {}, {})

        def getActiveKeys(kUser, kGuard, gIdx):
            return kUser + (kGuard or lchain(
                imap(lambda x: x.getGuards()[gIdx], self._scanner)))

        keysDS = getActiveKeys(self._ds_keys_user, self._ds_keys_guard, 0)
        keysB = getActiveKeys(self._b_keys_user, self._b_keys_guard, 1)
        for fileInfo in ifilter(itemgetter(0), self._collectFiles()):
            hashDS = self._generateKey(
                keysDS,
                md5_hex(repr(self._datasetExpr)) +
                md5_hex(repr(self._datasetNick)), *fileInfo)
            hashB = self._generateKey(keysB,
                                      hashDS + md5_hex(repr(fileInfo[3])),
                                      *fileInfo)  # [3] == SE list
            if not self._ds_select or (hashDS in self._ds_select):
                if not self._b_select or (hashB in self._b_select):
                    fileInfo[1].update({'DS_KEY': hashDS, 'BLOCK_KEY': hashB})
                    protoBlocks.setdefault(hashDS,
                                           {}).setdefault(hashB,
                                                          []).append(fileInfo)
                    utils.intersectDict(
                        commonDS.setdefault(hashDS, dict(fileInfo[1])),
                        fileInfo[1])
                    utils.intersectDict(
                        commonB.setdefault(hashDS, {}).setdefault(
                            hashB, dict(fileInfo[1])), fileInfo[1])

        # Generate names for blocks/datasets using common metadata
        (hashNameDictDS, hashNameDictB) = ({}, {})
        for hashDS in protoBlocks:
            hashNameDictDS[hashDS] = self._generateDatasetName(
                hashDS, commonDS[hashDS])
            for hashB in protoBlocks[hashDS]:
                hashNameDictB[hashB] = (hashDS,
                                        self._generateBlockName(
                                            hashB, commonB[hashDS][hashB]))

        self._findCollision('dataset', hashNameDictDS, commonDS, keysDS,
                            lambda name, key: [key])
        self._findCollision('block', hashNameDictB, commonB, keysDS + keysB,
                            lambda name, key: [name[0], key],
                            lambda name: name[1])

        for block in self._buildBlocks(protoBlocks, hashNameDictDS,
                                       hashNameDictB):
            yield block
Example #36
0
def _diff_pspi_list(pa_old, pa_new, result_redo, result_disable):
	map_jobnum2pnum = {}

	def _handle_matching_pspi(pspi_list_added, pspi_list_missing, pspi_list_same, pspi_old, pspi_new):
		map_jobnum2pnum[pspi_old[TrackingInfo.pnum]] = pspi_new[TrackingInfo.pnum]
		if not pspi_old[TrackingInfo.ACTIVE] and pspi_new[TrackingInfo.ACTIVE]:
			result_redo.add(pspi_new[TrackingInfo.pnum])
		if pspi_old[TrackingInfo.ACTIVE] and not pspi_new[TrackingInfo.ACTIVE]:
			result_disable.add(pspi_new[TrackingInfo.pnum])
	# pspi_list_changed is ignored, since it is already processed by the change handler above
	(pspi_list_added, pspi_list_missing, _) = get_list_difference(
		_translate_pa2pspi_list(pa_old), _translate_pa2pspi_list(pa_new),
		itemgetter(TrackingInfo.HASH), _handle_matching_pspi)
	return (map_jobnum2pnum, pspi_list_added, pspi_list_missing)
Example #37
0
	def _resyncFiles(self, modSI, jobNum, sizeInfo, filesMissing, filesMatched, newBlock, metaIdxLookup, extended):
		# Select processing mode for job (disable > complete > changed > ignore) [ie. disable overrides all] using min
		# Result: one of [disable, complete, ignore] (changed -> complete or ignore)
		idx = 0
		newMetadata = []
		procMode = ResyncMode.ignore
		while idx < len(modSI[DataSplitter.FileList]):
			rmFI = fast_search(filesMissing, itemgetter(DataProvider.URL), modSI[DataSplitter.FileList][idx])
			if rmFI:
				procMode = min(procMode, self._resyncRemovedFile(idx, modSI, sizeInfo, rmFI))
			else:
				(oldFI, newFI) = fast_search(filesMatched, lambda x: x[0][DataProvider.URL], modSI[DataSplitter.FileList][idx])
				(procMode, idx) = self._resyncChangedFile(procMode, idx, modSI, jobNum, sizeInfo, newBlock, extended, oldFI, newFI, newMetadata, metaIdxLookup)
		return (procMode, newMetadata)
Example #38
0
 def _add_entry(self, container, section, option, value, source):
     opttype = '='
     try:
         option = option.strip()
         if option[-1] in imap(itemgetter(0),
                               ConfigEntry.map_opt_type2desc.keys()):
             opttype = option[-1] + '='
             option = option[:-1].strip()
         container.append(
             ConfigEntry(section.strip(), option, value.strip(), opttype,
                         source))
     except Exception:
         raise ConfigError(
             'Unable to register config value [%s] %s %s %s (from %s)' %
             (section, option, opttype, value, source))
Example #39
0
    def __init__(self,
                 name,
                 container_old,
                 container_cur,
                 parent=None,
                 set_sections=unspecified,
                 add_sections=None,
                 set_names=unspecified,
                 add_names=None,
                 set_tags=unspecified,
                 add_tags=None,
                 set_classes=unspecified,
                 add_classes=None,
                 inherit_sections=False):
        parent = parent or self
        if inherit_sections and isinstance(parent, TaggedConfigView):
            add_sections = (parent.get_class_section_list()
                            or []) + (add_sections or [])
        SimpleConfigView.__init__(self,
                                  name,
                                  container_old,
                                  container_cur,
                                  parent,
                                  set_sections=set_sections,
                                  add_sections=add_sections)

        self._class_section_list = self._init_variable(
            parent, '_class_section_list', None, set_classes, add_classes,
            norm_config_locations, lambda x: x.config_section_list)
        self._section_name_list = self._init_variable(parent,
                                                      '_section_name_list', [],
                                                      set_names, add_names,
                                                      norm_config_locations)

        def _get_tag_tuple(tag_obj):
            try:
                config_tag_name = tag_obj.config_tag_name.lower()
            except Exception:
                raise APIError('Class %r does not define a valid tag name!' %
                               tag_obj.__class__.__name__)
            return [(config_tag_name, tag_obj.get_object_name().lower())]

        self._section_tag_list = self._init_variable(parent,
                                                     '_section_tag_list', [],
                                                     set_tags, add_tags,
                                                     identity, _get_tag_tuple)
        self._section_tag_order = lmap(itemgetter(0), self._section_tag_list)
Example #40
0
	def _iter_blocks_raw(self):
		# Handling dataset and block information separately leads to nasty, nested code
		(map_key2fm_list, map_key2metadata_dict) = ({}, {})
		self._assign_dataset_block(map_key2fm_list, map_key2metadata_dict,
			ifilter(itemgetter(0), self._iter_file_infos()))
		# Generate names for blocks/datasets using common metadata - creating map id -> name
		map_key2name = {}
		for (key, metadata_dict) in map_key2metadata_dict.items():
			if len(key) == 1:
				map_key2name[key] = self._get_dataset_name(metadata_dict, hash_dataset=key[0])
			else:
				map_key2name[key] = self._get_block_name(metadata_dict, hash_block=key[1])
		# Check for bijective mapping id <-> name:
		self._check_map_name2key(map_key2name, map_key2metadata_dict)
		# Yield finished dataset blocks
		for block in self._build_blocks(map_key2fm_list, map_key2name, map_key2metadata_dict):
			yield block
Example #41
0
	def __init__(self, stream, screen):
		(self.stream, self.screen, self.logged) = (stream, screen, True)

		# This is a list of (regular expression, GUI attributes).  The
		# attributes are applied to matches of the regular expression in
		# the output written into this stream.  Lookahead expressions
		# should not overlap with other regular expressions.
		self.attrs = [
			(r'DONE(?!:)', [Console.COLOR_BLUE, Console.BOLD]),
			(r'FAILED(?!:)', [Console.COLOR_RED, Console.BOLD]),
			(r'SUCCESS(?!:)', [Console.COLOR_GREEN, Console.BOLD]),
			(r'(?<=DONE:)\s+[1-9]\d*', [Console.COLOR_BLUE, Console.BOLD]),
			(r'(?<=Failing jobs:)\s+[1-9]\d*', [Console.COLOR_RED, Console.BOLD]),
			(r'(?<=FAILED:)\s+[1-9]\d*', [Console.COLOR_RED, Console.BOLD]),
			(r'(?<=Successful jobs:)\s+[1-9]\d*', [Console.COLOR_GREEN, Console.BOLD]),
			(r'(?<=SUCCESS:)\s+[1-9]\d*', [Console.COLOR_GREEN, Console.BOLD]),
		]
		self.regex = re.compile('(%s)' % '|'.join(imap(itemgetter(0), self.attrs)))
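The writer above folds the first element of every (pattern, attributes) pair into one alternation, so a single regular-expression scan finds any token to highlight. A reduced sketch of that construction (the attribute values are placeholders, not the actual Console constants):

import re
from operator import itemgetter

attrs = [
	(r'DONE(?!:)', ['COLOR_BLUE', 'BOLD']),
	(r'FAILED(?!:)', ['COLOR_RED', 'BOLD']),
]
match_any = re.compile('(%s)' % '|'.join(map(itemgetter(0), attrs)))
print(match_any.findall('1 job DONE, 2 jobs FAILED'))  # ['DONE', 'FAILED']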
Example #42
0
	def __init__(self, stream, console, lock):
		(self._stream, self._console, self.logged, self._log, self._lock) = (stream, console, True, [None] * 100, lock)

		# This is a list of (regular expression, GUI attributes).  The
		# attributes are applied to matches of the regular expression in
		# the output written into this stream.  Lookahead expressions
		# should not overlap with other regular expressions.
		attrs = [
			(r'DONE(?!:)', [Console.COLOR_BLUE, Console.BOLD]),
			(r'FAILED(?!:)', [Console.COLOR_RED, Console.BOLD]),
			(r'SUCCESS(?!:)', [Console.COLOR_GREEN, Console.BOLD]),
			(r'(?<=DONE:)\s+[1-9]\d*', [Console.COLOR_BLUE, Console.BOLD]),
			(r'(?<=Failing jobs:)\s+[1-9]\d*', [Console.COLOR_RED, Console.BOLD]),
			(r'(?<=FAILED:)\s+[1-9]\d*', [Console.COLOR_RED, Console.BOLD]),
			(r'(?<=Successful jobs:)\s+[1-9]\d*', [Console.COLOR_GREEN, Console.BOLD]),
			(r'(?<=SUCCESS:)\s+[1-9]\d*', [Console.COLOR_GREEN, Console.BOLD]),
		]
		self._match_any_attr = re.compile('(%s)' % '|'.join(imap(itemgetter(0), attrs)))
		self._attrs = lmap(lambda expr_attr: (re.compile(expr_attr[0]), expr_attr[1]), attrs)
Example #43
0
def _create_placeholder_psrc(pa_old, pa_new, map_jobnum2pnum, pspi_list_missing, result_disable):
	# Construct placeholder parameter source with missing parameter entries and intervention state
	psp_list_missing = []
	missing_pnum_start = pa_new.get_job_len()
	sort_inplace(pspi_list_missing, key=itemgetter(TrackingInfo.pnum))
	for (idx, pspi_missing) in enumerate(pspi_list_missing):
		map_jobnum2pnum[pspi_missing[TrackingInfo.pnum]] = missing_pnum_start + idx
		psp_missing = pa_old.get_job_content(missing_pnum_start + idx, pspi_missing[TrackingInfo.pnum])
		psp_missing.pop('GC_PARAM')
		if psp_missing[ParameterInfo.ACTIVE]:
			psp_missing[ParameterInfo.ACTIVE] = False
			result_disable.add(missing_pnum_start + idx)
		psp_list_missing.append(psp_missing)
	meta_list_new = pa_new.get_job_metadata()
	meta_name_list_new = lmap(lambda key: key.value, meta_list_new)
	meta_list_old = pa_old.get_job_metadata()
	meta_list_missing = lfilter(lambda key: key.value not in meta_name_list_new, meta_list_old)
	return ParameterSource.create_instance('InternalParameterSource',
		psp_list_missing, meta_list_missing)
Example #44
0
	def _resync_files(self, splitter, partition_mod, partition_num, size_list,
			fi_list_missing, fi_list_matched, block_new, metadata_setup_list, partition_list_added):
		# resync a single file in the partition, return next file index to process
		# Select processing mode for job (disable > complete > changed > ignore)
		#   [ie. disable overrides all] using min
		# Result: one of [disable, complete, ignore] (changed -> complete or ignore)
		fi_idx = 0
		metadata_list_current = []
		proc_mode = ResyncMode.ignore
		while fi_idx < len(partition_mod[DataSplitter.FileList]):
			fi_removed = _fast_search(fi_list_missing, itemgetter(DataProvider.URL),
				partition_mod[DataSplitter.FileList][fi_idx])
			if fi_removed:
				proc_mode = self._handle_removed_file(proc_mode, fi_idx,
					partition_mod, size_list, fi_removed)
			else:
				(proc_mode, fi_idx) = self._handle_changed_file(splitter, proc_mode, fi_idx,
					partition_mod, partition_num, size_list, block_new, partition_list_added,
					fi_list_matched, metadata_list_current, metadata_setup_list)
		return (proc_mode, metadata_list_current)
Example #45
0
	def _processReplicas(self, blockPath, replica_infos):
		def empty_with_warning(*args):
			self._log.warning(*args)
			return []
		def expanded_replica_locations(replica_infos):
			for replica_info in replica_infos:
				for entry in self._replicaLocation(replica_info):
					yield entry

		if not replica_infos:
			return empty_with_warning('Dataset block %r has no replica information!', blockPath)
		replica_infos_selected = self._phedexFilter.filterList(replica_infos, key = itemgetter(0))
		if not replica_infos_selected:
			return empty_with_warning('Dataset block %r is not available at the selected locations!\nAvailable locations: %s', blockPath,
				str.join(', ', self._fmtLocations(replica_infos)))
		if not self._onlyComplete:
			return list(expanded_replica_locations(replica_infos_selected))
		replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2], replica_infos_selected)
		if not replica_infos_complete:
			return empty_with_warning('Dataset block %r is not completely available at the selected locations!\nAvailable locations: %s', blockPath,
				str.join(', ', self._fmtLocations(replica_infos)))
		return list(expanded_replica_locations(replica_infos_complete))
Example #46
0
	def _process_replica_list(self, block_path, replica_infos):
		def _empty_with_warning(error_msg, *args):
			self._log.warning('Dataset block %r ' + error_msg, block_path, *args)
			return []

		def _expanded_replica_locations(replica_infos):
			for replica_info in replica_infos:
				for entry in self._iter_replica_locations(replica_info):
					yield entry

		if not replica_infos:
			return _empty_with_warning('has no replica information!')
		replica_infos_selected = self._phedex_filter.filter_list(replica_infos, key=itemgetter(0))
		if not replica_infos_selected:
			return _empty_with_warning('is not available at the selected locations!\n' +
				'Available locations: %s', str.join(', ', self._iter_formatted_locations(replica_infos)))
		if not self._only_complete:
			return list(_expanded_replica_locations(replica_infos_selected))
		replica_infos_complete = lfilter(lambda nn_nh_c: nn_nh_c[2], replica_infos_selected)
		if not replica_infos_complete:
			return _empty_with_warning('is not completely available at the selected locations!\n' +
				'Available locations: %s', str.join(', ', self._iter_formatted_locations(replica_infos)))
		return list(_expanded_replica_locations(replica_infos_complete))
Example #47
0
	def show_report(self, job_db, jobnum_list):
		state_map = dict(self._state_map)

		def _transform(data, label, level):
			if None in data:
				total = data.pop(None)
				if len(data) > 1:
					for result in self._get_entry(state_map, total, ['Total']):
						yield result
					yield '='
			for idx, entry in enumerate(sorted(data)):
				if level == 1:
					for result in self._get_entry(state_map, data[entry], [entry] + label):
						yield result
				else:
					for result in _transform(data[entry], [entry] + label, level - 1):
						yield result
				if idx != len(data) - 1:
					yield '-'
		stats = self._get_hierachical_stats_dict(job_db, jobnum_list)
		displace_states_list = lmap(itemgetter(1), self._state_map)
		header = [('', 'Category')] + lzip(displace_states_list, displace_states_list)
		self._show_table(header, _transform(stats, [], len(self._idx_list)),
			align_str='l' + 'c' * len(state_map), fmt_dict={'': lambda x: str.join(' ', x)})
Example #48
0
	def _get_entries_for_url(url):
		fi = _fast_search(block_old[DataProvider.FileList], itemgetter(DataProvider.URL), url)
		if not fi:
			raise Exception('url %s not found in block %s\n%s' % (url, block_old, partition))
		return fi[DataProvider.NEntries]
Example #49
0
		def search_url(url):
			return fast_search(oldBlock[DataProvider.FileList], itemgetter(DataProvider.URL), url)
Example #50
0
			def ds_generator():
				for ds in sorted(dsCache):
					if self._sortBlock:
						sort_inplace(dsCache[ds], key = itemgetter(DataProvider.BlockName))
					for block in dsCache[ds]:
						yield block