Example 1
def dataset_show_diff(options):
	if len(options.args) != 2:
		options.parser.exit_with_usage(options.parser.usage('data'))

	provider_a = DataProvider.load_from_file(options.args[0])
	provider_b = DataProvider.load_from_file(options.args[1])
	# Diff the two cached block lists; resync yields (added, missing, matching)
	block_resync_tuple = DataProvider.resync_blocks(provider_a.get_block_list_cached(show_stats=False),
		provider_b.get_block_list_cached(show_stats=False))
	(block_list_added, block_list_missing, block_list_matching) = block_resync_tuple

	def _dataset_iter_matching_blocks():
		# Annotate each matching block with 'old -> new' markers for changed values
		for (block_old, block_new, _, _) in block_list_matching:
			def _format_change(old, new):
				if old != new:
					return '%s -> %s' % (old, new)
				return old
			block_old[DataProvider.NFiles] = _format_change(len(block_old.get(DataProvider.FileList, [])),
				len(block_new.get(DataProvider.FileList, [])))
			block_old[DataProvider.NEntries] = _format_change(block_old[DataProvider.NEntries],
				block_new[DataProvider.NEntries])
			yield block_old

	# Shared column layout for the three result tables
	header_list = [(DataProvider.Dataset, 'Dataset'), (DataProvider.BlockName, 'Block'),
		(DataProvider.NFiles, '#Files'), (DataProvider.NEntries, '#Entries')]
	if block_list_added:
		ConsoleTable.create(header_list, dataset_iter_blocks(block_list_added), title='Added blocks')
	if block_list_missing:
		ConsoleTable.create(header_list, dataset_iter_blocks(block_list_missing), title='Removed blocks')
	if block_list_matching:
		ConsoleTable.create(header_list, _dataset_iter_matching_blocks(), title='Matching blocks')
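Here `DataProvider.resync_blocks` partitions the two cached block lists into added, missing, and matching entries, and the nested `_format_change` renders changed counts as `old -> new`. The core diff pattern can be sketched standalone with plain dictionaries (illustrative keys and names only; the real `resync_blocks` has its own matching logic):

# Standalone sketch of the added/missing/matching diff (illustrative names only)
def diff_blocks(blocks_old, blocks_new):
	key = lambda block: (block['dataset'], block['block'])
	old_map = dict((key(block), block) for block in blocks_old)
	new_map = dict((key(block), block) for block in blocks_new)
	added = [block for k, block in new_map.items() if k not in old_map]
	missing = [block for k, block in old_map.items() if k not in new_map]
	matching = [(old_map[k], new_map[k]) for k in old_map if k in new_map]
	return (added, missing, matching)

def format_change(old, new):
	# Changed values are rendered as 'old -> new', unchanged ones as-is
	if old != new:
		return '%s -> %s' % (old, new)
	return old

blocks_a = [{'dataset': '/ds/A', 'block': 'b1', 'entries': 100}]
blocks_b = [{'dataset': '/ds/A', 'block': 'b1', 'entries': 120},
	{'dataset': '/ds/A', 'block': 'b2', 'entries': 50}]
(added, missing, matching) = diff_blocks(blocks_a, blocks_b)
for (block_old, block_new) in matching:
	print(format_change(block_old['entries'], block_new['entries']))  # '100 -> 120'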
Example 2
    def _display_setup(self, dataset_fn, head):
        if os.path.exists(dataset_fn):
            # Collect all nicknames occurring in the cached dataset blocks
            nick_name_set = set()
            for block in DataProvider.load_from_file(
                    dataset_fn).get_block_list_cached(show_stats=False):
                nick_name_set.add(block[DataProvider.Nickname])
            self._log.info('Mapping between nickname and other settings:')
            report = []

            # Select parameter sources whose lookup depends on DATASETNICK
            def _get_dataset_lookup_psrc(psrc):
                is_lookup_cls = isinstance(
                    psrc,
                    ParameterSource.get_class('LookupBaseParameterSource'))
                return is_lookup_cls and ('DATASETNICK'
                                          in psrc.get_parameter_deps())

            ps_lookup = lfilter(_get_dataset_lookup_psrc,
                                self._source.get_used_psrc_list())
            # Build one report row per nickname; each lookup source fills in its values
            for nick in sorted(nick_name_set):
                tmp = {'DATASETNICK': nick}
                for src in ps_lookup:
                    src.fill_parameter_content(None, tmp)
                # Integer keys 1 and 2 address the extra columns defined in head
                tmp[1] = str.join(
                    ', ',
                    imap(os.path.basename,
                         self._nm_cfg.lookup(nick, '', is_selector=False)))
                tmp[2] = str_lumi_nice(
                    self._nm_lumi.lookup(nick, '', is_selector=False))
                report.append(tmp)
            ConsoleTable.create(head, report, 'cl')
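Note that the report rows are plain dicts in which the string key 'DATASETNICK' and the integer keys 1 and 2 address columns of `head`. The row-building pattern, with each lookup source contributing extra columns, reduces to this standalone sketch (`lookup_config` is a made-up stand-in for `fill_parameter_content`):

# Standalone sketch of per-nickname report rows (illustrative names only)
def build_report(nick_name_set, lookup_list):
    report = []
    for nick in sorted(nick_name_set):
        row = {'DATASETNICK': nick}
        for fill_row in lookup_list:
            fill_row(row)  # analogous to src.fill_parameter_content(None, row)
        report.append(row)
    return report

def lookup_config(row):  # made-up stand-in for a lookup parameter source
    row[1] = {'data_2016': 'cfg_data.py', 'mc_qcd': 'cfg_mc.py'}.get(row['DATASETNICK'], '')

print(build_report(set(['mc_qcd', 'data_2016']), [lookup_config]))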
Example 3
	def _resync_psrc(self):
		activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
		# Get old and new dataset information
		provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
		block_list_old = provider_old.get_block_list_cached(show_stats=False)
		self._provider.clear_cache()
		block_list_new = self._provider.get_block_list_cached(show_stats=False)
		self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

		# Use old splitting information to synchronize with new dataset infos
		partition_len_old = self.get_parameter_len()
		partition_changes = self._resync_partitions(
			self._get_data_path('map-new.tar'), block_list_old, block_list_new)
		activity.finish()
		if partition_changes is not None:
			# Move current splitting to backup and use the new splitting from now on
			def _rename_with_backup(new, cur, old):
				if self._keep_old:
					os.rename(self._get_data_path(cur), self._get_data_path(old))
				os.rename(self._get_data_path(new), self._get_data_path(cur))
			_rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
			_rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
			self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
			self._log.debug('Dataset resync finished: %d -> %d partitions', partition_len_old, self._len)
			(pnum_list_redo, pnum_list_disable) = partition_changes
			# Report partitions to redo / disable and whether the partition count changed
			return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
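The `_rename_with_backup` helper promotes the freshly written `cache-new.dat` and `map-new.tar`: if `_keep_old` is set, the current file is first preserved under a timestamped name, then the new file is renamed into place. The same promote-with-optional-backup step in isolation (file names here are illustrative):

import os
import time

def promote(new_fn, cur_fn, keep_old=True):
	# Optionally keep the current version under a timestamped backup name,
	# then move the new file into place
	if keep_old:
		os.rename(cur_fn, '%s.old-%d' % (cur_fn, time.time()))
	os.rename(new_fn, cur_fn)

open('cache.dat', 'w').close()
open('cache-new.dat', 'w').close()
promote('cache-new.dat', 'cache.dat')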
Example 4
def dataset_show_removed(options):
	if len(options.args) < 2:
		options.parser.exit_with_usage(options.parser.usage('data'))

	block_list_missing = []
	provider_old = DataProvider.load_from_file(options.args[0])
	# Walk the remaining files in order, diffing each against its predecessor
	for dataset_fn in options.args[1:]:
		provider_new = DataProvider.load_from_file(dataset_fn)
		block_resync_tuple = DataProvider.resync_blocks(
			provider_old.get_block_list_cached(show_stats=False),
			provider_new.get_block_list_cached(show_stats=False))
		for block in block_resync_tuple[1]:  # blocks missing from the newer file
			tmp = dict(block)
			tmp[DataProvider.RemovedIn] = dataset_fn
			block_list_missing.append(tmp)
		provider_old = provider_new
	if block_list_missing:
		ConsoleTable.create([(DataProvider.Dataset, 'Dataset'), (DataProvider.BlockName, 'Block'),
			(DataProvider.NFiles, '#Files'), (DataProvider.NEntries, '#Entries'),
			(DataProvider.RemovedIn, 'Removed in file')],
			dataset_iter_blocks(block_list_missing), title='Removed blocks')
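The loop treats the argument list as a chronological chain of dataset files: each file is diffed against its predecessor, and a missing block is tagged with the file in which it first disappeared. That chaining reduces to the following standalone sketch (item and file names are illustrative):

def removed_items(snapshots):
	# snapshots: list of (filename, set_of_items), ordered oldest to newest
	removed = []
	(_, items_old) = snapshots[0]
	for (fn, items_new) in snapshots[1:]:
		for item in sorted(items_old - items_new):
			removed.append((item, fn))  # item first disappeared in this snapshot
		items_old = items_new
	return removed

print(removed_items([('v1.dat', set(['b1', 'b2'])),
	('v2.dat', set(['b1'])), ('v3.dat', set([]))]))
# [('b2', 'v2.dat'), ('b1', 'v3.dat')]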
Example 5
	def _init_reader(self):
		# look for aborted inits / resyncs - and try to restore old state if possible
		if self._exists_data_path('map.tar.resync') and self._exists_data_path('cache.dat.resync'):
			rename_file(self._get_data_path('cache.dat.resync'), self._get_data_path('cache.dat'))
			rename_file(self._get_data_path('map.tar.resync'), self._get_data_path('map.tar'))
		elif self._exists_data_path('map.tar.resync') or self._exists_data_path('cache.dat.resync'):
			raise DatasetError('Found broken dataset partition resync state in work directory')

		# A partition map without its matching dataset cache cannot be used
		if self._exists_data_path('map.tar') and not self._exists_data_path('cache.dat'):
			raise DatasetError('Found broken dataset partition in work directory')
		elif not self._exists_data_path('map.tar'):
			# create initial partition map file
			if not self._exists_data_path('cache.dat'):
				provider = self._provider
			else:
				provider = DataProvider.load_from_file(self._get_data_path('cache.dat'))
			block_iter = DataProvider.save_to_file_iter(self._get_data_path('cache.dat.init'),
				provider.get_block_list_cached(show_stats=True))
			partition_iter = self._splitter.split_partitions(block_iter)
			DataSplitter.save_partitions(self._get_data_path('map.tar.init'), partition_iter)
			rename_file(self._get_data_path('cache.dat.init'), self._get_data_path('cache.dat'))
			rename_file(self._get_data_path('map.tar.init'), self._get_data_path('map.tar'))
		return DataSplitter.load_partitions(self._get_data_path('map.tar'))
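The initial map is built restartably: both files are first written under an `.init` suffix and only renamed to their final names once complete, so an aborted run never leaves a final-named file with partial content. A standalone sketch of that write-then-rename idiom (the helper name is made up):

import os

def write_atomically(fn, content):
	# Write under a temporary name first; the final name only ever
	# appears once the content is complete
	tmp_fn = fn + '.init'
	fh = open(tmp_fn, 'w')
	try:
		fh.write(content)
	finally:
		fh.close()
	os.rename(tmp_fn, fn)

write_atomically('map.tar', 'partition data')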