def _resync_psrc(self):
	activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
	# Get old and new dataset information
	provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
	block_list_old = provider_old.get_block_list_cached(show_stats=False)
	self._provider.clear_cache()
	block_list_new = self._provider.get_block_list_cached(show_stats=False)
	self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

	# Use old splitting information to synchronize with new dataset infos
	partition_len_old = self.get_parameter_len()
	partition_changes = self._resync_partitions(
		self._get_data_path('map-new.tar'), block_list_old, block_list_new)
	activity.finish()
	if partition_changes is not None:
		# Move current splitting to backup and use the new splitting from now on
		def _rename_with_backup(new, cur, old):
			if self._keep_old:
				os.rename(self._get_data_path(cur), self._get_data_path(old))
			os.rename(self._get_data_path(new), self._get_data_path(cur))
		_rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
		_rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
		self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
		self._log.debug('Dataset resync finished: %d -> %d partitions',
			partition_len_old, self._len)
		(pnum_list_redo, pnum_list_disable) = partition_changes
		return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
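
# A minimal, standalone sketch (stdlib only, not part of the class above) of the
# backup rotation performed by _rename_with_backup: the freshly written file
# replaces the current one, and the current one is kept as a timestamped backup
# when requested. The helper name and file names are illustrative only.
import os
import time

def rotate_with_backup(new_fn, cur_fn, keep_old=True):
	if keep_old and os.path.exists(cur_fn):
		os.rename(cur_fn, '%s.old-%d' % (cur_fn, time.time()))  # preserve the previous state
	os.rename(new_fn, cur_fn)  # promote the new file to the current name

# e.g. rotate_with_backup('map-new.tar', 'map.tar')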
def setup_dataset(config, dataset, repository):
	if dataset.lower() == 'true':
		logging.info('Registering dummy data provider data')

		def _create_partition(ds_name, nick, n_events, fn_list):
			return {DataSplitter.Dataset: ds_name, DataSplitter.Nickname: nick,
				DataSplitter.FileList: fn_list, DataSplitter.NEntries: n_events}
		reader = PartitionReader.create_instance('TrivialPartitionReader', [
			_create_partition('ds1', 'data_1', 23, ['a', 'b']),
			_create_partition('ds1', 'data_1', 42, ['1']),
			_create_partition('ds2', 'data_2', 123, ['m', 'n']),
			_create_partition('ds2', 'data_3', 987, ['x', 'y', 'z'])
		])
	else:
		reader = DataSplitter.load_partitions(dataset)
	config = config.change_view(set_sections=None, default_on_change=None)
	ParameterSource.create_instance('BaseDataParameterSource', config, 'dataset', repository, reader)
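
# Hedged usage sketch for setup_dataset: `dataset` is either the literal string
# 'true' (register the four hard-coded dummy partitions above) or the path to an
# existing partition map file. The file path and the `config` / `repository`
# objects below are placeholders supplied by the surrounding script machinery.
# setup_dataset(config, 'true', repository)                      # dummy data for testing
# setup_dataset(config, 'work.mytask/datamap.tar', repository)   # real partition map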
def _main():
	signal.signal(signal.SIGINT, handle_abort_interrupt)
	parser = ScriptOptions()
	parser.section('expr', 'Manipulate lumi filter expressions', '%s <lumi filter expression>')
	parser.add_bool('expr', 'G', 'gc', default=False,
		help='Output grid-control compatible lumi expression')
	parser.add_bool('expr', 'J', 'json', default=False,
		help='Output JSON file with lumi expression')
	parser.add_bool('expr', 'F', 'full', default=False,
		help='Output JSON file with full expression')

	parser.section('calc', 'Options which allow luminosity related calculations',
		'%s <config file> [<job selector>]')
	parser.add_text('calc', 'O', 'output-dir', default=None,
		help='Set output directory (default: work directory)')
	parser.add_bool('calc', 'g', 'job-gc', default=False,
		help='Output grid-control compatible lumi expression for processed lumi sections')
	parser.add_bool('calc', 'j', 'job-json', default=False,
		help='Output JSON file with processed lumi sections')
	parser.add_bool('calc', 'e', 'job-events', default=False,
		help='Get number of events processed')
	parser.add_bool('calc', 'p', 'parameterized', default=False,
		help='Use output file name to categorize output (useful for parameterized tasks)')
	parser.add_text('calc', ' ', 'replace', default='job_%d_',
		help='Pattern to replace for parameterized jobs (default: job_%%d_)')
	options = parser.script_parse()

	if options.opts.gc or options.opts.json or options.opts.full:
		if not options.args:
			options.parser.exit_with_usage(options.parser.usage('expr'))
		return convert_lumi_expr(options.opts, options.args)

	if options.opts.job_json or options.opts.job_gc or options.opts.job_events:
		if not options.args:
			options.parser.exit_with_usage(options.parser.usage('calc'))
		script_obj = get_script_object_cmdline(options.args, only_success=True)
		work_dn = script_obj.config.get_work_path()
		reader = None
		try:
			reader = DataSplitter.load_partitions(os.path.join(work_dn, 'datamap.tar'))
		except Exception:
			clear_current_exception()
		jobnum_list = sorted(script_obj.job_db.get_job_list(ClassSelector(JobClass.SUCCESS)))
		return lumi_calc(options.opts, work_dn, jobnum_list, reader)
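
# Illustrative command lines implied by the option definitions above; the script
# name and paths are placeholders, the flags and positional arguments follow the
# section usage strings registered in _main:
#   <script> -G "<lumi filter expression>"             # expr mode: grid-control lumi expression
#   <script> -j <config file> [<job selector>]         # calc mode: JSON of processed lumi sections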
def _init_reader(self):
	# look for aborted inits / resyncs - and try to restore old state if possible
	if self._exists_data_path('map.tar.resync') and self._exists_data_path('cache.dat.resync'):
		rename_file(self._get_data_path('cache.dat.resync'), self._get_data_path('cache.dat'))
		rename_file(self._get_data_path('map.tar.resync'), self._get_data_path('map.tar'))
	elif self._exists_data_path('map.tar.resync') or self._exists_data_path('cache.dat.resync'):
		raise DatasetError('Found broken dataset partition resync state in work directory')

	if self._exists_data_path('map.tar') and not self._exists_data_path('cache.dat'):
		raise DatasetError('Found broken dataset partition in work directory')
	elif not self._exists_data_path('map.tar'):
		# create initial partition map file
		if not self._exists_data_path('cache.dat'):
			provider = self._provider
		else:
			provider = DataProvider.load_from_file(self._get_data_path('cache.dat'))
		block_iter = DataProvider.save_to_file_iter(self._get_data_path('cache.dat.init'),
			provider.get_block_list_cached(show_stats=True))
		partition_iter = self._splitter.split_partitions(block_iter)
		DataSplitter.save_partitions(self._get_data_path('map.tar.init'), partition_iter)
		rename_file(self._get_data_path('cache.dat.init'), self._get_data_path('cache.dat'))
		rename_file(self._get_data_path('map.tar.init'), self._get_data_path('map.tar'))
	return DataSplitter.load_partitions(self._get_data_path('map.tar'))
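
# Standalone sketch (stdlib only) of the "write under an .init name, then rename"
# pattern that _init_reader relies on: the final file only appears once it is
# completely written, so a crash in between leaves the temporary name behind and
# can be detected on the next start-up. The helper and file names are illustrative.
import os

def write_then_rename(target_fn, payload):
	tmp_fn = target_fn + '.init'
	with open(tmp_fn, 'w') as fp:
		fp.write(payload)  # write the full content under the temporary name
	os.rename(tmp_fn, target_fn)  # rename is atomic within one POSIX file system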
def get_partition_reader(options):
	if len(options.args) != 1:
		options.parser.exit_with_usage(options.parser.usage('part'))
	return DataSplitter.load_partitions(options.args[0])
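
# Hedged usage note: get_partition_reader expects exactly one positional argument,
# the path to a partition map (for example the 'map.tar' created by _init_reader or
# the work directory's 'datamap.tar'), and returns the reader obtained from
# DataSplitter.load_partitions. The `options` object is the ScriptOptions result
# as produced in _main above; the path below is a placeholder.
# reader = get_partition_reader(options)   # options.args == ['work.mytask/map.tar']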