def _resync_psrc(self):
	activity = Activity('Performing resync of datasource %r' % self.get_datasource_name())
	# Get old and new dataset information
	provider_old = DataProvider.load_from_file(self._get_data_path('cache.dat'))
	block_list_old = provider_old.get_block_list_cached(show_stats=False)
	self._provider.clear_cache()
	block_list_new = self._provider.get_block_list_cached(show_stats=False)
	self._provider.save_to_file(self._get_data_path('cache-new.dat'), block_list_new)

	# Use old splitting information to synchronize with new dataset infos
	partition_len_old = self.get_parameter_len()
	partition_changes = self._resync_partitions(
		self._get_data_path('map-new.tar'), block_list_old, block_list_new)
	activity.finish()
	if partition_changes is not None:
		# Move current splitting to backup and use the new splitting from now on
		def _rename_with_backup(new, cur, old):
			if self._keep_old:
				os.rename(self._get_data_path(cur), self._get_data_path(old))
			os.rename(self._get_data_path(new), self._get_data_path(cur))
		_rename_with_backup('map-new.tar', 'map.tar', 'map-old-%d.tar' % time.time())
		_rename_with_backup('cache-new.dat', 'cache.dat', 'cache-old-%d.dat' % time.time())
		self._set_reader(DataSplitter.load_partitions(self._get_data_path('map.tar')))
		self._log.debug('Dataset resync finished: %d -> %d partitions',
			partition_len_old, self._len)
		(pnum_list_redo, pnum_list_disable) = partition_changes
		return (set(pnum_list_redo), set(pnum_list_disable), partition_len_old != self._len)
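
# A minimal, standalone sketch (stdlib only, not part of the class above) of the
# backup rotation performed by _rename_with_backup: the freshly written file
# replaces the current one, and the current one is kept as a timestamped backup
# when requested. The helper name and file names are illustrative only.
import os
import time

def rotate_with_backup(new_fn, cur_fn, keep_old=True):
	if keep_old and os.path.exists(cur_fn):
		os.rename(cur_fn, '%s.old-%d' % (cur_fn, time.time()))  # preserve the previous state
	os.rename(new_fn, cur_fn)  # promote the new file to the current name

# e.g. rotate_with_backup('map-new.tar', 'map.tar')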
def setup_dataset(config, dataset, repository):
	if dataset.lower() == 'true':
		logging.info('Registering dummy data provider data')

		def _create_partition(ds_name, nick, n_events, fn_list):
			return {DataSplitter.Dataset: ds_name, DataSplitter.Nickname: nick,
				DataSplitter.FileList: fn_list, DataSplitter.NEntries: n_events}
		reader = PartitionReader.create_instance('TrivialPartitionReader', [
			_create_partition('ds1', 'data_1', 23, ['a', 'b']),
			_create_partition('ds1', 'data_1', 42, ['1']),
			_create_partition('ds2', 'data_2', 123, ['m', 'n']),
			_create_partition('ds2', 'data_3', 987, ['x', 'y', 'z'])
		])
	else:
		reader = DataSplitter.load_partitions(dataset)
	config = config.change_view(set_sections=None, default_on_change=None)
	ParameterSource.create_instance('BaseDataParameterSource', config, 'dataset', repository, reader)
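
# Hedged usage sketch for setup_dataset: `dataset` is either the literal string
# 'true' (register the four hard-coded dummy partitions above) or the path to an
# existing partition map file. The file path and the `config` / `repository`
# objects below are placeholders supplied by the surrounding script machinery.
# setup_dataset(config, 'true', repository)                      # dummy data for testing
# setup_dataset(config, 'work.mytask/datamap.tar', repository)   # real partition map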
def _main():
	signal.signal(signal.SIGINT, handle_abort_interrupt)
	parser = ScriptOptions()
	parser.section('expr', 'Manipulate lumi filter expressions', '%s <lumi filter expression>')
	parser.add_bool('expr', 'G', 'gc', default=False,
		help='Output grid-control compatible lumi expression')
	parser.add_bool('expr', 'J', 'json', default=False,
		help='Output JSON file with lumi expression')
	parser.add_bool('expr', 'F', 'full', default=False,
		help='Output JSON file with full expression')

	parser.section('calc', 'Options which allow luminosity related calculations',
		'%s <config file> [<job selector>]')
	parser.add_text('calc', 'O', 'output-dir', default=None,
		help='Set output directory (default: work directory)')
	parser.add_bool('calc', 'g', 'job-gc', default=False,
		help='Output grid-control compatible lumi expression for processed lumi sections')
	parser.add_bool('calc', 'j', 'job-json', default=False,
		help='Output JSON file with processed lumi sections')
	parser.add_bool('calc', 'e', 'job-events', default=False,
		help='Get number of events processed')
	parser.add_bool('calc', 'p', 'parameterized', default=False,
		help='Use output file name to categorize output (useful for parameterized tasks)')
	parser.add_text('calc', ' ', 'replace', default='job_%d_',
		help='Pattern to replace for parameterized jobs (default: job_%%d_)')
	options = parser.script_parse()

	if options.opts.gc or options.opts.json or options.opts.full:
		if not options.args:
			options.parser.exit_with_usage(options.parser.usage('expr'))
		return convert_lumi_expr(options.opts, options.args)

	if options.opts.job_json or options.opts.job_gc or options.opts.job_events:
		if not options.args:
			options.parser.exit_with_usage(options.parser.usage('calc'))
		script_obj = get_script_object_cmdline(options.args, only_success=True)
		work_dn = script_obj.config.get_work_path()
		reader = None
		try:
			reader = DataSplitter.load_partitions(os.path.join(work_dn, 'datamap.tar'))
		except Exception:
			clear_current_exception()
		jobnum_list = sorted(script_obj.job_db.get_job_list(ClassSelector(JobClass.SUCCESS)))
		return lumi_calc(options.opts, work_dn, jobnum_list, reader)
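
# Illustrative command lines implied by the option definitions above; the script
# name and paths are placeholders, the flags and positional arguments follow the
# section usage strings registered in _main:
#   <script> -G "<lumi filter expression>"             # expr mode: grid-control lumi expression
#   <script> -j <config file> [<job selector>]         # calc mode: JSON of processed lumi sections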
def _init_reader(self):
	# look for aborted inits / resyncs - and try to restore old state if possible
	if self._exists_data_path('map.tar.resync') and self._exists_data_path('cache.dat.resync'):
		rename_file(self._get_data_path('cache.dat.resync'), self._get_data_path('cache.dat'))
		rename_file(self._get_data_path('map.tar.resync'), self._get_data_path('map.tar'))
	elif self._exists_data_path('map.tar.resync') or self._exists_data_path('cache.dat.resync'):
		raise DatasetError('Found broken dataset partition resync state in work directory')

	if self._exists_data_path('map.tar') and not self._exists_data_path('cache.dat'):
		raise DatasetError('Found broken dataset partition in work directory')
	elif not self._exists_data_path('map.tar'):
		# create initial partition map file
		if not self._exists_data_path('cache.dat'):
			provider = self._provider
		else:
			provider = DataProvider.load_from_file(self._get_data_path('cache.dat'))
		block_iter = DataProvider.save_to_file_iter(self._get_data_path('cache.dat.init'),
			provider.get_block_list_cached(show_stats=True))
		partition_iter = self._splitter.split_partitions(block_iter)
		DataSplitter.save_partitions(self._get_data_path('map.tar.init'), partition_iter)
		rename_file(self._get_data_path('cache.dat.init'), self._get_data_path('cache.dat'))
		rename_file(self._get_data_path('map.tar.init'), self._get_data_path('map.tar'))
	return DataSplitter.load_partitions(self._get_data_path('map.tar'))
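
# Standalone sketch (stdlib only) of the "write under an .init name, then rename"
# pattern that _init_reader relies on: the final file only appears once it is
# completely written, so a crash in between leaves the temporary name behind and
# can be detected on the next start-up. The helper and file names are illustrative.
import os

def write_then_rename(target_fn, payload):
	tmp_fn = target_fn + '.init'
	with open(tmp_fn, 'w') as fp:
		fp.write(payload)  # write the full content under the temporary name
	os.rename(tmp_fn, target_fn)  # rename is atomic within one POSIX file system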
def get_partition_reader(options):
	if len(options.args) != 1:
		options.parser.exit_with_usage(options.parser.usage('part'))
	return DataSplitter.load_partitions(options.args[0])
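
# Hedged usage note: get_partition_reader expects exactly one positional argument,
# the path to a partition map (for example the 'map.tar' created by _init_reader or
# the work directory's 'datamap.tar'), and returns the reader obtained from
# DataSplitter.load_partitions. The `options` object is the ScriptOptions result
# as produced in _main above; the path below is a placeholder.
# reader = get_partition_reader(options)   # options.args == ['work.mytask/map.tar']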