class UserTask(DataTask):
	"""Task that runs a user-supplied executable or script on the worker node."""
	# Names under which this task type can be selected in the configuration
	alias_list = ['UserMod', 'user', 'script']
	config_section_list = DataTask.config_section_list + ['UserMod', 'UserTask']

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		# Wrapper describing the user executable, its arguments and sandbox files
		self._exe = TaskExecutableWrapper(config)

	def get_command(self):
		"""Return the job command line; stdout/stderr are captured into job.stdout/job.stderr."""
		user_cmd = self._exe.get_command()
		return '(%s) > job.stdout 2> job.stderr' % user_cmd

	def get_job_arguments(self, jobnum):
		"""Return base task arguments followed by the user executable's arguments."""
		base_args = DataTask.get_job_arguments(self, jobnum)
		return base_args + ' ' + self._exe.get_arguments()

	def get_sb_in_fpi_list(self):
		"""Return input sandbox file infos: base task files plus the user executable's files."""
		return DataTask.get_sb_in_fpi_list(self) + self._exe.get_sb_in_fpi_list()

	def get_sb_out_fn_list(self):
		"""Return output sandbox file names, gzipped when stdout/stderr compression is enabled."""
		stdio_fn_list = ['job.stdout', 'job.stderr']
		# _do_gzip_std_output presumably comes from the DataTask base — confirm there
		if self._do_gzip_std_output:
			stdio_fn_list = [fn + '.gz' for fn in stdio_fn_list]
		return DataTask.get_sb_out_fn_list(self) + stdio_fn_list
class UserTask(DataTask):
	"""Task that runs a user-supplied executable or script on the worker node."""
	# Aliases under which this task type can be selected in the configuration
	alias_list = ['UserMod', 'user', 'script']
	config_section_list = DataTask.config_section_list + ['UserMod', 'UserTask']

	def __init__(self, config, name):
		DataTask.__init__(self, config, name)
		# Wrapper holding the user executable, its arguments and sandbox files
		self._exe = TaskExecutableWrapper(config)

	def get_command(self):
		"""Return the job command line with stdout/stderr redirected to job.stdout/job.stderr."""
		return '(%s) > job.stdout 2> job.stderr' % self._exe.get_command()

	def get_job_arguments(self, jobnum):
		"""Return the base task arguments followed by the user executable's arguments."""
		return DataTask.get_job_arguments(self, jobnum) + ' ' + self._exe.get_arguments()

	def get_sb_in_fpi_list(self):
		"""Return input sandbox file infos: base task files plus the executable's files."""
		return DataTask.get_sb_in_fpi_list(self) + self._exe.get_sb_in_fpi_list()

	def get_sb_out_fn_list(self):
		"""Return output sandbox file names; stdout/stderr get a .gz suffix when compression is on."""
		job_out_fn_list = ['job.stdout', 'job.stderr']
		# _do_gzip_std_output presumably set by the DataTask base class — confirm there
		if self._do_gzip_std_output:
			job_out_fn_list = lmap(lambda fn: fn + '.gz', job_out_fn_list)
		return DataTask.get_sb_out_fn_list(self) + job_out_fn_list
class CMSSW(SCRAMTask):
	"""Task running cmsRun inside a CMSSW project area via the gc-run.cmssw.sh wrapper."""
	alias_list = ['']
	config_section_list = SCRAMTask.config_section_list + ['CMSSW']

	def __init__(self, config, name):
		# Defaults for CMS datasets and partitioning before the base class reads them
		config.set('se input timeout', '0:30')
		config.set('application', 'cmsRun', section='dashboard')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'BasicPartitionProcessor', '-=')
		config.set('partition processor',
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor', '+=')
		# Variable names that must appear as placeholders in the cmsRun config files
		self._needed_vn_set = set()
		SCRAMTask.__init__(self, config, name)
		self._uii = UserInputInterface()

		# Setup file path informations
		self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
		if self._do_gzip_std_output:
			self._cmsrun_output_files.append('cmssw.log.gz')
		self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
			path_abs=get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		if self._scram_project != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# RELEASETOP of the user project area (needed to relocate it on the worker node)
		self._old_release_top = None
		if self._project_area:
			scram_arch_env_path = os.path.join(self._project_area, '.SCRAM',
				self._scram_arch, 'Environment')
			self._old_release_top = self._parse_scram_file(scram_arch_env_path).get('RELEASETOP')

		self._update_map_error_code2msg(get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		self._project_area_tarball_on_se = config.get_bool(['se runtime', 'se project area'], True)
		self._project_area_tarball = config.get_work_path('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		# idiom fix: truthiness test instead of comparing against the empty list
		if config.get_fn_list('executable', []):
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existance
		# Check that for dataset jobs the necessary placeholders are in the config file
		if not self._has_dataset:
			self._events_per_job = config.get('events per job', '0')  # this can be a variable like @USER_EVENTS@!
			self._needed_vn_set.add('MAX_EVENTS')
		fragment = config.get_fn('instrumentation fragment',
			get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
		self._config_fn_list = self._process_config_file_list(config,
			config.get_fn_list('config file', self._get_config_file_default()),
			fragment, auto_prepare=config.get_bool('instrumentation', True),
			must_prepare=self._has_dataset)

		# Create project area tarball
		if self._project_area and not os.path.exists(self._project_area_tarball):
			config.set_state(True, 'init', detail='sandbox')
		# Information about search order for software environment
		self._cmssw_search_dict = self._get_cmssw_path_list(config)
		if config.get_state('init', detail='sandbox'):
			msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
			if os.path.exists(self._project_area_tarball) and not self._uii.prompt_bool(msg, True):
				return
			# Generate CMSSW tarball
			if self._project_area:
				create_tarball(_match_files(self._project_area,
					self._project_area_matcher, self._always_matcher.create_matcher(''),
					self._project_area_base_fn), name=self._project_area_tarball)
			if self._project_area_tarball_on_se:
				config.set_state(True, 'init', detail='storage')

	def get_command(self):
		"""Return the job command line - everything is delegated to the wrapper script."""
		return './gc-run.cmssw.sh $@'

	def get_job_arguments(self, jobnum):
		"""Return base task arguments followed by the user-configured 'arguments' string."""
		return SCRAMTask.get_job_arguments(self, jobnum) + ' ' + self.arguments

	def get_job_dict(self, jobnum):
		"""Return job dependent environment variables consumed by gc-run.cmssw.sh."""
		job_env_dict = SCRAMTask.get_job_dict(self, jobnum)
		if not self._has_dataset:
			job_env_dict['MAX_EVENTS'] = self._events_per_job
		job_env_dict.update(dict(self._cmssw_search_dict))
		if self._do_gzip_std_output:
			job_env_dict['GZIP_OUT'] = 'yes'
		if self._project_area_tarball_on_se:
			job_env_dict['SE_RUNTIME'] = 'yes'
		if self._project_area:
			job_env_dict['HAS_RUNTIME'] = 'yes'
		job_env_dict['CMSSW_EXEC'] = 'cmsRun'
		job_env_dict['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self._config_fn_list))
		job_env_dict['CMSSW_OLD_RELEASETOP'] = self._old_release_top
		if self.prolog.is_active():
			job_env_dict['CMSSW_PROLOG_EXEC'] = self.prolog.get_command()
			job_env_dict['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ',
				imap(lambda x: x.path_rel, self.prolog.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_PROLOG_ARGS'] = self.prolog.get_arguments()
		if self.epilog.is_active():
			job_env_dict['CMSSW_EPILOG_EXEC'] = self.epilog.get_command()
			job_env_dict['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ',
				imap(lambda x: x.path_rel, self.epilog.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_EPILOG_ARGS'] = self.epilog.get_arguments()
		return job_env_dict

	def get_sb_in_fpi_list(self):
		"""Return input sandbox files: base/prolog/epilog files, cmsRun configs, tarball, wrapper."""
		fpi_list = (SCRAMTask.get_sb_in_fpi_list(self) +
			self.prolog.get_sb_in_fpi_list() + self.epilog.get_sb_in_fpi_list())
		for config_file in self._config_fn_list:
			fpi_list.append(Result(path_abs=config_file, path_rel=os.path.basename(config_file)))
		# Tarball travels in the sandbox only when it is not transferred via SE
		if self._project_area and not self._project_area_tarball_on_se:
			fpi_list.append(Result(path_abs=self._project_area_tarball,
				path_rel=os.path.basename(self._project_area_tarball)))
		return fpi_list + [self._script_fpi]

	def get_sb_out_fn_list(self):
		"""Return output sandbox files; cmsRun report files only when configs were used."""
		if not self._config_fn_list:
			return SCRAMTask.get_sb_out_fn_list(self)
		return SCRAMTask.get_sb_out_fn_list(self) + self._cmsrun_output_files

	def get_se_in_fn_list(self):
		"""Return files transferred via SE as (description, source, target) tuples."""
		files = SCRAMTask.get_se_in_fn_list(self)
		if self._project_area and self._project_area_tarball_on_se:
			return files + [('CMSSW tarball', self._project_area_tarball, self._task_id + '.tar.gz')]
		return files

	def _config_find_uninitialized(self, config, config_file_list, auto_prepare, must_prepare):
		"""Determine which config files still need to be copied/instrumented.

		Returns a list of (source, workdir-copy, do_prepare) tuples and prints a
		status table for all inspected config files.
		"""
		common_path = os.path.dirname(os.path.commonprefix(config_file_list))
		config_file_list_todo = []
		config_file_status_list = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				is_instrumented = self._config_is_instrumented(cfg_new)
				do_copy = False
			else:
				is_instrumented = self._config_is_instrumented(cfg)
				do_copy = True
			do_prepare = (must_prepare or auto_prepare) and not is_instrumented
			do_copy = do_copy or do_prepare
			if do_copy:
				config_file_list_todo.append((cfg, cfg_new, do_prepare))
			config_file_status_list.append({1: cfg.split(common_path, 1)[1].lstrip('/'),
				2: cfg_new_exists, 3: is_instrumented, 4: do_prepare})
		if config_file_status_list:
			config_file_status_header = [(1, 'Config file'), (2, 'Work dir'),
				(3, 'Instrumented'), (4, 'Scheduled')]
			ConsoleTable.create(config_file_status_header, config_file_status_list, 'lccc')
		return config_file_list_todo

	def _config_is_instrumented(self, fn):
		"""Return True if every needed variable appears as __VAR__ or @VAR@ in the file."""
		cfg = SafeFile(fn).read_close()
		for tag in self._needed_vn_set:
			# idiom fix: use the 'not in' operator instead of 'not ... in'
			if ('__%s__' % tag not in cfg) and ('@%s@' % tag not in cfg):
				return False
		return True

	def _config_store_backup(self, source, target, fragment_path=None):
		"""Copy source config to target, appending the instrumentation fragment if given."""
		content = SafeFile(source).read_close()
		if fragment_path:
			self._log.info('Instrumenting... %s', os.path.basename(source))
			content += SafeFile(fragment_path).read_close()
		SafeFile(target, 'w').write_close(content)

	def _create_datasource(self, config, name, psrc_repository, psrc_list):
		"""Create the datasource and record the dataset keys it needs as placeholders."""
		psrc_data = SCRAMTask._create_datasource(self, config, name, psrc_repository, psrc_list)
		if psrc_data is not None:
			self._needed_vn_set.update(psrc_data.get_needed_dataset_keys())
		return psrc_data

	def _get_cmssw_path_list(self, config):
		"""Collect candidate CMSSW installation paths for local jobs, in search order."""
		result = []
		path_cmssw_user = config.get(['cmssw dir', 'vo software dir'], '')
		if path_cmssw_user:
			path_cmssw_local = os.path.abspath(clean_path(path_cmssw_user))
			# Prefer the resolved local path when it actually exists
			if os.path.exists(path_cmssw_local):
				path_cmssw_user = path_cmssw_local
		if path_cmssw_user:
			result.append(('CMSSW_DIR_USER', path_cmssw_user))
		if self._old_release_top:
			# RELEASETOP points four levels below the SCRAM project root
			path_scram_project = os.path.normpath('%s/../../../../' % self._old_release_top)
			result.append(('CMSSW_DIR_PRO', path_scram_project))
		self._log.info('Local jobs will try to use the CMSSW software located here:')
		for idx, loc in enumerate(result):
			self._log.info(' %i) %s', idx + 1, loc[1])
		if result:
			self._log.info('')
		return result

	def _get_config_file_default(self):
		"""No default config file when prolog/epilog runs; otherwise the option is mandatory."""
		if self.prolog.is_active() or self.epilog.is_active():
			return []
		return unspecified

	def _get_var_name_list(self):
		"""Return substitutable variable names; MAX_EVENTS only without a dataset."""
		result = SCRAMTask._get_var_name_list(self)
		if not self._has_dataset:
			result.append('MAX_EVENTS')
		return result

	def _process_config_file_list(self, config, config_file_list, fragment_path,
			auto_prepare, must_prepare):
		"""Copy/instrument config files into the work dir and validate the results.

		Raises ConfigError when a file is missing from the work dir or lacks the
		required placeholders; returns the list of work dir config file paths.
		"""
		# process list of uninitialized config files
		iter_uninitialized_config_files = self._config_find_uninitialized(config,
			config_file_list, auto_prepare, must_prepare)
		for (cfg, cfg_new, do_prepare) in iter_uninitialized_config_files:
			ask_user_msg = 'Do you want to prepare %s for running over the dataset?' % cfg
			if do_prepare and (auto_prepare or self._uii.prompt_bool(ask_user_msg, True)):
				self._config_store_backup(cfg, cfg_new, fragment_path)
			else:
				self._config_store_backup(cfg, cfg_new)
		result = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			is_instrumented = self._config_is_instrumented(cfg_new)
			if must_prepare and not is_instrumented:
				raise ConfigError('Config file %r must use %s to work properly!' % (cfg,
					str.join(', ', imap(lambda x: '@%s@' % x, sorted(self._needed_vn_set)))))
			if auto_prepare and not is_instrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result
class CMSSW(SCRAMTask):
	"""Task running cmsRun inside a CMSSW project area via the gc-run.cmssw.sh wrapper."""
	alias_list = ['']
	config_section_list = SCRAMTask.config_section_list + ['CMSSW']

	def __init__(self, config, name):
		# Defaults for CMS datasets and partitioning before the base class reads them
		config.set('se input timeout', '0:30')
		config.set('application', 'cmsRun', section='dashboard')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'BasicPartitionProcessor', '-=')
		config.set('partition processor',
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor', '+=')
		# Variable names that must appear as placeholders in the cmsRun config files
		self._needed_vn_set = set()
		SCRAMTask.__init__(self, config, name)
		self._uii = UserInputInterface()

		# Setup file path informations
		self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
		if self._do_gzip_std_output:
			self._cmsrun_output_files.append('cmssw.log.gz')
		self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
			path_abs=get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		if self._scram_project != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# RELEASETOP of the user project area (used to relocate it on the worker node)
		self._old_release_top = None
		if self._project_area:
			scram_arch_env_path = os.path.join(self._project_area, '.SCRAM',
				self._scram_arch, 'Environment')
			self._old_release_top = self._parse_scram_file(scram_arch_env_path).get('RELEASETOP')

		self._update_map_error_code2msg(
			get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		self._project_area_tarball_on_se = config.get_bool(['se runtime', 'se project area'], True)
		self._project_area_tarball = config.get_work_path('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.get_fn_list('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existance
		# Check that for dataset jobs the necessary placeholders are in the config file
		if not self._has_dataset:
			self._events_per_job = config.get('events per job', '0')  # this can be a variable like @USER_EVENTS@!
			self._needed_vn_set.add('MAX_EVENTS')
		fragment = config.get_fn('instrumentation fragment',
			get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
		self._config_fn_list = self._process_config_file_list(config,
			config.get_fn_list('config file', self._get_config_file_default()),
			fragment, auto_prepare=config.get_bool('instrumentation', True),
			must_prepare=self._has_dataset)

		# Create project area tarball
		if self._project_area and not os.path.exists(self._project_area_tarball):
			config.set_state(True, 'init', detail='sandbox')
		# Information about search order for software environment
		self._cmssw_search_dict = self._get_cmssw_path_list(config)
		if config.get_state('init', detail='sandbox'):
			msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
			if os.path.exists(self._project_area_tarball) and not self._uii.prompt_bool(msg, True):
				return
			# Generate CMSSW tarball
			if self._project_area:
				create_tarball(_match_files(self._project_area,
					self._project_area_matcher, self._always_matcher.create_matcher(''),
					self._project_area_base_fn), name=self._project_area_tarball)
			if self._project_area_tarball_on_se:
				config.set_state(True, 'init', detail='storage')

	def get_command(self):
		"""Return the job command line - everything is delegated to the wrapper script."""
		return './gc-run.cmssw.sh $@'

	def get_job_arguments(self, jobnum):
		"""Return base task arguments followed by the user-configured 'arguments' string."""
		return SCRAMTask.get_job_arguments(self, jobnum) + ' ' + self.arguments

	def get_job_dict(self, jobnum):
		"""Return job dependent environment variables consumed by gc-run.cmssw.sh."""
		# Get job dependent environment variables
		job_env_dict = SCRAMTask.get_job_dict(self, jobnum)
		if not self._has_dataset:
			job_env_dict['MAX_EVENTS'] = self._events_per_job
		job_env_dict.update(dict(self._cmssw_search_dict))
		if self._do_gzip_std_output:
			job_env_dict['GZIP_OUT'] = 'yes'
		if self._project_area_tarball_on_se:
			job_env_dict['SE_RUNTIME'] = 'yes'
		if self._project_area:
			job_env_dict['HAS_RUNTIME'] = 'yes'
		job_env_dict['CMSSW_EXEC'] = 'cmsRun'
		job_env_dict['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self._config_fn_list))
		job_env_dict['CMSSW_OLD_RELEASETOP'] = self._old_release_top
		if self.prolog.is_active():
			job_env_dict['CMSSW_PROLOG_EXEC'] = self.prolog.get_command()
			job_env_dict['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ',
				imap(lambda x: x.path_rel, self.prolog.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_PROLOG_ARGS'] = self.prolog.get_arguments()
		if self.epilog.is_active():
			job_env_dict['CMSSW_EPILOG_EXEC'] = self.epilog.get_command()
			job_env_dict['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ',
				imap(lambda x: x.path_rel, self.epilog.get_sb_in_fpi_list()))
			job_env_dict['CMSSW_EPILOG_ARGS'] = self.epilog.get_arguments()
		return job_env_dict

	def get_sb_in_fpi_list(self):
		"""Return input sandbox files: base/prolog/epilog files, cmsRun configs, tarball, wrapper."""
		# Get files for input sandbox
		fpi_list = (SCRAMTask.get_sb_in_fpi_list(self) +
			self.prolog.get_sb_in_fpi_list() + self.epilog.get_sb_in_fpi_list())
		for config_file in self._config_fn_list:
			fpi_list.append(Result(path_abs=config_file, path_rel=os.path.basename(config_file)))
		# Tarball travels in the sandbox only when it is not transferred via SE
		if self._project_area and not self._project_area_tarball_on_se:
			fpi_list.append(Result(path_abs=self._project_area_tarball,
				path_rel=os.path.basename(self._project_area_tarball)))
		return fpi_list + [self._script_fpi]

	def get_sb_out_fn_list(self):
		"""Return output sandbox files; cmsRun report files only when configs were used."""
		# Get files for output sandbox
		if not self._config_fn_list:
			return SCRAMTask.get_sb_out_fn_list(self)
		return SCRAMTask.get_sb_out_fn_list(self) + self._cmsrun_output_files

	def get_se_in_fn_list(self):
		"""Return files transferred via SE as (description, source, target) tuples."""
		# Get files to be transfered via SE (description, source, target)
		files = SCRAMTask.get_se_in_fn_list(self)
		if self._project_area and self._project_area_tarball_on_se:
			return files + [('CMSSW tarball', self._project_area_tarball, self._task_id + '.tar.gz')]
		return files

	def _config_find_uninitialized(self, config, config_file_list, auto_prepare, must_prepare):
		"""Determine which config files still need to be copied/instrumented.

		Returns a list of (source, workdir-copy, do_prepare) tuples and prints a
		status table for all inspected config files.
		"""
		common_path = os.path.dirname(os.path.commonprefix(config_file_list))
		config_file_list_todo = []
		config_file_status_list = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			if cfg_new_exists:
				is_instrumented = self._config_is_instrumented(cfg_new)
				do_copy = False
			else:
				is_instrumented = self._config_is_instrumented(cfg)
				do_copy = True
			do_prepare = (must_prepare or auto_prepare) and not is_instrumented
			do_copy = do_copy or do_prepare
			if do_copy:
				config_file_list_todo.append((cfg, cfg_new, do_prepare))
			config_file_status_list.append({1: cfg.split(common_path, 1)[1].lstrip('/'),
				2: cfg_new_exists, 3: is_instrumented, 4: do_prepare})
		if config_file_status_list:
			config_file_status_header = [(1, 'Config file'), (2, 'Work dir'),
				(3, 'Instrumented'), (4, 'Scheduled')]
			ConsoleTable.create(config_file_status_header, config_file_status_list, 'lccc')
		return config_file_list_todo

	def _config_is_instrumented(self, fn):
		"""Return True if every needed variable appears as __VAR__ or @VAR@ in the file."""
		cfg = SafeFile(fn).read_close()
		for tag in self._needed_vn_set:
			if (not '__%s__' % tag in cfg) and (not '@%s@' % tag in cfg):
				return False
		return True

	def _config_store_backup(self, source, target, fragment_path=None):
		"""Copy source config to target, appending the instrumentation fragment if given."""
		content = SafeFile(source).read_close()
		if fragment_path:
			self._log.info('Instrumenting... %s', os.path.basename(source))
			content += SafeFile(fragment_path).read_close()
		SafeFile(target, 'w').write_close(content)

	def _create_datasource(self, config, name, psrc_repository, psrc_list):
		"""Create the datasource and record the dataset keys it needs as placeholders."""
		psrc_data = SCRAMTask._create_datasource(self, config, name, psrc_repository, psrc_list)
		if psrc_data is not None:
			self._needed_vn_set.update(psrc_data.get_needed_dataset_keys())
		return psrc_data

	def _get_cmssw_path_list(self, config):
		"""Collect candidate CMSSW installation paths for local jobs, in search order."""
		result = []
		path_cmssw_user = config.get(['cmssw dir', 'vo software dir'], '')
		if path_cmssw_user:
			path_cmssw_local = os.path.abspath(clean_path(path_cmssw_user))
			# Prefer the resolved local path when it actually exists
			if os.path.exists(path_cmssw_local):
				path_cmssw_user = path_cmssw_local
		if path_cmssw_user:
			result.append(('CMSSW_DIR_USER', path_cmssw_user))
		if self._old_release_top:
			# RELEASETOP points four levels below the SCRAM project root
			path_scram_project = os.path.normpath('%s/../../../../' % self._old_release_top)
			result.append(('CMSSW_DIR_PRO', path_scram_project))
		self._log.info('Local jobs will try to use the CMSSW software located here:')
		for idx, loc in enumerate(result):
			self._log.info(' %i) %s', idx + 1, loc[1])
		if result:
			self._log.info('')
		return result

	def _get_config_file_default(self):
		"""No default config file when prolog/epilog runs; otherwise the option is mandatory."""
		if self.prolog.is_active() or self.epilog.is_active():
			return []
		return unspecified

	def _get_var_name_list(self):
		"""Return substitutable variable names; MAX_EVENTS only without a dataset."""
		result = SCRAMTask._get_var_name_list(self)
		if not self._has_dataset:
			result.append('MAX_EVENTS')
		return result

	def _process_config_file_list(self, config, config_file_list, fragment_path,
			auto_prepare, must_prepare):
		"""Copy/instrument config files into the work dir and validate the results.

		Raises ConfigError when a file is missing from the work dir or lacks the
		required placeholders; returns the list of work dir config file paths.
		"""
		# process list of uninitialized config files
		iter_uninitialized_config_files = self._config_find_uninitialized(config,
			config_file_list, auto_prepare, must_prepare)
		for (cfg, cfg_new, do_prepare) in iter_uninitialized_config_files:
			ask_user_msg = 'Do you want to prepare %s for running over the dataset?' % cfg
			if do_prepare and (auto_prepare or self._uii.prompt_bool(ask_user_msg, True)):
				self._config_store_backup(cfg, cfg_new, fragment_path)
			else:
				self._config_store_backup(cfg, cfg_new)
		result = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			if not os.path.exists(cfg_new):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg)
			is_instrumented = self._config_is_instrumented(cfg_new)
			if must_prepare and not is_instrumented:
				raise ConfigError('Config file %r must use %s to work properly!' % (cfg,
					str.join(', ', imap(lambda x: '@%s@' % x, sorted(self._needed_vn_set)))))
			if auto_prepare and not is_instrumented:
				self._log.warning('Config file %r was not instrumented!', cfg)
			result.append(cfg_new)
		return result