def __init__(self, config, name):
    """Initialize SCRAM settings for the task.

    Either takes project name/version directly from the 'scram project'
    option (manual setup, no project area) or derives them from an existing
    project area given by the 'project area' option. Also collects the WMS
    software requirement tags into self._scram_req_list.
    """
    DataTask.__init__(self, config, name)
    config.set('area files matcher mode', 'ShellStyleMatcher')
    # SCRAM settings
    scram_arch_default = unspecified
    scram_project = config.get_list('scram project', [])
    if scram_project:  # manual scram setup
        if len(scram_project) != 2:
            raise ConfigError('%r needs exactly 2 arguments: <PROJECT> <VERSION>' % 'scram project')
        self._project_area = None
        self._project_area_selector_list = None
        self._scram_project = scram_project[0]
        self._scram_project_version = scram_project[1]
        # ensure project area is not used
        if 'project area' in config.get_option_list():
            raise ConfigError('Cannot specify both %r and %r' % ('scram project', 'project area'))
    else:  # scram setup used from project area
        self._project_area = config.get_dn('project area')
        self._always_matcher = Matcher.create_instance('AlwaysMatcher', config, [''])
        self._project_area_base_fn = config.get_bool('area files basename', True, on_change=TriggerInit('sandbox'))
        self._project_area_matcher = config.get_matcher('area files', '-.* -config bin lib python module data *.xml *.sql *.db *.cfi *.cff *.py -CVS -work.* *.pcm', default_matcher='BlackWhiteMatcher', on_change=TriggerInit('sandbox'))
        self._log.info('Project area found in: %s', self._project_area)
        # try to determine scram settings from environment settings
        scram_path = os.path.join(self._project_area, '.SCRAM')
        scram_env = self._parse_scram_file(os.path.join(scram_path, 'Environment'))
        try:
            self._scram_project = scram_env['SCRAM_PROJECTNAME']
            self._scram_project_version = scram_env['SCRAM_PROJECTVERSION']
        except KeyError:  # FIX: bare 'except:' also trapped SystemExit/KeyboardInterrupt
            raise ConfigError('Installed program in project area not recognized.')

        def filter_arch_dir(dn):
            return os.path.isdir(os.path.join(scram_path, dn))
        # the last architecture directory in sorted order becomes the default
        for arch_dir in sorted(ifilter(filter_arch_dir, os.listdir(scram_path))):
            scram_arch_default = arch_dir
    self._scram_version = config.get('scram version', 'scramv1')
    self._scram_arch = config.get('scram arch', scram_arch_default)
    # Collect WMS software requirement tags (VO-cms-<arch|project|version>)
    self._scram_req_list = []
    if config.get_bool('scram arch requirements', True, on_change=None):
        self._scram_req_list.append((WMS.SOFTWARE, 'VO-cms-%s' % self._scram_arch))
    if config.get_bool('scram project requirements', False, on_change=None):
        self._scram_req_list.append((WMS.SOFTWARE, 'VO-cms-%s' % self._scram_project))
    if config.get_bool('scram project version requirements', False, on_change=None):
        self._scram_req_list.append((WMS.SOFTWARE, 'VO-cms-%s' % self._scram_project_version))
def getSBInFiles(self):
    """Collect all input sandbox files: base task files, prolog/epilog files,
    the CMSSW config files, the project area tarball (unless it is stored on
    the SE) and the CMSSW run script."""
    sb_list = DataTask.getSBInFiles(self)
    sb_list += self.prolog.getSBInFiles()
    sb_list += self.epilog.getSBInFiles()
    sb_list.extend(utils.Result(pathAbs=fn, pathRel=os.path.basename(fn))
        for fn in self.configFiles)
    if self.projectArea and not self._projectAreaTarballSE:
        tarball = self._projectAreaTarball
        sb_list.append(utils.Result(pathAbs=tarball, pathRel=os.path.basename(tarball)))
    run_script = utils.pathShare('gc-run.cmssw.sh', pkg='grid_control_cms')
    sb_list.append(utils.Result(pathAbs=run_script, pathRel='gc-run.cmssw.sh'))
    return sb_list
def getVarNames(self):
    """Return job variable names; adds MAX_EVENTS for event-count based jobs
    (no dataset splitter) and LUMI_RANGE when a lumi filter is selected."""
    result = DataTask.getVarNames(self)
    if self.dataSplitter is None:  # FIX: identity check instead of '== None'
        result.append('MAX_EVENTS')
    if self.selectedLumis:
        result.append('LUMI_RANGE')
    return result
def getJobConfig(self, jobNum):
    """Return per-job config data; defines MAX_EVENTS for event-count based
    jobs and LUMI_RANGE when a lumi filter is selected."""
    data = DataTask.getJobConfig(self, jobNum)
    if self.dataSplitter is None:  # FIX: identity check instead of '== None'
        data['MAX_EVENTS'] = self.eventsPerJob
    if self.selectedLumis:
        data['LUMI_RANGE'] = self.getActiveLumiFilter(self.selectedLumis)
    return data
def get_job_dict(self, jobnum):
    """Return the per-job environment dict extended with SCRAM information."""
    job_env_dict = DataTask.get_job_dict(self, jobnum)
    job_env_dict.update({
        'SCRAM_VERSION': self._scram_version,
        'SCRAM_ARCH': self._scram_arch,
        'SCRAM_PROJECTNAME': self._scram_project,
        'SCRAM_PROJECTVERSION': self._scram_project_version,
    })
    return job_env_dict
def getTaskConfig(self):
    """Return the static task configuration extended with SCRAM information."""
    data = DataTask.getTaskConfig(self)
    data.update({
        'SCRAM_VERSION': self._scramVersion,
        'SCRAM_ARCH': self._scramArch,
        'SCRAM_PROJECTNAME': self._scramProject,
        'SCRAM_PROJECTVERSION': self._scramProjectVersion,
    })
    return data
def __init__(self, config, name):
    """Set up a CMSSW task.

    Applies dataset-related config defaults, configures SCRAM, sets up
    prolog/epilog wrappers, prepares the CMSSW config files and (re)creates
    the project area tarball if needed.
    """
    # Defaults for CMSSW dataset processing - applied before base class init
    config.set('se input timeout', '0:30')
    config.set('dataset provider', 'DBS3Provider')
    config.set('dataset splitter', 'EventBoundarySplitter')
    config.set('partition processor', 'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor')
    config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataTask.__init__(self, config, name)
    self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))
    # SCRAM settings
    self._configureSCRAMSettings(config)
    self.useReqs = config.getBool('software requirements', True, onChange = None)
    self._projectAreaTarballSE = config.getBool(['se project area', 'se runtime'], True)
    self._projectAreaTarball = config.getWorkPath('cmssw-project-area.tar.gz')
    # Information about search order for software environment
    self.searchLoc = self._getCMSSWPaths(config)
    # Prolog / Epilog script support - warn about old syntax
    self.prolog = TaskExecutableWrapper(config, 'prolog', '')
    self.epilog = TaskExecutableWrapper(config, 'epilog', '')
    if config.getPaths('executable', []) != []:
        raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
    self.arguments = config.get('arguments', '')
    # Get cmssw config files and check their existance
    # Check that for dataset jobs the necessary placeholders are in the config file
    if self.dataSplitter is None:
        # NOTE(review): kept as string, presumably substituted into the job env - verify
        self.eventsPerJob = config.get('events per job', '0')
    fragment = config.getPath('instrumentation fragment', utils.pathShare('fragmentForCMSSW.py', pkg = 'grid_control_cms'))
    self.configFiles = self._processConfigFiles(config, list(self._getConfigFiles(config)), fragment,
        autoPrepare = config.getBool('instrumentation', True),
        mustPrepare = (self.dataSplitter is not None))
    # Create project area tarball - a missing tarball forces sandbox init
    if not os.path.exists(self._projectAreaTarball):
        config.setState(True, 'init', detail = 'sandbox')
    if config.getState('init', detail = 'sandbox'):
        if os.path.exists(self._projectAreaTarball):
            # user may decline regeneration; constructor returns early in that case
            if not utils.getUserBool('CMSSW tarball already exists! Do you want to regenerate it?', True):
                return
        # Generate CMSSW tarball
        if self.projectArea:
            utils.genTarball(self._projectAreaTarball, utils.matchFiles(self.projectArea, self.pattern))
        if self._projectAreaTarballSE:
            config.setState(True, 'init', detail = 'storage')
def __init__(self, config, name):
    """Initialize SCRAM settings for the task.

    Either takes project name/version directly from the 'scram project'
    option (manual setup, no project area) or derives them from an existing
    project area given by 'project area'. Also collects the WMS software
    requirement tags into self._scramReqs.
    """
    DataTask.__init__(self, config, name)
    # SCRAM settings
    scramArchDefault = noDefault
    scramProject = config.getList('scram project', [])
    if scramProject:  # manual scram setup
        if len(scramProject) != 2:
            raise ConfigError('%r needs exactly 2 arguments: <PROJECT> <VERSION>' % 'scram project')
        self._projectArea = None
        self._projectAreaPattern = None
        self._scramProject = scramProject[0]
        self._scramProjectVersion = scramProject[1]
        # ensure project area is not used
        if 'project area' in config.getOptions():
            raise ConfigError('Cannot specify both %r and %r' % ('scram project', 'project area'))
    else:  # scram setup used from project area
        self._projectArea = config.getPath('project area')
        self._projectAreaPattern = config.getList('area files', ['-.*', '-config', 'bin', 'lib', 'python', 'module', '*/data', '*.xml', '*.sql', '*.db', '*.cf[if]', '*.py', '-*/.git', '-*/.svn', '-*/CVS', '-*/work.*'])
        logging.getLogger('user').info('Project area found in: %s', self._projectArea)
        # try to determine scram settings from environment settings
        scramPath = os.path.join(self._projectArea, '.SCRAM')
        scramEnv = self._parse_scram_file(os.path.join(scramPath, 'Environment'))
        try:
            self._scramProject = scramEnv['SCRAM_PROJECTNAME']
            self._scramProjectVersion = scramEnv['SCRAM_PROJECTVERSION']
        except KeyError:  # FIX: bare 'except:' also trapped SystemExit/KeyboardInterrupt
            raise ConfigError('Installed program in project area not recognized.')
        # the last architecture directory in sorted order becomes the default
        for arch_dir in sorted(ifilter(lambda dn: os.path.isdir(os.path.join(scramPath, dn)), os.listdir(scramPath))):
            scramArchDefault = arch_dir
    self._scramVersion = config.get('scram version', 'scramv1')
    self._scramArch = config.get('scram arch', scramArchDefault)
    # Collect WMS software requirement tags (VO-cms-<arch|project|version>)
    self._scramReqs = []
    if config.getBool('scram arch requirements', True, onChange = None):
        self._scramReqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self._scramArch))
    if config.getBool('scram project requirements', False, onChange = None):
        self._scramReqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self._scramProject))
    if config.getBool('scram project version requirements', False, onChange = None):
        self._scramReqs.append((WMS.SOFTWARE, 'VO-cms-%s' % self._scramProjectVersion))
def getSubmitInfo(self, jobNum):
    """Return submit information annotated with the CMSSW version, the
    cmsRun executable and (for dataset-less jobs) the per-job event count."""
    info = DataTask.getSubmitInfo(self, jobNum)
    info['application'] = self.scramEnv['SCRAM_PROJECTVERSION']
    info['exe'] = 'cmsRun'
    if self.dataSplitter is None:
        info['nevtJob'] = self.eventsPerJob
    return info
def getTaskConfig(self):
    """Assemble the static task environment for CMSSW jobs: search locations,
    SCRAM settings, runtime/tarball flags, config file names and the
    prolog/epilog command descriptions."""
    task_env = DataTask.getTaskConfig(self)
    task_env.update(dict(self.searchLoc))
    task_env.update({
        'CMSSW_OLD_RELEASETOP': self.scramEnv.get('RELEASETOP', None),
        'SCRAM_ARCH': self.scramArch,
        'SCRAM_VERSION': self.scramVersion,
        'SCRAM_PROJECTVERSION': self.scramEnv['SCRAM_PROJECTVERSION'],
        'GZIP_OUT': utils.QM(self.gzipOut, 'yes', 'no'),
        'SE_RUNTIME': utils.QM(self._projectAreaTarballSE, 'yes', 'no'),
        'HAS_RUNTIME': utils.QM(len(self.projectArea), 'yes', 'no'),
        'CMSSW_CONFIG': str.join(' ', imap(os.path.basename, self.configFiles)),
    })
    # prolog and epilog wrappers export the same set of variables
    for prefix, wrapper in [('PROLOG', self.prolog), ('EPILOG', self.epilog)]:
        if wrapper.isActive():
            task_env['CMSSW_%s_EXEC' % prefix] = wrapper.getCommand()
            task_env['CMSSW_%s_SB_IN_FILES' % prefix] = str.join(' ',
                imap(lambda fi: fi.pathRel, wrapper.getSBInFiles()))
            task_env['CMSSW_%s_ARGS' % prefix] = wrapper.getArguments()
    return task_env
def getJobArguments(self, jobNum):
    """Append the wrapped executable's arguments to the base job arguments."""
    base_args = DataTask.getJobArguments(self, jobNum)
    return '%s %s' % (base_args, self._exeWrap.getArguments())
def getSBOutFiles(self):
    """Return output sandbox files; stdout/stderr names carry a '.gz'
    suffix when output compression is enabled."""
    suffix = utils.QM(self.gzipOut, '.gz', '')
    log_file_list = ['job.stdout' + suffix, 'job.stderr' + suffix]
    return DataTask.getSBOutFiles(self) + log_file_list
def getRequirements(self, jobNum):
    """Return job requirements; optionally adds the SCRAM arch software tag."""
    req_list = DataTask.getRequirements(self, jobNum)
    if self.useReqs:
        req_list += [(WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch)]
    return req_list
def get_sb_out_fn_list(self):
    """Return output sandbox file names; stdout/stderr get a '.gz' suffix
    when standard output compression is enabled."""
    suffix = ''
    if self._do_gzip_std_output:
        suffix = '.gz'
    log_fn_list = ['job.stdout' + suffix, 'job.stderr' + suffix]
    return DataTask.get_sb_out_fn_list(self) + log_fn_list
def getSEInFiles(self):
    """Return SE input files; includes the CMSSW project area tarball when
    it is configured to be hosted on the storage element."""
    se_in_list = DataTask.getSEInFiles(self)
    if self.projectArea and self._projectAreaTarballSE:
        tarball_entry = ('CMSSW tarball', self._projectAreaTarball, self.taskID + '.tar.gz')
        se_in_list = se_in_list + [tarball_entry]
    return se_in_list
def getDescription(self, jobNum):
    """Return (task name, job name, job type); an unset job type defaults
    to 'analysis'."""
    task_name, job_name, job_type = DataTask.getDescription(self, jobNum)
    if not job_type:
        job_type = 'analysis'
    return (task_name, job_name, job_type)
def getVarNames(self):
    """Return job variable names; MAX_EVENTS is only defined for jobs
    without a dataset splitter."""
    var_name_list = DataTask.getVarNames(self)
    if self.dataSplitter is None:
        var_name_list += ['MAX_EVENTS']
    return var_name_list
def __init__(self, config, name):
    # Plain pass-through constructor - all setup is done by the DataTask base class
    DataTask.__init__(self, config, name)
def getSBInFiles(self):
    """Combine base task sandbox files with those of the wrapped executable."""
    base_files = DataTask.getSBInFiles(self)
    exe_files = self._exeWrap.getSBInFiles()
    return base_files + exe_files
def getJobArguments(self, jobNum):
    """Extend the base job arguments with the wrapped executable's arguments."""
    return '%s %s' % (DataTask.getJobArguments(self, jobNum), self._exeWrap.getArguments())
def __init__(self, config, name):
    DataTask.__init__(self, config, name)
    # Wraps the user supplied executable and its arguments for this task
    self._exeWrap = TaskExecutableWrapper(config)
def __init__(self, config, name):
    """Set up a CMSSW task.

    Applies dataset-related config defaults, configures SCRAM, sets up
    prolog/epilog wrappers, prepares the CMSSW config files and (re)creates
    the project area tarball if needed.
    """
    # Defaults for CMSSW dataset processing - applied before base class init
    config.set('se input timeout', '0:30')
    config.set('dataset provider', 'DBS3Provider')
    config.set('dataset splitter', 'EventBoundarySplitter')
    config.set(
        'partition processor',
        'CMSPartitionProcessor LocationPartitionProcessor LumiPartitionProcessor'
    )
    config.set('dataset processor', 'LumiDataProcessor', '+=')
    DataTask.__init__(self, config, name)
    self.updateErrorDict(
        utils.pathShare('gc-run.cmssw.sh', pkg='grid_control_cms'))
    # SCRAM settings
    self._configureSCRAMSettings(config)
    self.useReqs = config.getBool('software requirements', True, onChange=None)
    self._projectAreaTarballSE = config.getBool(
        ['se project area', 'se runtime'], True)
    self._projectAreaTarball = config.getWorkPath(
        'cmssw-project-area.tar.gz')
    # Information about search order for software environment
    self.searchLoc = self._getCMSSWPaths(config)
    # Prolog / Epilog script support - warn about old syntax
    self.prolog = TaskExecutableWrapper(config, 'prolog', '')
    self.epilog = TaskExecutableWrapper(config, 'epilog', '')
    if config.getPaths('executable', []) != []:
        raise ConfigError(
            'Prefix executable and argument options with either prolog or epilog!'
        )
    self.arguments = config.get('arguments', '')
    # Get cmssw config files and check their existance
    # Check that for dataset jobs the necessary placeholders are in the config file
    if self.dataSplitter is None:
        # NOTE(review): kept as string, presumably substituted into the job env - verify
        self.eventsPerJob = config.get('events per job', '0')
    fragment = config.getPath(
        'instrumentation fragment',
        utils.pathShare('fragmentForCMSSW.py', pkg='grid_control_cms'))
    self.configFiles = self._processConfigFiles(
        config,
        list(self._getConfigFiles(config)),
        fragment,
        autoPrepare=config.getBool('instrumentation', True),
        mustPrepare=(self.dataSplitter is not None))
    # Create project area tarball - a missing tarball forces sandbox init
    if not os.path.exists(self._projectAreaTarball):
        config.setState(True, 'init', detail='sandbox')
    if config.getState('init', detail='sandbox'):
        if os.path.exists(self._projectAreaTarball):
            # user may decline regeneration; constructor returns early in that case
            if not utils.getUserBool(
                    'CMSSW tarball already exists! Do you want to regenerate it?',
                    True):
                return
        # Generate CMSSW tarball
        if self.projectArea:
            utils.genTarball(
                self._projectAreaTarball,
                utils.matchFiles(self.projectArea, self.pattern))
        if self._projectAreaTarballSE:
            config.setState(True, 'init', detail='storage')
def get_sb_in_fpi_list(self):
    """Return sandbox input file infos of the base task plus those of the
    wrapped executable."""
    fpi_list = DataTask.get_sb_in_fpi_list(self)
    return fpi_list + self._exe.get_sb_in_fpi_list()
def get_job_arguments(self, jobnum):
    """Extend the base job arguments with the wrapped executable's arguments."""
    base_args = DataTask.get_job_arguments(self, jobnum)
    return '%s %s' % (base_args, self._exe.get_arguments())
def getJobConfig(self, jobNum):
    """Return per-job config data; MAX_EVENTS is only defined for jobs
    without a dataset splitter."""
    job_data = DataTask.getJobConfig(self, jobNum)
    if self.dataSplitter is None:
        job_data['MAX_EVENTS'] = self.eventsPerJob
    return job_data
def getDescription(self, jobNum):
    """Return the job description, defaulting an unset job type to 'analysis'."""
    description = DataTask.getDescription(self, jobNum)
    if description.jobType:
        return description
    description.jobType = 'analysis'
    return description
def __init__(self, config, name):
    """Set up a legacy CMSSW task.

    Resolves SCRAM settings either from a project area or from the manual
    'scram project' option, configures lumi filtering, prolog/epilog
    wrappers, CMSSW config file staging/instrumentation and the runtime
    tarball generation.
    """
    config.set('se input timeout', '0:30', override = False)
    config.set('dataset provider', 'DBS3Provider', override = False)
    config.set('dataset splitter', 'EventBoundarySplitter', override = False)
    DataTask.__init__(self, config, name)
    self.errorDict.update(dict(self.updateErrorDict(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))))
    # SCRAM info
    scramProject = config.getList('scram project', [])
    if len(scramProject):
        self.projectArea = config.getPath('project area', '')
        if len(self.projectArea):
            raise ConfigError('Cannot specify both SCRAM project and project area')
        if len(scramProject) != 2:
            raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
    else:
        self.projectArea = config.getPath('project area')
    # This works in tandem with provider_dbsv2.py !
    self.selectedLumis = parseLumiFilter(config.get('lumi filter', ''))
    self.useReqs = config.getBool('software requirements', True, onChange = None)
    self.seRuntime = config.getBool('se runtime', False)
    self.runtimePath = config.getWorkPath('runtime.tar.gz')
    if len(self.projectArea):
        defaultPattern = '-.* -config bin lib python module */data *.xml *.sql *.cf[if] *.py -*/.git -*/.svn -*/CVS -*/work.*'
        self.pattern = config.getList('area files', defaultPattern.split())
        if os.path.exists(self.projectArea):
            utils.vprint('Project area found in: %s' % self.projectArea, -1)
        else:
            raise ConfigError('Specified config area %r does not exist!' % self.projectArea)
        scramPath = os.path.join(self.projectArea, '.SCRAM')
        # parse the SCRAM environment file of the project area
        try:
            # FIX: use 'with' so the file handle is closed (was leaked before)
            with open(os.path.join(scramPath, 'Environment'), 'r') as fp:
                self.scramEnv = utils.DictFormat().parse(fp, keyParser = {None: str})
        except Exception:  # FIX: bare 'except:' also trapped SystemExit/KeyboardInterrupt
            raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)
        for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
            if key not in self.scramEnv:
                raise ConfigError('Installed program in project area not recognized.')
        # FIX: materialize filter() result - on Python 3 'filter(...) + list' raises TypeError
        archs = list(filter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'), os.listdir(scramPath)))
        self.scramArch = config.get('scram arch', (archs + [noDefault])[0])
        try:
            with open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r') as fp:
                self.scramEnv.update(utils.DictFormat().parse(fp, keyParser = {None: str}))
        except Exception:
            raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
    else:
        self.scramEnv = {
            'SCRAM_PROJECTNAME': scramProject[0],
            'SCRAM_PROJECTVERSION': scramProject[1]
        }
        self.scramArch = config.get('scram arch')
    self.scramVersion = config.get('scram version', 'scramv1')
    if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
        raise ConfigError('Project area not a valid CMSSW project area.')
    # Information about search order for software environment
    self.searchLoc = []
    if config.getState('sandbox'):
        userPath = config.get('cmssw dir', '')
        if userPath != '':
            self.searchLoc.append(('CMSSW_DIR_USER', userPath))
        if self.scramEnv.get('RELEASETOP', None):
            projPath = os.path.normpath('%s/../../../../' % self.scramEnv['RELEASETOP'])
            self.searchLoc.append(('CMSSW_DIR_PRO', projPath))
    if len(self.searchLoc):
        utils.vprint('Local jobs will try to use the CMSSW software located here:', -1)
        for i, loc in enumerate(self.searchLoc):
            key, value = loc
            utils.vprint(' %i) %s' % (i + 1, value), -1)
    # Prolog / Epilog script support - warn about old syntax
    self.prolog = TaskExecutableWrapper(config, 'prolog', '')
    self.epilog = TaskExecutableWrapper(config, 'epilog', '')
    if config.getPaths('executable', []) != []:
        raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
    self.arguments = config.get('arguments', '')
    # Get cmssw config files and check their existance
    self.configFiles = []
    cfgDefault = QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
    for cfgFile in config.getPaths('config file', cfgDefault, mustExist = False):
        newPath = config.getWorkPath(os.path.basename(cfgFile))
        # stage config files into the work path (copied only once)
        if not os.path.exists(newPath):
            if not os.path.exists(cfgFile):
                raise ConfigError('Config file %r not found.' % cfgFile)
            shutil.copyfile(cfgFile, newPath)
        self.configFiles.append(newPath)
    # Check that for dataset jobs the necessary placeholders are in the config file
    self.prepare = config.getBool('prepare config', False)
    fragment = config.getPath('instrumentation fragment',
        os.path.join('packages', 'grid_control_cms', 'share', 'fragmentForCMSSW.py'))
    if self.dataSplitter is not None:  # FIX: identity check instead of '!= None'
        if config.getState('sandbox'):
            if len(self.configFiles) > 0:
                self.instrumentCfgQueue(self.configFiles, fragment, mustPrepare = True)
    else:
        self.eventsPerJob = config.get('events per job', '0')
        if config.getState(detail = 'sandbox') and self.prepare:
            self.instrumentCfgQueue(self.configFiles, fragment)
    # a missing runtime tarball forces sandbox init
    if not os.path.exists(config.getWorkPath('runtime.tar.gz')):
        config.setState(True, detail = 'sandbox')
    if config.getState(detail = 'sandbox'):
        if os.path.exists(config.getWorkPath('runtime.tar.gz')):
            if not utils.getUserBool('Runtime already exists! Do you want to regenerate CMSSW tarball?', True):
                return
        # Generate runtime tarball (and move to SE)
        if self.projectArea:
            utils.genTarball(config.getWorkPath('runtime.tar.gz'), utils.matchFiles(self.projectArea, self.pattern))
        if self.seRuntime:
            config.setState(True, detail = 'storage')
def __init__(self, config, name):
    DataTask.__init__(self, config, name)
    # Wrapper around the user supplied executable and its arguments
    self._exe = TaskExecutableWrapper(config)
def getDependencies(self):
    """Return task dependencies; CMSSW jobs additionally need the 'cmssw' module."""
    dependency_list = DataTask.getDependencies(self)
    return dependency_list + ['cmssw']
def get_sb_in_fpi_list(self):
    """Combine base sandbox input file infos with the wrapped executable's files."""
    exe_fpi_list = self._exe.get_sb_in_fpi_list()
    return DataTask.get_sb_in_fpi_list(self) + exe_fpi_list
def get_dependency_list(self):
    """Return task dependencies; CMSSW jobs additionally need the 'cmssw' module."""
    dep_list = list(DataTask.get_dependency_list(self))
    dep_list.append('cmssw')
    return dep_list
def getJobArguments(self, jobNum):
    """Append the user configured arguments to the base job arguments."""
    return '%s %s' % (DataTask.getJobArguments(self, jobNum), self.arguments)
def get_requirement_list(self, jobnum):
    """Return job requirements extended by the collected SCRAM software tags."""
    base_req_list = DataTask.get_requirement_list(self, jobnum)
    return base_req_list + self._scram_req_list
def getSEInFiles(self):
    """Return SE input files; adds the runtime tarball when it is SE-hosted."""
    se_file_list = DataTask.getSEInFiles(self)
    if self.projectArea and self.seRuntime:
        runtime_entry = ('CMSSW runtime', self.runtimePath, self.taskID + '.tar.gz')
        se_file_list = se_file_list + [runtime_entry]
    return se_file_list
def getSBInFiles(self):
    """Collect input sandbox files: base task files, CMSSW config files,
    prolog/epilog files, the runtime tarball (unless SE-hosted) and the
    CMSSW run script."""
    sb_in_list = DataTask.getSBInFiles(self)
    sb_in_list = sb_in_list + self.configFiles
    sb_in_list += self.prolog.getSBInFiles() + self.epilog.getSBInFiles()
    if self.projectArea and not self.seRuntime:
        sb_in_list.append(self.runtimePath)
    sb_in_list.append(utils.pathShare('gc-run.cmssw.sh', pkg = 'grid_control_cms'))
    return sb_in_list
def getRequirements(self, jobNum):
    """Return job requirements extended by the collected SCRAM software tags."""
    combined = list(DataTask.getRequirements(self, jobNum))
    combined.extend(self._scramReqs)
    return combined
def getSBOutFiles(self):
    """Return output sandbox files; includes the gzipped cmssw log (when
    compression is enabled) and the DBS metadata tarball."""
    extra_files = []
    if self.gzipOut:
        extra_files.append('cmssw.log.gz')
    extra_files.append('cmssw.dbs.tar.gz')
    return DataTask.getSBOutFiles(self) + extra_files
def getSBOutFiles(self):
    """Return output sandbox files; includes the gzipped cmssw log (when
    compression is enabled) and the DBS metadata tarball."""
    out_file_list = DataTask.getSBOutFiles(self)
    if self.gzipOut:
        out_file_list = out_file_list + ['cmssw.log.gz']
    return out_file_list + ['cmssw.dbs.tar.gz']
def get_job_arguments(self, jobnum):
    """Append the wrapped executable's arguments to the base job arguments."""
    exe_args = self._exe.get_arguments()
    return '%s %s' % (DataTask.get_job_arguments(self, jobnum), exe_args)
def getSubmitInfo(self, jobNum):
    """Return submit information with the CMSSW application version and the
    cmsRun executable; adds the per-job event count for dataset-less jobs."""
    result = DataTask.getSubmitInfo(self, jobNum)
    result.update({'application': self.scramEnv['SCRAM_PROJECTVERSION'], 'exe': 'cmsRun'})
    if self.dataSplitter is None:  # FIX: identity check instead of '== None'
        result.update({'nevtJob': self.eventsPerJob})
    return result
def getRequirements(self, jobNum):
    """Return job requirements; optionally adds the CMSSW project version
    and SCRAM arch software tags."""
    req_list = DataTask.getRequirements(self, jobNum)
    if self.useReqs:
        req_list += [
            (WMS.SOFTWARE, 'VO-cms-%s' % self.scramEnv['SCRAM_PROJECTVERSION']),
            (WMS.SOFTWARE, 'VO-cms-%s' % self.scramArch),
        ]
    return req_list