def logging_create_handlers(config, logger_name):
	"""Configure level, propagation and output handlers of the named logger from config."""
	# Enum with the upper-cased names of all logging levels 0..50
	level_enum = makeEnum(lmap(lambda lvl: logging.getLevelName(lvl).upper(), irange(51)))
	logger = logging.getLogger(logger_name.lower())
	# Logging level - defaults to the logger's current level
	logger.setLevel(config.getEnum(logger_name + ' level', level_enum, logger.level, onChange = None))
	# Propagation towards parent loggers
	logger.propagate = config.getBool(logger_name + ' propagate', bool(logger.propagate), onChange = None)
	# User-defined handlers replace any standard handlers
	if logger_name + ' handler' in config.getOptions():
		for old_handler in list(logger.handlers):
			logger.removeHandler(old_handler)
		for handler_str in set(config.getList(logger_name + ' handler', [], onChange = None)):  # add only unique output handlers
			if handler_str == 'stdout':
				handler = StdoutStreamHandler()
			elif handler_str == 'stderr':
				handler = StderrStreamHandler()
			elif handler_str == 'file':
				handler = logging.FileHandler(config.get(logger_name + ' file', onChange = None), 'w')
			elif handler_str == 'debug_file':
				handler = GCLogHandler(config.get(logger_name + ' debug file', onChange = None), 'w')
			else:
				raise Exception('Unknown handler %s for logger %s' % (handler_str, logger_name))
			logger.addHandler(logging_configure_handler(config, logger_name, handler_str, handler))
def logging_create_handlers(config, logger_name):
	"""Apply the configured level, propagation flag and handler set to *logger_name*."""
	log_level_enum = makeEnum(lmap(lambda lvl: logging.getLevelName(lvl).upper(), irange(51)))
	logger = logging.getLogger(logger_name.lower())
	# Logging level - defaults to the logger's current level
	logger.setLevel(config.getEnum(logger_name + ' level', log_level_enum, logger.level, onChange=None))
	# Propagation towards parent loggers
	logger.propagate = config.getBool(logger_name + ' propagate', bool(logger.propagate), onChange=None)
	if logger_name + ' handler' not in config.getOptions():
		return  # keep standard handlers
	# Drop any standard handlers before installing the configured ones
	while logger.handlers:
		logger.removeHandler(logger.handlers[0])
	# Lazy factories: config options for a handler are only queried when it is selected
	handler_factory = {
		'stdout': lambda: StdoutStreamHandler(),
		'stderr': lambda: StderrStreamHandler(),
		'file': lambda: logging.FileHandler(config.get(logger_name + ' file', onChange=None), 'w'),
		'debug_file': lambda: GCLogHandler(config.get(logger_name + ' debug file', onChange=None), 'w'),
	}
	for handler_str in set(config.getList(logger_name + ' handler', [], onChange=None)):  # add only unique output handlers
		if handler_str not in handler_factory:
			raise Exception('Unknown handler %s for logger %s' % (handler_str, logger_name))
		logger.addHandler(logging_configure_handler(config, logger_name, handler_str, handler_factory[handler_str]()))
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

from grid_control.utils.data_structures import makeEnum
from grid_control.utils.parsing import strDict
from hpfwk import APIError, NestedException, clear_current_exception
from python_compat import ichain, ifilter, imap, lfilter, lmap, set, sorted


class ConfigError(NestedException):
	pass


# Placeholder to specify a non-existing default
noDefault = makeEnum(['noDefault'])


def standardConfigForm(value):
	"""Return the canonized form of a section / option specifier.

	A single value is wrapped into a list; every entry is stringified,
	stripped and lower-cased. None is passed through unchanged.
	"""
	if value is None:
		return None
	if not isinstance(value, list):
		value = [value]
	return lmap(lambda entry: str(entry).strip().lower(), value)


def appendOption(option, suffix):
	"""Append *suffix* to an option string - or to each member of an option list / tuple."""
	if isinstance(option, (list, tuple)):
		return lmap(lambda entry: appendOption(entry, suffix), option)
	return option.rstrip() + ' ' + suffix
# | You may obtain a copy of the License at # | # | http://www.apache.org/licenses/LICENSE-2.0 # | # | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. import time, logging from grid_control.utils.data_structures import makeEnum from hpfwk import AbstractError, NestedException, Plugin from python_compat import md5_hex, set ParameterInfo = makeEnum(['ACTIVE', 'HASH', 'REQS', 'FILES']) class ParameterError(NestedException): pass class ParameterMetadata(str): def __new__(cls, value, untracked = False): obj = str.__new__(cls, value) obj.untracked = untracked return obj def __repr__(self): if self.untracked: return "'!%s'" % self
def process(self, dn): return WMS.parseJobInfo(os.path.join(dn, 'job.info')) class FileInfoProcessor(JobInfoProcessor): def process(self, dn): jobInfo = JobInfoProcessor.process(self, dn) if jobInfo: jobData = jobInfo[2] result = {} # parse old job info data format for files oldFileFormat = [FileInfoProcessor.Hash, FileInfoProcessor.NameLocal, FileInfoProcessor.NameDest, FileInfoProcessor.Path] for (fileKey, fileData) in ifilter(lambda key_value: key_value[0].startswith('FILE'), jobData.items()): fileIdx = fileKey.replace('FILE', '').rjust(1, '0') result[int(fileIdx)] = dict(izip(oldFileFormat, fileData.strip('"').split(' '))) # parse new job info data format for (fileKey, fileData) in ifilter(lambda key_value: key_value[0].startswith('OUTPUT_FILE'), jobData.items()): (fileIdx, fileProperty) = fileKey.replace('OUTPUT_FILE_', '').split('_') if isinstance(fileData, str): fileData = fileData.strip('"') result.setdefault(int(fileIdx), {})[FileInfoProcessor.str2enum(fileProperty)] = fileData return list(result.values()) makeEnum(['Hash', 'NameLocal', 'NameDest', 'Path'], FileInfoProcessor) class TaskOutputProcessor(OutputProcessor): def __init__(self, task): self._task = task class SandboxProcessor(TaskOutputProcessor): def process(self, dn): return True
# | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. from grid_control.config import triggerResync from grid_control.datasets import DataProvider, DataSplitter, DatasetError from grid_control.datasets.splitter_basic import HybridSplitter from grid_control.utils import optSplit from grid_control.utils.data_structures import makeEnum from grid_control.utils.thread_tools import start_thread from grid_control.utils.webservice import JSONRestClient from grid_control_cms.lumi_tools import parseLumiFilter, strLumi from python_compat import sorted CMSLocationFormat = makeEnum(['hostname', 'siteDB', 'both']) PhedexT1Mode = makeEnum(['accept', 'disk', 'none']) # required format: <dataset path>[@<instance>][#<block>] class CMSBaseProvider(DataProvider): def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0): changeTrigger = triggerResync(['datasets', 'parameters']) self._lumi_filter = config.getLookup('lumi filter', {}, parser=parseLumiFilter, strfun=strLumi, onChange=changeTrigger) if not self._lumi_filter.empty(): config.set('dataset processor', 'LumiDataProcessor', '+=') DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID)
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

import os, sys, time, logging, threading
from grid_control.gc_exceptions import GCError, GCLogHandler
from grid_control.utils.data_structures import UniqueList, makeEnum
from grid_control.utils.file_objects import SafeFile, VirtualFile
from grid_control.utils.thread_tools import GCLock
from hpfwk import AbstractError, clear_current_exception, format_exception
from python_compat import irange, lmap, set, sorted, tarfile

# Enum with the upper-cased names of all logging levels 0..50 (ordered, without hashing)
LogLevelEnum = makeEnum(lmap(lambda level: logging.getLevelName(level).upper(), irange(51)), useHash=False)


class LogEveryNsec(logging.Filter):
	"""Logging filter that lets each distinct message through at most once per interval."""

	def __init__(self, interval):
		logging.Filter.__init__(self)
		self._memory = {}  # message -> timestamp of the last accepted record
		self._interval = interval

	def filter(self, record):
		# accept iff the last accepted occurrence of this message is old enough
		accept = (time.time() - self._memory.get(record.msg, 0) > self._interval)
		if accept:
			self._memory[record.msg] = time.time()
		return accept
try: import matplotlib import matplotlib.pyplot except ImportError: matplotlib = None import os, re, logging from grid_control.output_processor import JobInfoProcessor, JobResult from grid_control.report import Report from grid_control.utils.data_structures import makeEnum from python_compat import irange, izip JobResultEnum = makeEnum([ "TIMESTAMP_WRAPPER_START", "TIMESTAMP_DEPLOYMENT_START", "TIMESTAMP_DEPLOYMENT_DONE", "TIMESTAMP_SE_IN_START", "TIMESTAMP_SE_IN_DONE", "TIMESTAMP_CMSSW_CMSRUN1_START", "TIMESTAMP_CMSSW_CMSRUN1_DONE", "TIMESTAMP_EXECUTION_START", "TIMESTAMP_EXECUTION_DONE", "TIMESTAMP_SE_OUT_START", "TIMESTAMP_SE_OUT_DONE", "TIMESTAMP_WRAPPER_DONE", "FILESIZE_IN_TOTAL", "FILESIZE_OUT_TOTAL", "EVENT_COUNT" ]) def extractJobTiming(jInfo, task): jobResult = dict() jobNum = jInfo[JobResult.JOBNUM] # intialize all with None for key in JobResultEnum.enumNames: enumID = JobResultEnum.str2enum(key) jobResult[enumID] = None
if len(clsList) == 1: return clsList[0] elif not clsList: # requirePlugin == False return None if not option_compositor: option_compositor = appendOption(option, 'manager') return self.getPlugin(option_compositor, default_compositor, cls, tags, inherit, pargs=tuple([clsList] + list(pargs or [])), **kwargs) CommandType = makeEnum(['executable', 'command']) class SimpleConfigInterface(TypedConfigInterface): def __init__(self, configView): TypedConfigInterface.__init__(self, configView) self._interactive_enabled = None # delay config query def isInteractive(self, option, default): if isinstance(option, list): user_option_exists = any( imap(lambda opt: opt in self.getOptions(), option)) else: user_option_exists = option in self.getOptions() # global switch to enable / disable interactive option queries config_interactive = self.changeView(
except ImportError: matplotlib = None import os, re, logging from grid_control.output_processor import JobInfoProcessor, JobResult from grid_control.report import Report from grid_control.utils.data_structures import makeEnum from python_compat import irange, izip JobResultEnum = makeEnum([ "TIMESTAMP_WRAPPER_START", "TIMESTAMP_DEPLOYMENT_START", "TIMESTAMP_DEPLOYMENT_DONE", "TIMESTAMP_SE_IN_START", "TIMESTAMP_SE_IN_DONE", "TIMESTAMP_CMSSW_CMSRUN1_START", "TIMESTAMP_CMSSW_CMSRUN1_DONE", "TIMESTAMP_EXECUTION_START", "TIMESTAMP_EXECUTION_DONE", "TIMESTAMP_SE_OUT_START", "TIMESTAMP_SE_OUT_DONE", "TIMESTAMP_WRAPPER_DONE", "FILESIZE_IN_TOTAL", "FILESIZE_OUT_TOTAL", "EVENT_COUNT"]) def extractJobTiming(jInfo, task): jobResult = dict() jobNum = jInfo[JobResult.JOBNUM] # intialize all with None for key in JobResultEnum.enumNames:
# | http://www.apache.org/licenses/LICENSE-2.0
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

from grid_control.config import triggerResync
from grid_control.datasets import DataProcessor, DataProvider, DataSplitter, DatasetError, PartitionProcessor
from grid_control.parameters import ParameterMetadata
from grid_control.utils.data_structures import makeEnum
from grid_control_cms.lumi_tools import filterLumiFilter, formatLumi, parseLumiFilter, selectLumi, selectRun, strLumi
from python_compat import any, ichain, imap, izip, set

LumiKeep = makeEnum(['RunLumi', 'Run', 'none'])
LumiMode = makeEnum(['strict', 'weak'])


def removeRunLumi(value, idxRuns, idxLumi):
	"""Remove the run and / or lumi entries (given by index) from *value* in-place."""
	if (idxRuns is None) and (idxLumi is None):
		return
	if None in (idxRuns, idxLumi):
		# only one index is given - pop just that entry
		value.pop(idxRuns if idxLumi is None else idxLumi)
	else:
		# pop the higher index first so the lower index stays valid
		value.pop(max(idxRuns, idxLumi))
		value.pop(min(idxRuns, idxLumi))


class LumiDataProcessor(DataProcessor):
	alias = ['lumi']
# | You may obtain a copy of the License at # | # | http://www.apache.org/licenses/LICENSE-2.0 # | # | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. import time from grid_control.utils.data_structures import makeEnum from hpfwk import AbstractError, NestedException, Plugin from python_compat import set ParameterInfo = makeEnum(['ACTIVE', 'HASH', 'REQS', 'FILES']) class ParameterError(NestedException): pass class ParameterMetadata(str): def __new__(cls, value, untracked=False): obj = str.__new__(cls, value) obj.untracked = untracked return obj def __repr__(self): if self.untracked: return "'!%s'" % self
except Exception: from subprocess import getoutput from grid_control import utils from grid_control.backends.aspect_cancel import CancelAndPurgeJobs from grid_control.backends.aspect_status import CheckJobsMissingState from grid_control.backends.broker_base import Broker from grid_control.backends.condor_wms.processhandler import ProcessHandler from grid_control.backends.wms import BackendError, BasicWMS, WMS from grid_control.backends.wms_condor import Condor_CancelJobs, Condor_CheckJobs from grid_control.backends.wms_local import LocalPurgeJobs, SandboxHelper from grid_control.utils.activity import Activity from grid_control.utils.data_structures import makeEnum from python_compat import imap, irange, lmap, lzip, md5, set # if the ssh stuff proves too hack'y: http://www.lag.net/paramiko/ PoolType = makeEnum(['LOCAL','SPOOL','SSH','GSISSH']) class Condor(BasicWMS): configSections = BasicWMS.configSections + ['condor'] # dictionary mapping vanilla condor job status to GC job status # condor: U = unexpanded (never been run), H = on hold, R = running, I = idle (waiting for a machine to execute on), C = completed, and X = removed # 0 Unexpanded U -- 1 Idle I -- 2 Running R -- 3 Removed X -- 4 Completed C -- 5 Held H -- 6 Submission_err E # GC: 'INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED', 'RUNNING', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS' # __init__: start Condor based job management #>>config: Config class extended dictionary def __init__(self, config, name): self._sandbox_helper = SandboxHelper(config) BasicWMS.__init__(self, config, name, checkExecutor = CheckJobsMissingState(config, Condor_CheckJobs(config)),
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

import os, sys, time, logging, threading
from grid_control.gc_exceptions import GCError, GCLogHandler
from grid_control.utils.data_structures import UniqueList, makeEnum
from grid_control.utils.file_objects import SafeFile, VirtualFile
from grid_control.utils.thread_tools import GCLock
from hpfwk import AbstractError, clear_current_exception, format_exception
from python_compat import irange, lmap, set, sorted, tarfile

# Enum listing the upper-cased names of all logging levels 0..50 (hashing disabled)
LogLevelEnum = makeEnum(lmap(lambda level: logging.getLevelName(level).upper(), irange(51)), useHash = False)


class LogEveryNsec(logging.Filter):
	"""Rate limiting log filter - a given message is accepted at most once per interval."""

	def __init__(self, interval):
		logging.Filter.__init__(self)
		self._memory = {}  # maps message -> time of last acceptance
		self._interval = interval

	def filter(self, record):
		last_seen = self._memory.get(record.msg, 0)
		accept = (time.time() - last_seen > self._interval)
		if accept:
			self._memory[record.msg] = time.time()
		return accept


# In contrast to StreamHandler, this logging handler doesn't keep a stream copy
import os, re, glob, time, tempfile try: from commands import getoutput except Exception: from subprocess import getoutput from grid_control import utils from grid_control.backends.broker_base import Broker from grid_control.backends.condor_wms.processhandler import ProcessHandler from grid_control.backends.wms import BackendError, BasicWMS, WMS from grid_control.job_db import Job from grid_control.utils.data_structures import makeEnum from python_compat import ifilter, imap, irange, izip, lmap, lzip, md5, set, sorted # if the ssh stuff proves too hack'y: http://www.lag.net/paramiko/ PoolType = makeEnum(['LOCAL','SPOOL','SSH','GSISSH'], useHash = True) class Condor(BasicWMS): configSections = BasicWMS.configSections + ['condor'] # dictionary mapping vanilla condor job status to GC job status # condor: U = unexpanded (never been run), H = on hold, R = running, I = idle (waiting for a machine to execute on), C = completed, and X = removed # 0 Unexpanded U -- 1 Idle I -- 2 Running R -- 3 Removed X -- 4 Completed C -- 5 Held H -- 6 Submission_err E # GC: 'INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED', 'RUNNING', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS' _statusMap = { # dictionary mapping vanilla condor job status to GC job status '0' : Job.WAITING, # unexpanded (never been run) '1' : Job.SUBMITTED, # idle (waiting for a machine to execute on) '2' : Job.RUNNING, # running '3' : Job.ABORTED, # removed '4' : Job.DONE, # completed '5' : Job.WAITING, # DISABLED; on hold
# | limitations under the License. import os, gzip, logging from grid_control.utils import DictFormat from grid_control.utils.data_structures import makeEnum from hpfwk import AbstractError, NestedException, Plugin, get_current_exception from python_compat import bytes2str, ifilter, izip class OutputProcessor(Plugin): def process(self, dn): raise AbstractError class JobResultError(NestedException): pass JobResult = makeEnum(['JOBNUM', 'EXITCODE', 'RAW']) class JobInfoProcessor(OutputProcessor): def __init__(self): OutputProcessor.__init__(self) self._df = DictFormat() def process(self, dn): fn = os.path.join(dn, 'job.info') try: if not os.path.exists(fn): raise JobResultError('Job result file %r does not exist' % fn) try: info_content = open(fn, 'r').read() except Exception: raise JobResultError('Unable to read job result file %r' % fn)
def processBlock(self, block): if block[DataProvider.Locations] is not None: sites = self._locationfilter.filterList(block[DataProvider.Locations]) if (sites is not None) and (len(sites) == 0) and (len(block[DataProvider.FileList]) != 0): if not len(block[DataProvider.Locations]): self._log.warning('Block %s#%s is not available at any site!', block[DataProvider.Dataset], block[DataProvider.BlockName]) elif not len(sites): self._log.warning('Block %s#%s is not available at any selected site!', block[DataProvider.Dataset], block[DataProvider.BlockName]) block[DataProvider.Locations] = sites return block # Enum to specify how to react to multiple occurences of something DatasetUniqueMode = makeEnum(['warn', 'abort', 'skip', 'ignore', 'record'], useHash = True) class UniqueDataProcessor(DataProcessor): alias = ['unique'] def __init__(self, config): DataProcessor.__init__(self, config) self._checkURL = config.getEnum('dataset check unique url', DatasetUniqueMode, DatasetUniqueMode.abort, onChange = DataProcessor.triggerDataResync) self._checkBlock = config.getEnum('dataset check unique block', DatasetUniqueMode, DatasetUniqueMode.abort, onChange = DataProcessor.triggerDataResync) def enabled(self): return (self._checkURL == DatasetUniqueMode.ignore) and (self._checkBlock == DatasetUniqueMode.ignore) def process(self, blockIter):
# | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. import logging from grid_control.config import triggerResync from grid_control.datasets import DataProcessor, DataProvider, DataSplitter, DatasetError, PartitionProcessor from grid_control.parameters import ParameterMetadata from grid_control.utils.data_structures import makeEnum from grid_control.utils.gc_itertools import ichain from grid_control_cms.lumi_tools import filterLumiFilter, formatLumi, parseLumiFilter, selectLumi, strLumi from python_compat import imap, izip, set LumiKeep = makeEnum(['RunLumi', 'Run', 'none']) def removeRunLumi(value, idxRuns, idxLumi): if (idxRuns is not None) and (idxLumi is not None): value.pop(max(idxRuns, idxLumi)) value.pop(min(idxRuns, idxLumi)) elif idxLumi is not None: value.pop(idxLumi) elif idxRuns is not None: value.pop(idxRuns) class LumiDataProcessor(DataProcessor): alias = ['lumi'] def __init__(self, config):
def cancelJobs(self, ids): raise AbstractError # Return (jobNum, wmsId) for cancelled jobs def _createId(self, wmsIdRaw): return 'WMSID.%s.%s' % (self.wmsName, wmsIdRaw) def _splitId(self, wmsId): if wmsId.startswith('WMSID'): # local wms return tuple(wmsId.split('.', 2)[1:]) elif wmsId.startswith('http'): # legacy support return ('grid', wmsId) def _getRawIDs(self, ids): for (wmsId, _) in ids: yield self._splitId(wmsId)[1] makeEnum(['WALLTIME', 'CPUTIME', 'MEMORY', 'CPUS', 'BACKEND', 'SITES', 'QUEUES', 'SOFTWARE', 'STORAGE'], WMS) class InactiveWMS(WMS): alias = ['inactive'] def __init__(self, config, wmsName): WMS.__init__(self, config, wmsName) self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken', 'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self]) def canSubmit(self, neededTime, canCurrentlySubmit): return True def getAccessToken(self, wmsId): return self._token
return lfilter(lambda p: not p.startswith('-'), selector.split()) def parseSelector(self, selector): return selector.split() def matcher(self, value, selector): result = 0 for idx, subselector in enumerate(selector.split()): if subselector.startswith('-') and (self._baseMatcher.matcher(value, subselector[1:]) > 0): result = -(idx + 1) elif self._baseMatcher.matcher(value, subselector) > 0: result = idx + 1 return result ListOrder = makeEnum(['source', 'matcher']) class ListFilter(Plugin): def __init__(self, selector, matcher, order): (self._matchFunction, self._positive, self._selector, self._order) = (None, None, None, order) if selector: self._selector = matcher.parseSelector(selector) self._matchFunction = matcher.matchWith(selector) self._positive = matcher.getPositive(selector) def getSelector(self): return self._selector def filterList(self, entries): if entries is None: return self._positive
# | http://www.apache.org/licenses/LICENSE-2.0 # | # | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. import logging from grid_control.config.config_entry import ConfigEntry, ConfigError, noDefault, standardConfigForm from grid_control.utils.data_structures import makeEnum from grid_control.utils.gc_itertools import ichain from hpfwk import AbstractError, Plugin from python_compat import imap, lfilter, sorted selectorUnchanged = makeEnum(['selector_unchanged']) class ConfigView(Plugin): def __init__(self, name, parent = None): if not parent: parent = self self._parent = parent self.pathDict = {} self.pathDict.update(parent.pathDict) # inherit path dict from parent self.setConfigName(name) def setConfigName(self, name): self.configName = name self._log = logging.getLogger('config.%s' % name.lower()) def getView(self, setSections = selectorUnchanged, **kwargs):
def parseSelector(self, selector): return selector.split() def matcher(self, value, selector): result = 0 for idx, subselector in enumerate(selector.split()): if subselector.startswith('-') and (self._baseMatcher.matcher( value, subselector[1:]) > 0): result = -(idx + 1) elif self._baseMatcher.matcher(value, subselector) > 0: result = idx + 1 return result ListOrder = makeEnum(['source', 'matcher']) class ListFilter(Plugin): def __init__(self, selector, matcher, order, match_key, negate): (self._matchFunction, self._positive, self._selector, self._order, self._negate) = (None, None, None, order, negate) if selector: self._selector = matcher.parseSelector(selector) self._positive = matcher.getPositive(selector) matchObj = matcher.matchWith(selector) if match_key or negate: def match_fun(item): if match_key: item = match_key(item)
except Exception: from subprocess import getoutput from grid_control import utils from grid_control.backends.aspect_cancel import CancelAndPurgeJobs from grid_control.backends.aspect_status import CheckJobsMissingState from grid_control.backends.broker_base import Broker from grid_control.backends.condor_wms.processhandler import ProcessHandler from grid_control.backends.wms import BackendError, BasicWMS, WMS from grid_control.backends.wms_condor import Condor_CancelJobs, Condor_CheckJobs from grid_control.backends.wms_local import LocalPurgeJobs, SandboxHelper from grid_control.utils.activity import Activity from grid_control.utils.data_structures import makeEnum from python_compat import imap, irange, lmap, lzip, md5, set # if the ssh stuff proves too hack'y: http://www.lag.net/paramiko/ PoolType = makeEnum(['LOCAL', 'SPOOL', 'SSH', 'GSISSH']) class Condor(BasicWMS): configSections = BasicWMS.configSections + ['condor'] # dictionary mapping vanilla condor job status to GC job status # condor: U = unexpanded (never been run), H = on hold, R = running, I = idle (waiting for a machine to execute on), C = completed, and X = removed # 0 Unexpanded U -- 1 Idle I -- 2 Running R -- 3 Removed X -- 4 Completed C -- 5 Held H -- 6 Submission_err E # GC: 'INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED', 'RUNNING', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS' # __init__: start Condor based job management #>>config: Config class extended dictionary def __init__(self, config, name): self._sandbox_helper = SandboxHelper(config) BasicWMS.__init__(self,
from grid_control.utils.activity import Activity from grid_control.utils.data_structures import makeEnum from grid_control.utils.file_objects import SafeFile, VirtualFile from hpfwk import AbstractError, NestedException, clear_current_exception from python_compat import ichain, identity, imap, izip, lchain, lmap, set, sorted class BackendError(NestedException): pass BackendJobState = makeEnum([ 'ABORTED', # job was aborted by the WMS 'CANCELLED', # job was cancelled 'DONE', # job is finished 'QUEUED', # job is at WMS and is assigned a place to run 'RUNNING', # job is running 'UNKNOWN', # job status is unknown 'WAITING', # job is at WMS but was not yet assigned some place to run ]) class WMS(NamedPlugin): configSections = NamedPlugin.configSections + ['wms', 'backend'] tagName = 'wms' def __init__(self, config, name): name = (name or self.__class__.__name__).upper().replace('.', '_') NamedPlugin.__init__(self, config, name) self._wait_idle = config.getInt('wait idle', 60, onChange=None) self._wait_work = config.getInt('wait work', 10, onChange=None)
import os, re, glob, time, tempfile try: from commands import getoutput except Exception: from subprocess import getoutput from grid_control import utils from grid_control.backends.broker_base import Broker from grid_control.backends.condor_wms.processhandler import ProcessHandler from grid_control.backends.wms import BackendError, BasicWMS, WMS from grid_control.job_db import Job from grid_control.utils.data_structures import makeEnum from python_compat import ifilter, imap, irange, izip, lmap, lzip, md5, set, sorted # if the ssh stuff proves too hack'y: http://www.lag.net/paramiko/ PoolType = makeEnum(['LOCAL', 'SPOOL', 'SSH', 'GSISSH'], useHash=True) class Condor(BasicWMS): configSections = BasicWMS.configSections + ['condor'] # dictionary mapping vanilla condor job status to GC job status # condor: U = unexpanded (never been run), H = on hold, R = running, I = idle (waiting for a machine to execute on), C = completed, and X = removed # 0 Unexpanded U -- 1 Idle I -- 2 Running R -- 3 Removed X -- 4 Completed C -- 5 Held H -- 6 Submission_err E # GC: 'INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED', 'RUNNING', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS' _statusMap = { # dictionary mapping vanilla condor job status to GC job status '0': Job.WAITING, # unexpanded (never been run) '1': Job.SUBMITTED, # idle (waiting for a machine to execute on) '2': Job.RUNNING, # running '3': Job.ABORTED, # removed '4': Job.DONE, # completed '5': Job.WAITING, # DISABLED; on hold
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. from grid_control.config import triggerResync from grid_control.datasets import DataProvider, DataSplitter, DatasetError from grid_control.datasets.splitter_basic import HybridSplitter from grid_control.utils import optSplit from grid_control.utils.data_structures import makeEnum from grid_control.utils.thread_tools import start_thread from grid_control.utils.webservice import JSONRestClient from grid_control_cms.lumi_tools import parseLumiFilter, strLumi from grid_control_cms.sitedb import SiteDB from python_compat import itemgetter, lfilter, sorted CMSLocationFormat = makeEnum(['hostname', 'siteDB', 'both']) # required format: <dataset path>[@<instance>][#<block>] class CMSBaseProvider(DataProvider): def __init__(self, config, datasetExpr, datasetNick=None): self._changeTrigger = triggerResync(['datasets', 'parameters']) self._lumi_filter = config.getLookup('lumi filter', {}, parser=parseLumiFilter, strfun=strLumi, onChange=self._changeTrigger) if not self._lumi_filter.empty(): config.set('dataset processor', 'LumiDataProcessor', '+=') DataProvider.__init__(self, config, datasetExpr, datasetNick) # LumiDataProcessor instantiated in DataProcessor.__ini__ will set lumi metadata as well self._lumi_query = config.getBool('lumi metadata',
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

import logging
from grid_control import utils
from grid_control.backends.backend_tools import BackendError, BackendExecutor
from grid_control.job_db import Job
from grid_control.utils.data_structures import makeEnum
from hpfwk import AbstractError
from python_compat import set

# Keys for the per-job information dictionary / overall outcome of a check attempt
CheckInfo = makeEnum(['WMSID', 'RAW_STATUS', 'QUEUE', 'WN', 'SITE'])
CheckStatus = makeEnum(['OK', 'ERROR'])


class CheckJobs(BackendExecutor):
	"""Backend aspect that queries the state of a list of submitted jobs."""

	def execute(self, wmsIDs):
		# yields list of (wmsID, job_status, job_info)
		raise AbstractError

	def get_status(self):
		return CheckStatus.OK


class CheckJobsMissingState(CheckJobs):
	def __init__(self, config, executor, missing_state = Job.DONE):
		CheckJobs.__init__(self, config)
		self._executor = executor
		self._missing_state = missing_state
except Exception: return log.exception('Unable to read %r!', fn) if not info_content: return log.warning('%r is empty!', fn) try: data = utils.DictFormat().parse(info_content, keyParser={None: str}) return (data['JOBID'], data['EXITCODE'], data) except Exception: return log.warning('Unable to parse %r!', fn) parseJobInfo = staticmethod(parseJobInfo) makeEnum([ 'WALLTIME', 'CPUTIME', 'MEMORY', 'CPUS', 'BACKEND', 'SITES', 'QUEUES', 'SOFTWARE', 'STORAGE' ], WMS) class InactiveWMS(WMS): alias = ['inactive'] def __init__(self, config, wmsName): WMS.__init__(self, config, wmsName) self._token = config.getCompositePlugin(['access token', 'proxy'], 'TrivialAccessToken', 'MultiAccessToken', cls=AccessToken, inherit=True, tags=[self])
return self.dict.get(key, default) def update(self, state): self.state = state self.changed = time.time() self.history[self.attempt] = self.dict.get('dest', 'N/A') def assignId(self, wmsId): self.dict['legacy'] = None # Legacy support self.wmsId = wmsId self.attempt = self.attempt + 1 self.submitted = time.time() makeEnum(['INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED', 'RUNNING', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS'], Job, useHash = False) class JobClass(object): mkJobClass = lambda *fList: (reduce(operator.add, imap(lambda f: 1 << f, fList)), fList) ATWMS = mkJobClass(Job.SUBMITTED, Job.WAITING, Job.READY, Job.QUEUED) RUNNING = mkJobClass(Job.RUNNING) PROCESSING = mkJobClass(Job.SUBMITTED, Job.WAITING, Job.READY, Job.QUEUED, Job.RUNNING) READY = mkJobClass(Job.INIT, Job.FAILED, Job.ABORTED, Job.CANCELLED) DONE = mkJobClass(Job.DONE) SUCCESS = mkJobClass(Job.SUCCESS) DISABLED = mkJobClass(Job.DISABLED) ENDSTATE = mkJobClass(Job.SUCCESS, Job.DISABLED) PROCESSED = mkJobClass(Job.SUCCESS, Job.FAILED, Job.CANCELLED, Job.ABORTED)
def onMatchingFile(filesAdded, filesMissing, filesMatched, oldFile, newFile): filesMatched.append((oldFile, newFile)) (filesAdded, filesMissing, filesMatched) = \ utils.DiffLists(oldBlock[DataProvider.FileList], newBlock[DataProvider.FileList], keyFiles, onMatchingFile, isSorted = True) if filesAdded: # Create new block for added files in an existing block tmpBlock = copy.copy(newBlock) tmpBlock[DataProvider.FileList] = filesAdded tmpBlock[DataProvider.NEntries] = sum( imap(lambda x: x[DataProvider.NEntries], filesAdded)) blocksAdded.append(tmpBlock) blocksMatching.append( (oldBlock, newBlock, filesMissing, filesMatched)) return utils.DiffLists(oldBlocks, newBlocks, keyBlock, onMatchingBlock, isSorted=True) resyncSources = staticmethod(resyncSources) # To uncover errors, the enums of DataProvider / DataSplitter do *NOT* match type wise makeEnum([ 'NEntries', 'BlockName', 'Dataset', 'Locations', 'URL', 'FileList', 'Nickname', 'DatasetID', 'Metadata', 'Provider', 'ResyncInfo' ], DataProvider)
# NOTE(review): the following three methods belong to the Job class - the
# class header lies outside this excerpt, so they appear at top level here.
def get(self, key, default=None):
	# Read one entry from the job metadata dictionary
	return self.dict.get(key, default)

def update(self, state):
	# Switch the job to a new state and record the time of the change;
	# also remembers the job destination ('dest') for the current attempt
	self.state = state
	self.changed = time.time()
	self.history[self.attempt] = self.dict.get('dest', 'N/A')

def assignId(self, gcID):
	# Attach a new grid-control ID, count this as a new submission attempt
	self.gcID = gcID
	self.attempt = self.attempt + 1
	self.submitted = time.time()


# Install the job state constants (Job.INIT, Job.SUBMITTED, ...) on the Job class
makeEnum([
	'INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED',
	'RUNNING', 'CANCEL', 'UNKNOWN', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS'
], Job)


class JobClassHolder(object):
	# Plain container for the set of job states that make up one job class
	def __init__(self, *states):
		self.states = states


class JobClass(object):
	# Named groups of job states (list may continue beyond this excerpt)
	ATWMS = JobClassHolder(Job.SUBMITTED, Job.WAITING, Job.READY, Job.QUEUED, Job.UNKNOWN)
	CANCEL = JobClassHolder(Job.CANCEL)
	DISABLED = JobClassHolder(Job.DISABLED)
	DONE = JobClassHolder(Job.DONE)
	ENDSTATE = JobClassHolder(Job.SUCCESS, Job.DISABLED)
from urllib import urlencode except Exception: from urllib.parse import urlencode class RestError(NestedException): pass class RestSession(Plugin): def __init__(self): pass def request(self, mode, url, headers, params = None, data = None, cert = None): raise AbstractError makeEnum(['GET', 'PUT', 'POST', 'DELETE'], RestSession) class RestClient(object): def __init__(self, cert = None, url = None, default_headers = None, process_result = None, process_data = None, session = None): self._log = logging.getLogger('webservice') (self._cert, self._url, self._headers) = (cert, url, default_headers) (self._process_result, self._process_data) = (process_result or identity, process_data or urlencode) if not session: try: self._session = RestSession.createInstance('RequestsSession') except Exception: # pulling in incompatible dependencies can cause many different types of exceptions self._session = RestSession.createInstance('Urllib2Session') def _request(self, mode, url, api, headers, params = None, data = None):
from grid_control.utils import DictFormat
from grid_control.utils.data_structures import makeEnum
from hpfwk import AbstractError, NestedException, Plugin
from python_compat import ifilter, izip


class OutputProcessor(Plugin):
	# Interface: process the output directory of a finished job
	def process(self, dn):
		raise AbstractError


class JobResultError(NestedException):
	pass


# Keys into the parsed job result
JobResult = makeEnum(['JOBNUM', 'EXITCODE', 'RAW'], useHash=True)


class JobInfoProcessor(OutputProcessor):
	def __init__(self):
		OutputProcessor.__init__(self)
		self._df = DictFormat()  # parser for the key=value 'job.info' format

	def process(self, dn):
		# Read and parse the 'job.info' file from job output directory dn
		# (function continues beyond this excerpt)
		fn = os.path.join(dn, 'job.info')
		if not os.path.exists(fn):
			raise JobResultError('Job result file %r does not exist' % fn)
		try:
			# NOTE(review): file handle is never closed explicitly - relies on
			# garbage collection; a try/finally or context manager would be safer
			info_content = open(fn, 'r').read()
		except Exception:
			raise JobResultError('Unable to read job result file %r' % fn)
pargs = None, pkwargs = None, **kwargs): clsList = [] for factory in self._getPluginFactories(option, default, cls, tags, inherit, requirePlugin, singlePlugin = False, desc = 'composite plugin', **kwargs): clsList.append(factory.getBoundInstance(*(pargs or ()), **(pkwargs or {}))) if len(clsList) == 1: return clsList[0] elif not clsList: # requirePlugin == False return None if not option_compositor: option_compositor = appendOption(option, 'manager') return self.getPlugin(option_compositor, default_compositor, cls, tags, inherit, pargs = tuple([clsList] + list(pargs or [])), **kwargs) CommandType = makeEnum(['executable', 'command']) class SimpleConfigInterface(TypedConfigInterface): def getCommand(self, option, default = noDefault, **kwargs): scriptType = self.getEnum(appendOption(option, 'type'), CommandType, CommandType.executable, **kwargs) if scriptType == CommandType.executable: return self.getPath(option, default, **kwargs) return os.path.expandvars(self.get(option, default, **kwargs)) def getLookup(self, option, default = noDefault, defaultMatcher = 'start', single = True, includeDefault = False, **kwargs): matcherArgs = {} if 'onChange' in kwargs: matcherArgs['onChange'] = kwargs['onChange'] matcherOpt = appendOption(option, 'matcher') matcherObj = self.getPlugin(matcherOpt, defaultMatcher, cls = Matcher, pargs = (matcherOpt,), **matcherArgs)
from grid_control.gc_plugin import NamedPlugin from grid_control.output_processor import JobResult from grid_control.utils.activity import Activity from grid_control.utils.data_structures import makeEnum from grid_control.utils.file_objects import SafeFile, VirtualFile from hpfwk import AbstractError, NestedException, clear_current_exception from python_compat import ichain, identity, imap, izip, lchain, lmap, set, sorted class BackendError(NestedException): pass BackendJobState = makeEnum([ 'ABORTED', # job was aborted by the WMS 'CANCELLED', # job was cancelled 'DONE', # job is finished 'QUEUED', # job is at WMS and is assigned a place to run 'RUNNING', # job is running 'UNKNOWN', # job status is unknown 'WAITING', # job is at WMS but was not yet assigned some place to run ]) class WMS(NamedPlugin): configSections = NamedPlugin.configSections + ['wms', 'backend'] tagName = 'wms' def __init__(self, config, name): name = (name or self.__class__.__name__).upper().replace('.', '_') NamedPlugin.__init__(self, config, name) self._wait_idle = config.getInt('wait idle', 60, onChange = None) self._wait_work = config.getInt('wait work', 10, onChange = None) self._job_parser = config.getPlugin('job parser', 'JobInfoProcessor',
# | http://www.apache.org/licenses/LICENSE-2.0 # | # | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. import logging from grid_control.config.config_entry import ConfigEntry, ConfigError, noDefault, standardConfigForm from grid_control.utils.data_structures import makeEnum from grid_control.utils.gc_itertools import ichain from hpfwk import AbstractError, Plugin from python_compat import imap, lfilter, sorted selectorUnchanged = makeEnum(['selector_unchanged']) class ConfigView(Plugin): def __init__(self, name, parent=None): if not parent: parent = self self._parent = parent self.pathDict = {} self.pathDict.update(parent.pathDict) # inherit path dict from parent self.setConfigName(name) def setConfigName(self, name): self.configName = name self._log = logging.getLogger('config.%s' % name.lower())
def keyBlock(x): return (x[DataProvider.Dataset], x[DataProvider.BlockName]) sort_inplace(oldBlocks, key = keyBlock) sort_inplace(newBlocks, key = keyBlock) def onMatchingBlock(blocksAdded, blocksMissing, blocksMatching, oldBlock, newBlock): # Compare different files according to their name - NOT full content def keyFiles(x): return x[DataProvider.URL] sort_inplace(oldBlock[DataProvider.FileList], key = keyFiles) sort_inplace(newBlock[DataProvider.FileList], key = keyFiles) def onMatchingFile(filesAdded, filesMissing, filesMatched, oldFile, newFile): filesMatched.append((oldFile, newFile)) (filesAdded, filesMissing, filesMatched) = \ utils.DiffLists(oldBlock[DataProvider.FileList], newBlock[DataProvider.FileList], keyFiles, onMatchingFile, isSorted = True) if filesAdded: # Create new block for added files in an existing block tmpBlock = copy.copy(newBlock) tmpBlock[DataProvider.FileList] = filesAdded tmpBlock[DataProvider.NEntries] = sum(imap(lambda x: x[DataProvider.NEntries], filesAdded)) blocksAdded.append(tmpBlock) blocksMatching.append((oldBlock, newBlock, filesMissing, filesMatched)) return utils.DiffLists(oldBlocks, newBlocks, keyBlock, onMatchingBlock, isSorted = True) resyncSources = staticmethod(resyncSources) # To uncover errors, the enums of DataProvider / DataSplitter do *NOT* match type wise makeEnum(['NEntries', 'BlockName', 'Dataset', 'Locations', 'URL', 'FileList', 'Nickname', 'DatasetID', 'Metadata', 'Provider', 'ResyncInfo'], DataProvider)
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. from grid_control.config import triggerResync from grid_control.datasets import DataProvider, DataSplitter, DatasetError from grid_control.datasets.splitter_basic import HybridSplitter from grid_control.utils import optSplit from grid_control.utils.data_structures import makeEnum from grid_control.utils.thread_tools import start_thread from grid_control.utils.webservice import JSONRestClient from grid_control_cms.lumi_tools import parseLumiFilter, strLumi from grid_control_cms.sitedb import SiteDB from python_compat import itemgetter, lfilter, sorted CMSLocationFormat = makeEnum(['hostname', 'siteDB', 'both']) # required format: <dataset path>[@<instance>][#<block>] class CMSBaseProvider(DataProvider): def __init__(self, config, datasetExpr, datasetNick = None): self._changeTrigger = triggerResync(['datasets', 'parameters']) self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = self._changeTrigger) if not self._lumi_filter.empty(): config.set('dataset processor', 'LumiDataProcessor', '+=') DataProvider.__init__(self, config, datasetExpr, datasetNick) # LumiDataProcessor instantiated in DataProcessor.__ini__ will set lumi metadata as well self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = self._changeTrigger) config.set('phedex sites matcher mode', 'shell', '?=') # PhEDex blacklist: 'T1_*_Disk nodes allow user jobs - other T1's dont! self._phedexFilter = config.getFilter('phedex sites', '-* T1_*_Disk T2_* T3_*', defaultMatcher = 'blackwhite', defaultFilter = 'strict', onChange = self._changeTrigger)
# | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. from grid_control.config import triggerResync from grid_control.datasets import DataProvider, DataSplitter, DatasetError from grid_control.datasets.splitter_basic import HybridSplitter from grid_control.utils import optSplit from grid_control.utils.data_structures import makeEnum from grid_control.utils.thread_tools import start_thread from grid_control.utils.webservice import JSONRestClient from grid_control_cms.lumi_tools import parseLumiFilter, strLumi from python_compat import sorted CMSLocationFormat = makeEnum(['hostname', 'siteDB', 'both']) PhedexT1Mode = makeEnum(['accept', 'disk', 'none']) # required format: <dataset path>[@<instance>][#<block>] class CMSBaseProvider(DataProvider): def __init__(self, config, datasetExpr, datasetNick = None, datasetID = 0): changeTrigger = triggerResync(['datasets', 'parameters']) self._lumi_filter = config.getLookup('lumi filter', {}, parser = parseLumiFilter, strfun = strLumi, onChange = changeTrigger) if not self._lumi_filter.empty(): config.set('dataset processor', 'LumiDataProcessor', '+=') DataProvider.__init__(self, config, datasetExpr, datasetNick, datasetID) # LumiDataProcessor instantiated in DataProcessor.__ini__ will set lumi metadata as well self._lumi_query = config.getBool('lumi metadata', not self._lumi_filter.empty(), onChange = changeTrigger) # PhEDex blacklist: 'T1_DE_KIT', 'T1_US_FNAL' and '*_Disk' allow user jobs - other T1's dont! self._phedexFilter = config.getFilter('phedex sites', '-T3_US_FNALLPC', defaultMatcher = 'blackwhite', defaultFilter = 'weak', onChange = changeTrigger)
# | # | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. import logging from grid_control.datasets import DataProcessor, DataProvider, DataSplitter, DatasetError, PartitionProcessor from grid_control.parameters import ParameterMetadata from grid_control.utils.data_structures import makeEnum from grid_control.utils.gc_itertools import ichain from grid_control_cms.lumi_tools import filterLumiFilter, formatLumi, parseLumiFilter, selectLumi, strLumi from python_compat import imap, izip, set LumiKeep = makeEnum(['RunLumi', 'Run', 'none']) def removeRunLumi(value, idxRuns, idxLumi): if (idxRuns is not None) and (idxLumi is not None): value.pop(max(idxRuns, idxLumi)) value.pop(min(idxRuns, idxLumi)) elif idxLumi is not None: value.pop(idxLumi) elif idxRuns is not None: value.pop(idxRuns) class LumiDataProcessor(DataProcessor): def __init__(self, config): DataProcessor.__init__(self, config)
# |
# | http://www.apache.org/licenses/LICENSE-2.0
# |
# | Unless required by applicable law or agreed to in writing, software
# | distributed under the License is distributed on an "AS IS" BASIS,
# | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# | See the License for the specific language governing permissions and
# | limitations under the License.

from grid_control.datasets.dproc_base import DataProcessor
from grid_control.datasets.provider_base import DataProvider, DatasetError
from grid_control.utils.data_structures import makeEnum
from python_compat import imap, md5_hex, set

# Enum to specify how to react to multiple occurrences of something
DatasetUniqueMode = makeEnum(['warn', 'abort', 'skip', 'ignore', 'record'])
# Enum to specify how to react to a failed dataset sanity check
DatasetCheckMode = makeEnum(['warn', 'abort', 'ignore'])


class DataChecker(DataProcessor):
	# Base class for dataset sanity checks
	def _handleError(self, msg, mode):
		# React according to the configured mode:
		# 'abort' raises a DatasetError, 'warn' only logs, anything else is silent
		if mode == DatasetCheckMode.abort:
			raise DatasetError(msg)
		if mode == DatasetCheckMode.warn:
			self._log.warning(msg)


class EntriesConsistencyDataProcessor(DataChecker):
	# Configurable via the 'consistency' alias (body continues beyond this excerpt)
	alias = ['consistency']

	def __init__(self, config, onChange):
		DataChecker.__init__(self, config, onChange)
# | # | http://www.apache.org/licenses/LICENSE-2.0 # | # | Unless required by applicable law or agreed to in writing, software # | distributed under the License is distributed on an "AS IS" BASIS, # | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # | See the License for the specific language governing permissions and # | limitations under the License. from grid_control.datasets.dproc_base import DataProcessor from grid_control.datasets.provider_base import DataProvider, DatasetError from grid_control.utils.data_structures import makeEnum from python_compat import imap, md5_hex, set # Enum to specify how to react to multiple occurences of something DatasetUniqueMode = makeEnum(['warn', 'abort', 'skip', 'ignore', 'record']) DatasetCheckMode = makeEnum(['warn', 'abort', 'ignore']) class DataChecker(DataProcessor): def _handleError(self, msg, mode): if mode == DatasetCheckMode.warn: self._log.warning(msg) elif mode == DatasetCheckMode.abort: raise DatasetError(msg) class EntriesConsistencyDataProcessor(DataChecker): alias = ['consistency'] def __init__(self, config, onChange):
def update(self, state): self.state = state self.changed = time.time() self.history[self.attempt] = self.dict.get('dest', 'N/A') def assignId(self, wmsId): self.dict['legacy'] = None # Legacy support self.wmsId = wmsId self.attempt = self.attempt + 1 self.submitted = time.time() makeEnum([ 'INIT', 'SUBMITTED', 'DISABLED', 'READY', 'WAITING', 'QUEUED', 'ABORTED', 'RUNNING', 'CANCELLED', 'DONE', 'FAILED', 'SUCCESS' ], Job, useHash=False) class JobClass(object): mkJobClass = lambda *fList: (reduce(operator.add, imap(lambda f: 1 << f, fList)), fList) ATWMS = mkJobClass(Job.SUBMITTED, Job.WAITING, Job.READY, Job.QUEUED) RUNNING = mkJobClass(Job.RUNNING) PROCESSING = mkJobClass(Job.SUBMITTED, Job.WAITING, Job.READY, Job.QUEUED, Job.RUNNING) READY = mkJobClass(Job.INIT, Job.FAILED, Job.ABORTED, Job.CANCELLED) DONE = mkJobClass(Job.DONE) SUCCESS = mkJobClass(Job.SUCCESS) DISABLED = mkJobClass(Job.DISABLED)
from urllib.parse import urlencode class RestError(NestedException): pass class RestSession(Plugin): def __init__(self): pass def request(self, mode, url, headers, params=None, data=None, cert=None): raise AbstractError makeEnum(['GET', 'PUT', 'POST', 'DELETE'], RestSession) class RestClient(object): def __init__(self, cert=None, url=None, default_headers=None, process_result=None, process_data=None, session=None): self._log = logging.getLogger('webservice') (self._cert, self._url, self._headers) = (cert, url, default_headers) (self._process_result, self._process_data) = (process_result or identity, process_data or urlencode)
class RemoteProcessHandler(object):
	# enum for connection type - LOCAL exists to ensure uniform interfacing with local programs if needed
	RPHType = makeEnum(['LOCAL', 'SSH', 'GSISSH'])

	# helper functions - properly prepare argument string for passing via interface
	def _argFormatSSH(self, args):
		# single-quote args for the remote shell, escaping embedded single quotes
		return "'" + args.replace("'", "'\\''") + "'"

	def _argFormatLocal(self, args):
		# local execution needs no extra quoting
		return args

	# template for input for connection types; the SSH/GSISSH templates use
	# doubled %-escapes because they are %-substituted twice (rhost, then args)
	RPHTemplate = {
		RPHType.LOCAL: {
			'command' : "%(args)s %(cmdargs)s %%(cmd)s",
			'copy' : "cp -r %(args)s %(cpargs)s %%(source)s %%(dest)s",
			'path' : "%(path)s",
			'argFormat' : _argFormatLocal
			},
		RPHType.SSH: {
			'command' : "ssh %%(args)s %%(cmdargs)s %(rhost)s %%%%(cmd)s",
			'copy' : "scp -r %%(args)s %%(cpargs)s %%%%(source)s %%%%(dest)s",
			'path' : "%(host)s:%(path)s",
			'argFormat' : _argFormatSSH
			},
		RPHType.GSISSH: {
			'command' : "gsissh %%(args)s %%(cmdargs)s %(rhost)s %%%%(cmd)s",
			'copy' : "gsiscp -r %%(args)s %%(cpargs)s %%%%(source)s %%%%(dest)s",
			'path' : "%(host)s:%(path)s",
			'argFormat' : _argFormatSSH
			},
		}

	def __init__(self, remoteType="", **kwargs):
		self._log = logging.getLogger('backend.condor')
		self.cmd=False
		# pick requested remote connection
		try:
			self.remoteType = self.RPHType.str2enum(remoteType)
			self.cmd = self.RPHTemplate[self.remoteType]["command"]
			self.copy = self.RPHTemplate[self.remoteType]["copy"]
			self.path = self.RPHTemplate[self.remoteType]["path"]
			self.argFormat = self.RPHTemplate[self.remoteType]["argFormat"]
		except Exception:
			raise ConfigError("Request to initialize RemoteProcessHandler of unknown type: %s" % remoteType)
		# destination should be of type: [user@]host
		if self.remoteType==self.RPHType.SSH or self.remoteType==self.RPHType.GSISSH:
			try:
				# first substitution pass: bind the remote host into the templates
				self.cmd = self.cmd % { "rhost" : kwargs["host"] }
				self.copy = self.copy % { "rhost" : kwargs["host"] }
				self.host = kwargs["host"]
			except Exception:
				raise ConfigError("Request to initialize RemoteProcessHandler of type %s without remote host."
					% self.RPHType.enum2str(self.remoteType))
		# add default arguments for all commands (second substitution pass)
		self.cmd = self.cmd % { "cmdargs" : kwargs.get("cmdargs",""), "args" : kwargs.get("args","") }
		self.copy = self.copy % { "cpargs" : kwargs.get("cpargs",""), "args" : kwargs.get("args","") }
		# test connection once
		proc = LoggedProcess(self.cmd % { "cmd" : "exit"})
		ret = proc.getAll()[0]
		if ret != 0:
			raise CondorProcessError('Validation of remote connection failed!', proc)
		self._log.log(logging.INFO2, 'Remote interface initialized:\n\tCmd: %s\n\tCp : %s', self.cmd, self.copy)

	# return instance of LoggedExecute with input properly wrapped
	# (defaultArg is a module-level sentinel defined outside this excerpt)
	def LoggedExecute(self, cmd, args = '', argFormat=defaultArg):
		if argFormat is defaultArg:
			argFormat=self.argFormat
		return LoggedProcess( self.cmd % { "cmd" : argFormat(self, "%s %s" % ( cmd, args )) } )

	def LoggedCopyToRemote(self, source, dest):
		# copy a local path to <host>:<dest> on the remote side
		return LoggedProcess( self.copy % { "source" : source, "dest" : self.path%{"host":self.host,"path":dest} } )

	def LoggedCopyFromRemote(self, source, dest):
		# copy <host>:<source> from the remote side to a local path
		return LoggedProcess( self.copy % { "source" : self.path%{"host":self.host,"path":source}, "dest" : dest } )

	def LoggedCopy(self, source, dest, remoteKey="<remote>"):
		# copy with explicit "<remote>" prefixes marking which side is remote
		if source.startswith(remoteKey):
			source = self.path%{"host":self.host,"path":source[len(remoteKey):]}
		if dest.startswith(remoteKey):
			dest = self.path%{"host":self.host,"path":dest[len(remoteKey):]}
		# NOTE(review): the host is prefixed to 'source' unconditionally here,
		# even when 'source' was already expanded to host:path above or is
		# purely local - looks inconsistent with the two methods above; confirm
		return LoggedProcess( self.copy % { "source" : "%s:%s"%(self.host,source), "dest" : dest } )
from grid_control.utils.data_structures import makeEnum
from hpfwk import AbstractError, NestedException, Plugin
from python_compat import imap, irange, itemgetter, lmap, next, sort_inplace


def fast_search(lst, key_fun, key):
	# Binary search (bisect_left shape) over 'lst', which must be sorted by
	# key_fun; returns the first element whose key equals 'key', or None
	# (implicitly) when no such element exists.
	(idx, hi) = (0, len(lst))
	while idx < hi:
		mid = int((idx + hi) / 2)
		if key_fun(lst[mid]) < key:
			idx = mid + 1
		else:
			hi = mid
	if (idx < len(lst)) and (key_fun(lst[idx]) == key):
		return lst[idx]


ResyncMode = makeEnum(['disable', 'complete', 'changed', 'ignore'])  # prio: "disable" overrides "complete", etc.
# Modes for which 'changed' handling does not apply
ResyncMode.noChanged = [ResyncMode.disable, ResyncMode.complete, ResyncMode.ignore]
ResyncOrder = makeEnum(['append', 'preserve', 'fillgap', 'reorder'])  # reorder mechanism


class PartitionError(NestedException):
	pass


class DataSplitterIO(Plugin):
	# Interface for reading / writing job partition (splitting) information
	def saveSplitting(self, path, meta, source, sourceLenHint, message = 'Writing job mapping file'):
		raise AbstractError

	def loadSplitting(self, path):
		raise AbstractError
jobData.items()): fileIdx = fileKey.replace('FILE', '').rjust(1, '0') result[int(fileIdx)] = dict( izip(oldFileFormat, fileData.strip('"').split(' '))) # parse new job info data format for (fileKey, fileData) in ifilter( lambda key_value: key_value[0].startswith('OUTPUT_FILE'), jobData.items()): (fileIdx, fileProperty) = fileKey.replace('OUTPUT_FILE_', '').split('_') if isinstance(fileData, str): fileData = fileData.strip('"') result.setdefault( int(fileIdx), {})[FileInfoProcessor.str2enum(fileProperty)] = fileData return list(result.values()) makeEnum(['Hash', 'NameLocal', 'NameDest', 'Path'], FileInfoProcessor) class TaskOutputProcessor(OutputProcessor): def __init__(self, task): self._task = task class SandboxProcessor(TaskOutputProcessor): def process(self, dn): return True