def initializeRuntime():
    # initialize DIANE's own configuration (read the config file etc)
    from diane.config import initialize, ConfigError
    try:
        initialize()
    except ConfigError,x:
        from diane import getLogger
        logger = getLogger('setup')
        x.warning(logger)
def fullSetup(print_env=False):
    # switch off the warnings from incompatible extension modules (if any)
    import warnings
    warnings.simplefilter('ignore', RuntimeWarning)

    # keep all var names which have been modified so we can print their values later
    modified_vars = {}

    # set up the environment only if the process has not already been re-executed
    if not os.environ.has_key('DIANE_NO_INTERNAL_SETUP'):
        # the first element in sys.path is the diane package directory (defined in the executable boilerplate)
        prependPath('PYTHONPATH',sys.path[0])
        # put the diane workspace apps directory into the python path
        import diane.workspace
        prependPath('PYTHONPATH',diane.workspace.getPath('apps'))
        prependPath('PYTHONPATH',diane.workspace.getPath('submitters'))
        os.environ['DIANE_RELEASE_DIR'] = getReleaseDir()
        for name in _externalPackages:
            for v in _externalPackages[name]['vars']:
                pp = getPackagePath(name,v)
                if pp:
                    modified_vars[v] = 1
                    prependPath(v,pp)
        if print_env:
            for v in modified_vars:
                print 'export %s=%s'%(v,os.environ[v]),
            print # add a new line to flush stdout
        else:
            from diane import getLogger
            logger = getLogger('setup')
            logger.debug('re-executing setup process, sys.executable=%s, sys.argv=%s',sys.executable,sys.argv)
            # restart the current process (for LD_LIBRARY_PATH to take effect)
            os.environ['DIANE_NO_INTERNAL_SETUP'] = '1'
            _reExecThisProcess()

    from diane import getLogger
    logger = getLogger('setup')
    logger.info('This is DIANE version "%s"',getVersion())
    logger.debug('current process: sys.executable=%s, sys.argv=%s',sys.executable,sys.argv)
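# fullSetup() relies on a prependPath helper defined elsewhere in this module;
# the sketch below shows its assumed behaviour only (prepend a component to a
# colon-separated environment variable, creating the variable if needed):
#
#   def prependPath(var,path):
#       if os.environ.has_key(var):
#           os.environ[var] = path + os.pathsep + os.environ[var]
#       else:
#           os.environ[var] = path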
def getPackagePath(name,var=None,check=True,force=False):
    """Return the top of the external package (including the version and platform
    components of the path). If var is specified then return the full path to the
    subdirectory defined by var. Note that var may define multiple subdirectories
    separated by colons. If check is True then print warning messages if the paths
    do not exist. If force is True then ignore conditional package settings (such
    as maxHexVersion). If the platform is NOPLATF then return an empty string.
    """
    p = _externalPackages[name]

    # if checking is enabled and the current python interpreter version is high enough
    # then return an empty string (the package is not required)
    if not force and p.has_key('maxHexVersion'):
        if sys.hexversion >= int(p['maxHexVersion'],16):
            return ""

    if p.has_key('noarch') and p['noarch']:
        platf = 'noarch'
    else:
        platf = getPlatformString()
        if platf == 'NOPLATF':
            return ''

    prefix_path = [getExternalDir(),name,p['version'],platf]

    def transform(s):
        # get the distribution id
        try:
            distver = platf.split('-')[1]
        except IndexError:
            distver = platf
        # then look up the fixed python version in the pyver table if needed
        try:
            pyver = pyver_table[distver]
        except KeyError:
            pyver = sys.version[:3]
        return s%{"PYVER":pyver}

    if var:
        paths = [transform(os.path.join(*(prefix_path+[p]))) for p in p['vars'][var].split(':') if p]
    else:
        paths = [os.path.join(*prefix_path)]

    for p in paths:
        if not os.path.exists(p):
            from diane import getLogger
            logger = getLogger('setup')
            logger.debug('path %s does not exist (setting %s for %s)',p,var,name)

    return ':'.join(paths)
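# A minimal usage sketch (not part of the module): 'lapack' and the
# LD_LIBRARY_PATH entry are hypothetical names, assuming the package has been
# registered in the _externalPackages table with a matching 'vars' entry:
#
#   top = getPackagePath('lapack')                    # .../external/lapack/<version>/<platform>
#   lib = getPackagePath('lapack','LD_LIBRARY_PATH')  # colon-separated subdirectories for that variable
#   if lib:
#       prependPath('LD_LIBRARY_PATH',lib)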
import diane
logger = diane.getLogger('WorkerAgent')
config = diane.getConfig('WorkerAgent')
logger.debug('original config %s',repr(config))

config.addOption('HEARTBEAT_DELAY',10,"default periodicity of heartbeat")
config.addOption('HEARTBEAT_TIMEOUT',30,"timeout for heartbeat calls; if a heartbeat call cannot be completed in HEARTBEAT_TIMEOUT seconds, we assume that the peer (master or directory service) is lost")
config.addOption('BOOTSTRAP_CONTACT_TIMEOUT',30,"timeout for bootstrapping new connections")
config.addOption('BOOTSTRAP_CONTACT_REPEAT',10,"the number of times the agent will attempt to establish the first contact with the master")
config.addOption('PULL_REQUEST_DELAY',0.2,"delay in seconds between updating the result to the master and pulling a new task")
config.addOption('APPLICATION_SHELL','','Shell for running the application. If left empty then the application package is imported directly into the WorkerAgent process. Otherwise a separate process is started with the specified shell (e.g. "sh"). That process is called a servlet (implemented by the WorkerServlet module) and the application package is imported there.')

# constants
miliseconds = 1000

import omniORB
from omniORB import CORBA
import DIANE_CORBA

import time
import os
import sys

import streamer

from diane.application import create_application_proxy
import diane.application
from diane.BaseThread import BaseThread

class HeartbeatThread(BaseThread):
    """ Periodically ping a given peer (master or directory service).
    """
import os

import diane
logger = diane.getLogger('CachingExecutableApplication')

#######
# main hooks

# this application plugin extends the executable application, hence a convenience shortcut
import diane_test_applications.ExecutableApplication as executable

# the main hook for the user run file
def run(input,config):
    input.scheduler = diane.SimpleTaskScheduler
    input.manager = ApplicationManager
    input.worker = Worker
    input.data = RunData()

# use the same postprocessing hook as ExecutableApplication
run_post = executable.run_post

########
# run data extensions

class Policy(executable.ExecutablePolicy):
    """ Extended policies to control caching parameters.
    """
    BREAK_CACHE_LOCK_TIMEOUT = 3600
""" This is a dummy application to check if the configuration options are passed correctly to the worker. """ import diane logger = diane.getLogger('test_config') import diane.config class TCWorker: def check(self): for name in self.config_dict: d = self.config_dict[name] c = diane.config.getConfig(name) for opt in d: try: val = getattr(c,opt) if val != d[opt]: logger.error("problem with option %s.%s : requested value %s, actual configuration value: %s",name,opt,d[opt],val) return except AttributeError: logger.error("problem with option %s.%s : not found in the actual configuration") return def initialize(self,config_dict): self.config_dict = config_dict self.check() def finalize(self,x): self.check() def do_work(self,x): self.check()
## functions dealing with reading user-defined run files and preparing run input

from diane import getLogger
logger = getLogger("RunMaster")

from diane.diane_exceptions import DianeException

class InputError(DianeException):
    pass

class Input:
    """Collection of user-controllable parameters of diane-run.
    The parameters starting with an underscore are read-only and are initialized
    by DIANE automatically:
      _runfile : name (absolute path) of the runfile
      _rundir  : absolute path of the run directory
    """
    def __init__(self):
        self._runfile = None
        self._rundir = None

class Runfile:
    """ Interface to the runfile: resolve attribute values and call functions
    using the priority mechanism: whatever is defined explicitly in the runfile
    takes precedence over what is defined at the application level.
    """
from Data import *
from diane.util import File
from diane import IApplicationWorker, SimpleApplicationManager
from diane import getLogger
logger = getLogger('G4Analysis')
from diane.analysis_tools.stats import Histogram1D
import StringIO
import os

class ApplicationManager(SimpleApplicationManager):
    def __init__(self):
        SimpleApplicationManager.__init__(self)
        self.done_counter = 0

    def initialize(self,run_data):
        # compute and store the number of tasks
        self.task_num = run_data.runParameters.eventNumber/run_data.eventChunk + 1
        # the original C++ module used CLHEP for the initial table of random seeds:
        #   HepRandom::setTheEngine(new HepJamesRandom);
        #   HepRandom::setTheSeed(run_data.runParams.seed);
        import random
        random.seed(run_data.runParameters.seed)
import os,sys
import time
import pickle

from config import config

from diane.util.compatibility import uuid

import diane
logger = diane.getLogger('FileTransfer.Client')

def unique_md5sum_hexdigest(name,opts=None):
    """ Return the md5 checksum of a file. If the file does not exist or there
    is a read error then return a globally unique identifier instead.
    """
    if opts is None:
        opts = FileTransferOptions()
    try:
        f = file(name,'r')
        import md5
        m = md5.new()
        data = f.read(opts.CHUNK_SIZE)
        while bool(data):
            m.update(data)
            data = f.read(opts.CHUNK_SIZE)
        return m.hexdigest()
    except IOError,x:
        logger.debug('md5sum: %s',str(x))
        return uuid()
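# Usage sketch (the file names are hypothetical; FileTransferOptions is assumed
# to be defined elsewhere in this module with a CHUNK_SIZE attribute):
#
#   digest = unique_md5sum_hexdigest('/tmp/payload.tar.gz')
#   # for a missing file the result is still usable as a unique key:
#   digest2 = unique_md5sum_hexdigest('/tmp/no-such-file')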
import diane
logger = diane.getLogger('RunMaster')

import diane.util

import DIANE_CORBA
import DIANE_CORBA__POA

import sys, time

config = diane.getConfig('RunMaster')
config.addOption('LOST_WORKER_TIMEOUT',60,'timeout in seconds to declare a worker as "lost"')
config.addOption('CONTROL_DELAY',1,'default periodicity of the control loop')
config.addOption('IDLE_WORKER_TIMEOUT', 600, 'if a worker stays idle for this time then it is automatically removed from the pool, 0 means that the worker is never removed')

# --- temporary config and logger workaround
import diane.WorkerRegistry as WorkerRegistryModule
WorkerRegistryModule.config = config
WorkerRegistryModule.logger = logger
# ---

import diane.journal

from diane.TaskInfo import TaskInfo,TaskStatus
from diane.Peer import Peer
# make sure that you always import with the full package name (otherwise pickle on the client side will complain)
from diane.WorkerRegistry import WorkerRegistry

import streamer
import diane
logger = diane.getLogger("idle")

# this is an application adapter which does nothing (useful for testing purposes)

class Worker:
    def initialize(self, x):
        logger.debug("idle initialize")

    def finalize(self, x):
        logger.debug("idle finalize")

    def do_work(self, x):
        logger.debug("idle do work")

from diane import SimpleApplicationManager
from diane import SimpleTaskScheduler

class IdleApplicationManager(SimpleApplicationManager):
    def has_more_work(self):
        return True

# define the application symbol to point to this module if we are not being imported;
# this allows this file to be used as a run file
if __name__ != "diane_test_applications.idle":
    import diane_test_applications.idle as application
#!/usr/bin/env python
# ===============================================
# SETUP FULL DIANE ENVIRONMENT
# Note: this is copy/paste logic for executable scripts.
# Note: the minimal diane environment must already be set up (i.e. diane-env in PATH).
# Note: this code does not work in interactive python sessions or with the -c python option.
import os.path,sys
setup_done = False
for p in os.environ['PATH'].split(os.pathsep):
    if os.path.exists(os.path.join(p,'diane-env')):
        sys.path.insert(0, os.path.join(os.path.dirname(p),'python'))
        import diane.PACKAGE
        diane.PACKAGE.standardSetup()
        diane.PACKAGE.initializeRuntime()
        setup_done = True
        break
if not setup_done:
    print >> sys.stderr, 'ERROR: cannot set up DIANE environment (diane-env not in PATH)'
    sys.exit(-1)
# ===============================================

# Here is an example:
import diane
logger = diane.getLogger('mylogger')
logger.info('Hey, I am using DIANE logger')
#!/usr/bin/env ganga
#-*-python-*-
# author: [email protected]

# This script implements a RemoteSubmitter using ganga's Remote backend.
# Please consult ganga's manual on the usage of the Remote backend.
#
# usage:
#  diane-env -d ganga RemoteSubmitter.py --host="somehost.cern.ch" --username="******" [--prescript=any_additional_cmd] \
#            [--ganga_cmd="ganga_executable_full_path_on_remote_host"] [--ganga_dir="ganga_directory_for_remote_backend"]

#from Ganga.GPIDev.Base import *
#from Ganga.GPIDev.Schema import *

from diane import getLogger
logger=getLogger('RemoteSubmitter')

from diane.submitters import Submitter

prog = Submitter()
prog.download=False
prog.parser.description="Submit worker agents to a remote host. "+prog.parser.description
prog.parser.add_option("--host",dest="host",type="string",default='localhost',help="Set the remote host name")
prog.parser.add_option("--username",dest="username",type="string",default='',help="Remote user name")
prog.parser.add_option("--prescript",dest="prescript",default=[],help="Additional script to be executed on the remote host before the application. May be used several times.",action="append")
prog.parser.add_option("--ganga_cmd",dest="gangacmd",help="Ganga command (with full path) on the remote host")
prog.parser.add_option("--ganga_dir",dest="gangadir",help="Ganga remote directory for jobs")
prog.parser.add_option("--delay",dest="delay",type="int",default=0,help="delay in seconds between the worker submissions (to avoid spikes in the worker agent registration)")
prog.parser.add_option("--config_file",dest="config_file",help="Configuration file with default values of some parameters; if not specified use $DIANE_USER_WORKSPACE/remotesubmitters_config.py")
prog.parser.add_option("--ssh_key",dest="ssh_key",help="SSH key file. Needed to avoid ganga asking for the remote host password. Refer to the Ganga Remote backend for details.")
prog.parser.add_option("--key_type",dest="key_type",help="SSH key type. Needed if ssh_key is specified. Refer to the Ganga Remote backend for details.")
import diane
logger = diane.getLogger('AtlasPilotJobs')

import os.path, shutil, glob

from diane_test_applications import ExecutableApplication

class AtlasPilotRunData(ExecutableApplication.ExecutableRunData):
    def __init__(self):
        ExecutableApplication.ExecutableRunData.__init__(self)

    def newTask(self):
        d = AtlasPilotTaskData()
        self.tasks.append(d)
        return d

AtlasPilotWorkerData = ExecutableApplication.ExecutableWorkerData

class AtlasPilotTaskData(ExecutableApplication.ExecutableTaskData):
    def __init__(self):
        ExecutableApplication.ExecutableTaskData.__init__(self)
        self.requirements = PilotTaskRequirements()

AtlasPilotTaskResult = ExecutableApplication.ExecutableTaskResult

PilotWorkerCapabilities = frozenset
PilotTaskRequirements = frozenset
import diane
logger = diane.getLogger("StandingCall")

import time

class StandingCallFailed(Exception):
    pass

class StandingCallStopped(Exception):
    pass

class StandingCall:
    """ Invoke methods of a remote object as standing calls.

    A standing call is a mechanism which provides automatic retry in case of
    (transient) network problems, so when a standing call ultimately fails the
    client may be pretty sure that the server cannot be reached. Additionally
    the standing call implements the handling of the XHangup and XRepeatCall
    exceptions, so that the server may explicitly request to have the call
    repeated or to terminate the conversation with the client.

    The StandingCall class is a wrapper for a remote object reference. Each
    method is implicitly invoked as a standing call. The configuration
    parameters of the standing call are defined in the constructor.
    """
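# A minimal sketch of the retry loop behind a standing call. This is an
# illustration only, not the actual implementation: the retry_count and
# retry_delay names and the exception locations (DIANE_CORBA.XRepeatCall,
# DIANE_CORBA.XHangup) are assumptions based on the docstring above:
#
#   def _standing_call(method, args, retry_count, retry_delay):
#       for attempt in range(retry_count):
#           try:
#               return method(*args)
#           except DIANE_CORBA.XRepeatCall:
#               continue                      # server asked to repeat the call
#           except DIANE_CORBA.XHangup:
#               raise StandingCallStopped()   # server ended the conversation
#           except CORBA.TRANSIENT:
#               time.sleep(retry_delay)       # transient network problem, retry later
#       raise StandingCallFailed()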
#
# Modified 01/2007
#
#####################################################################

import os

def chmod_executable(path):
    "make a file executable"
    import stat,os
    os.chmod(path,stat.S_IXUSR|os.stat(path).st_mode)

from diane import IApplicationWorker, SimpleApplicationManager, SimpleTaskScheduler
from diane import getLogger
logger = getLogger('CrashApplication')

class G4ProdApplicationManager(SimpleApplicationManager):
    def initialize(self, job_data):
        ###################
        # Creating Plan:
        # Input:
        #   job_data: includes the dictionary 'JobInitData' specified in the .job file,
        #   with the keys {'exedir','tarname'}, where:
        #
        #   exedir: directory containing the following files:
        #     - geant4_executables*.py -- executable files
        #       (one file corresponds to one task)
        #     the parent directory must contain:
        #     - geant4_production.sh -- the driver (script running the executable)
import diane
logger = diane.getLogger('ExecutableApplication')

import os.path, shutil, glob

class ExecutablePolicy:
    """ Collection of parameters (with default values) to control the Executable application.
    """
    ## Allow missing output files.
    ## True  => the worker will try to upload as many output files as possible and report the task as completed.
    ## False => if any of the output files cannot be uploaded then the task is reported as failed.
    ALLOW_MISSING_OUTPUT = False

class ExecutableRunData:
    def __init__(self):
        # list of TaskData items
        self.tasks = []
        # unless specified otherwise all tasks share these defaults
        self.task_defaults = ExecutableTaskData()
        self.task_defaults.input_files = []
        self.task_defaults.output_files = []
        self.task_defaults.args = []
        self.policy = ExecutablePolicy()
        # these attributes are set automatically and are needed to copy the input files
        self._udir = None # user directory (where diane-run was executed)
        self._idir = None # input_files directory
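# A sketch of how a run file might fill in ExecutableRunData. The executable
# attribute, file names and the newTask() helper are illustrative assumptions
# (newTask() is modelled on the subclass pattern used by AtlasPilotRunData);
# the actual conventions are defined by the ExecutableApplication run() hook:
#
#   def run(input,config):
#       d = input.data
#       d.task_defaults.input_files = ['common.dat']
#       for i in range(10):
#           t = d.newTask()                 # hypothetical helper appending to d.tasks
#           t.args = [str(i)]
#           t.output_files = ['out.%d' % i]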
import diane
logger = diane.getLogger("TaskScheduler")

class Policy:
    """ Collection of policy values. Empty by default."""
    pass

# PENDING: TODO: instrument the methods of TaskScheduler on the fly with
# log and journal statements. This will guarantee that all user-defined
# schedulers will be logged and journalled correctly.

from diane.BaseThread import BaseThread

class ITaskScheduler(BaseThread):
    """ The TaskScheduler keeps track of the tasks, controls how tasks are
    assigned to workers and how task output is processed (integrated).

    TaskInfo objects are used to represent tasks. WorkerEntry objects are used
    to represent the worker agents.

    The task scheduler runs in a separate thread and is notified by the
    RunMaster via callback methods (defined below). An application manager (if
    defined) may be used to separate the scheduling functionality from the
    application-specific actions. See IApplicationManager.
    """
    ## Policy parameters defining the behaviour of the TaskScheduler.
    ## The parameters are specific to the implementation and may be set by the user at runtime.
def _make_logger():
    global logger
    if logger is None:
        from diane import getLogger
        logger = getLogger('config')
import threading
import traceback

import diane
logger = diane.getLogger('BaseThread')

class BaseThread(threading.Thread):
    """ Application thread base class.
    """
    def __init__(self,name=None,auto_register=True):
        """ Create a DIANE service thread (and register it as such in the
        singleton program unless auto_register == False)."""
        if name is None:
            name = ''
        name = '.'.join(['diane.BaseThread',name])
        threading.Thread.__init__(self,name=name)
        self.setDaemon(1) # we can exit the entire application at any time
        self.__should_stop_flag = False
        logger.debug("BaseThread object created: %s",self.__class__.__name__)
        if auto_register:
            import diane.CORBAProgram
            diane.CORBAProgram.theProgramInstance.addServiceThread(self)

    def should_stop(self):
        return self.__should_stop_flag

    def stop(self):
        if not self.__should_stop_flag:
            logger.debug("Stopping: %s",self.__class__.__name__)
            self.__should_stop_flag = True
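# A minimal usage sketch: a service thread that polls should_stop() in its
# run() loop. The TickerThread name is illustrative; auto_register=False is
# used so the example does not require a running CORBAProgram instance:
#
#   import time
#
#   class TickerThread(BaseThread):
#       def run(self):
#           while not self.should_stop():
#               logger.debug('tick')
#               time.sleep(1)
#
#   t = TickerThread(name='ticker',auto_register=False)
#   t.start()
#   # ... later:
#   t.stop()  # run() observes the flag and exits the loop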
import os
import glob
import shutil
import time

def chmod_executable(path):
    "make a file executable"
    import stat,os
    os.chmod(path,stat.S_IXUSR|os.stat(path).st_mode)

from diane import IApplicationWorker, SimpleApplicationManager, SimpleTaskScheduler, IApplicationManager
from diane.BaseThread import BaseThread
from diane import getLogger
logger = getLogger('THISApplication')

#FIXME: hardcoded for the time being
EVENT_BUNCH = 100000
TOTAL_EVENTS = 20000000

# basedir contains:
#   input/*  - all files to be copied to the WN
#   output/* - output files received from the WN
#   rndm/*   - random seeds (one file per task sequence)

class RunData:
    def __init__(self):
        self.basedir = None
        self.executable = 'this' # name of an executable file
        self.rundir = 'this/Thorax' # run directory on the worker node (containing the executable and the mac directory)
        self.args = ['mac/Thorax-segRe-6-RV.mac',
from G4Analysis import Worker, ApplicationManager
# uncomment this line if you want to enable the shared library mode
#from G4AnalysisSharedLibWorker import SharedLibWorker, setup_application
from Data import *
from diane import SimpleTaskScheduler

import diane
logger = diane.getLogger('G4Analysis')

def run(input,config):
    input.worker = Worker
    input.manager = ApplicationManager
    input.scheduler = SimpleTaskScheduler
    ####################
    # shared library mode
    #config.WorkerAgent.APPLICATION_SHELL='sh' # run via a separate shell process
    #input.worker = SharedLibWorker
    ####################

# clean up and tar the application directory
def run_post(input,config):
    import os
    appname = input.data.workerInit.G4ApplicationName
    logger.info('simulation module: %s',appname)
    try:
from ITaskScheduler import ITaskScheduler

import time
import Queue

import diane
logger = diane.getLogger('SimpleTaskScheduler')

class SimplePolicy:
    """ Collection of parameters to control the SimpleTaskScheduler.
    """
    ## True => automatically discard init_input and init_output after the worker has been fully initialized,
    ## by resetting the corresponding attributes of WorkerEntry to None.
    ## This gives a chance for garbage collection of unneeded data in memory.
    INIT_DATA_CLEANUP = True

    ## True => automatically discard task.input_data after the task has been handled by the task scheduler (completed or ignored).
    TASK_INPUT_CLEANUP = True

    ## True => automatically discard task.details.output_data after the task has been handled by the task scheduler (completed or ignored).
    TASK_OUTPUT_CLEANUP = True

    ## The number of times task execution is attempted (while the task is reported as failed).
    ## After reaching this number the STOP_IF_FAILED_TASKS policy is applied.
    ## For example: FAILED_TASK_MAX_ASSIGN == 2 => a failed task will be retried only once.
    FAILED_TASK_MAX_ASSIGN = 3

    ## False => ignore failed tasks and continue running. True => stop the run immediately after detecting faulty tasks.
    ## This policy applies when the number of failed tasks exceeds the FAILED_TASK_MAX_ASSIGN number.
    STOP_IF_FAILED_TASKS = False
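# A sketch of tuning these policies from a run file. The exact way a run file
# reaches the SimplePolicy instance is an assumption based on the attributes
# above (the scheduler is assumed to expose it as 'policy'):
#
#   def run(input,config):
#       input.scheduler = diane.SimpleTaskScheduler
#       # allow 5 assignments of a failed task in total, then stop the whole run
#       input.scheduler.policy.FAILED_TASK_MAX_ASSIGN = 5
#       input.scheduler.policy.STOP_IF_FAILED_TASKS = True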
#!/usr/bin/env ganga
#-*-python-*-
#
# This script may be placed in ~/diane/submitters (it will then take precedence
# over any submitters with the same name contained in the release tree).
#
# usage:
#  diane-submitter SAGA [options]
#
# print all available options: diane-submitter SAGA -h
#

import sys

from diane import getLogger
logger=getLogger('SAGASubmitter')

from diane.submitters import Submitter

prog = Submitter()
prog.download=False
prog.parser.description="Submit worker agents using SAGA. "+prog.parser.description

# These parameters are required for the SAGA backend
prog.parser.add_option("--jobservice-url",type="string",default="",help="the remote job service url (e.g. gram://qb1.loni.org/jobmanager-pbs)")
prog.parser.add_option("--filesystem-url",type="string",default="",help="the remote filesystem root (e.g. gsiftp://qb1.loni.org/work/oweidner/diane-worker/)")
prog.parser.add_option("--allocation",type="string",default="",help="the allocation to be used for job accounting")
prog.parser.add_option("--delay",dest="delay",type="int",default=0,help="delay in seconds between the worker submissions (to avoid spikes in the worker agent registration)")

# this wrapper will start a number of worker agents using ssh on the hosts specified by $PBS_NODEFILE
# it assumes, however, that all the nodes are connected by a shared file system
from crash import *
run2 = run

from diane import getLogger
logger = getLogger('FinalizeCrashApplication')

class FinalizeCrashApplicationManager(CrashApplicationManager):
    def finalize(self):
        N = 20
        import time
        logger.info('finalizing for %d seconds',N)
        time.sleep(N)
        CrashApplicationManager.finalize(self)
        logger.info('finalize finished')

def run(input,config):
    run2(input,config)
    input.manager = FinalizeCrashApplicationManager
import atexit, diane, gzip, logging, os, pickle, random, sys, threading, time
from diane.util import importName

# store the lockfile, timestamp data and error logs in gangadir/agent_factory
DATA_PATH = os.path.join(config.Configuration.gangadir, 'agent_factory')
# pickled timestamps
AGENT_FACTORY_DATA_FILE = os.path.join(DATA_PATH, 'agent_factory_data')
# failure_log directory to store the information about the failed jobs
FAILURE_LOG_DIR = os.path.join(DATA_PATH, 'failure_log')

# set up the logger
logger = diane.getLogger('agent_factory')

# exception definitions for the FLock class
class LockAcquireError(Exception):
    pass

class LockReleaseError(Exception):
    pass

class FLock(object):
    """A simple file lock class. Creates a directory (since mkdir on UNIX is
    atomic) which serves as a lock and allows detecting previous instances of
    the application."""
    def __init__(self, path):
        self.path = path
        self.filename = 'lockfile'
        self.pid = os.getpid()
        self.lockfile = os.path.join(self.path, self.filename + '_' + str(self.pid))

    def acquire(self):
from diane import getLogger
logger = getLogger('sample1')

from diane import IApplicationWorker, SimpleApplicationManager
from diane.application import ApplicationFailure

class Worker(IApplicationWorker):
    def initialize(self,x):
        logger.info('app.initialize(%s)',str(x))
        return None

    def finalize(self,x):
        # PENDING: define what gets called on a do_work() error or when there is a system problem
        logger.info('app.finalize(%s)',str(x))

    def do_work(self,x):
        import time
        #time.sleep(0.5)
        logger.info('app.do_work(%s)',x)
        if x%2:
            #logger.info('failing task %s',x)
            #raise ApplicationFailure()
            pass
        return None

class SampleStaticApplicationManager(SimpleApplicationManager):
    def initialize(self,job_input):
        self.N = 10
        self.done_counter = 0
        return [self._task() for tid in range(1,self.N+1)]

    def tasks_done(self,tasks):
parser.add_option("-c","--config", dest="config",default=None,help="specify the configuration file",metavar="FILE.cfg")

options,args = parser.parse_args()

if options.config:
    configs = [options.config] # use the specified config file
else:
    configs = configs() # use all *.cfg files

import diane.test.utils
import logging
diane.test.utils.logger.setLevel(logging.CRITICAL)

from diane import getLogger
logger = getLogger('test.driver')

if len(configs)>1:
    logger.user('selected test configurations: %s',configs)
    import subprocess
    for c in configs:
        # run each test configuration in a separate process (to avoid possible interference between runs)
        if subprocess.call(['python',sys.argv[0],'--config=%s'%c]) != 0:
            break
else:
    c = configs[0]
    logger.user('running full test suite using %s',c)
    cf = os.path.abspath(c)
    if not os.path.exists(cf):
        logger.error('config file %s does not exist',c)
import subprocess
import time
import os, signal

import diane
logger = diane.getLogger('test.case')

from diane.test.utils import *

# added optional detection of false positives (e.g. NameError); case does not matter
def report_errors(fn,false_positives=[]):
    """The false_positives must contain the ERROR or EXCEPT substrings (the case
    does not matter): the search for false_positives is case-insensitive.
    """
    for sub in false_positives:
        if sub.upper().find('ERROR') == -1 and sub.upper().find('EXCEPT') == -1:
            raise ValueError('False positive %s must contain the ERROR or EXCEPT substring'%sub)
    for l in file(fn).readlines():
        false_positives_cnt = 0
        for sub in false_positives:
            false_positives_cnt += l.upper().count(sub.upper())
        if false_positives_cnt != l.upper().count('ERROR') + l.upper().count('EXCEPT'):
            logger.error('%s: %s',fn,l)
            return False
    return True

def get_output_dir():
    try:
        return os.path.join('output',os.path.basename(os.environ['DIANE_CONFIG']))
    except KeyError:
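# Usage sketch (the log file name is hypothetical): scan a log for ERROR/EXCEPT
# lines while ignoring the NameError occurrences we know to be harmless:
#
#   ok = report_errors('output/worker.log', false_positives=['NameError'])
#   if not ok:
#       logger.error('unexpected errors found in the worker log')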
import diane
logger = diane.getLogger("application")

class Boot:
    def __init__(self):
        self.application_name = None
        self.worker_class_name = None
        self.config = None
        self.runid = None
        self.master_uuid = None

    def log(self):
        for x in self.__dict__:
            logger.info("%s = %s", x, repr(getattr(self, x)))

from diane.util import importName

# recoverable application problem
class ApplicationFailure(Exception):
    pass

# FIXME: traceback_string variable
# unrecoverable application problem (e.g. crash due to abort())
class ApplicationCritical(Exception):
    pass

import traceback
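# A sketch of the intended failure semantics in an application worker (the
# worker method and the error mapping below are illustrative, not the actual
# DIANE dispatch code; process() is a hypothetical task function):
#
#   def do_work(self,x):
#       try:
#           return process(x)
#       except ValueError:
#           raise ApplicationFailure()   # recoverable: the task may be retried
#       except MemoryError:
#           raise ApplicationCritical()  # unrecoverable: give up on this worker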