def setUp(self):
    """
    Build the circuit fixture shared by the tests.

    Sets up the scheduler configuration from its ini file, creates a
    ``MyCircuit`` (id 1) running the exec / delete / done phases, fills in
    the command and target attributes, then installs a minimal stand-in
    dispatcher on the circuit.
    """
    from pulse2.scheduler.config import SchedulerConfig

    scheduler_config = SchedulerConfig()
    scheduler_config.setup("/etc/mmc/pulse2/scheduler/scheduler.ini")

    self.circuit = MyCircuit(1,
                             [ExecPhase, DelPhase, DonePhase],
                             scheduler_config)

    # identifiers of the command and of the command-on-host
    self.circuit.cohq.cmd.id = 1
    self.circuit.cohq.coh.id = 1

    # description of the target; each network field holds two values
    # joined by "||"
    target = self.circuit.cohq.target
    target.target_name = "my_hostname"
    target.target_ipaddr = "55.12.120.83||127.0.0.1"
    target.target_macaddr = "00:01:00:52:2d:01||00:01:00:52:2d:01"
    target.target_network = "255.255.0.0||255.0.0.0"

    # some needed objects
    class Statistics(object):
        stats = []

        def update(cls, id):
            pass

    # throw-away dispatcher class exposing only "release" and "statistics"
    dispatcher_attrs = {
        "release": lambda x: x,
        "statistics": Statistics(),
    }
    dispatcher = type("MscContainer", (object,), dispatcher_attrs)
    self.circuit.install_dispatcher(dispatcher)
def _putResult(deferred, function, args, kwargs):
    """
    Run ``function(*args, **kwargs)`` in the current thread and forward the
    outcome to ``deferred``.

    - on exception: the error is logged and ``deferred.errback`` is scheduled
      through ``reactor.callFromThread`` with a ``Failure`` built from the
      exception being handled;
    - when the function returns a ``Deferred``: ``_cbSuccess`` / ``_cbFailure``
      are attached to it, each receiving ``deferred`` as extra argument;
    - otherwise: ``deferred.callback`` is scheduled with the plain result.

    Start, error and elapsed time are logged together with the scheduler
    name and the thread number.
    """
    def _thread_no():
        # third "-"-separated field of the current thread name
        return threading.currentThread().getName().split("-")[2]

    started_at = time.time()
    log = logging.getLogger()
    log.debug(
        'scheduler "%s": THREAD: Thread #%s : start %s' %
        (SchedulerConfig().name, _thread_no(), function.__name__))

    try:
        result = function(*args, **kwargs)
    except:
        # bare except kept on purpose: whatever was raised is reported to
        # the deferred instead of being lost in the worker thread
        log.error(
            'scheduler "%s": THREAD: Thread #%s : error %s' %
            (SchedulerConfig().name, _thread_no(), sys.exc_info()[0]))
        twisted.internet.reactor.callFromThread(
            deferred.errback, twisted.python.failure.Failure())
    else:
        if not isinstance(result, twisted.internet.defer.Deferred):
            # plain value: fire the deferred from the reactor thread
            twisted.internet.reactor.callFromThread(
                deferred.callback, result)
        else:
            # the function was itself asynchronous: relay its outcome
            result.addCallback(_cbSuccess, deferred)
            result.addErrback(_cbFailure, deferred)

    log.debug(
        'scheduler "%s": THREAD: Thread #%s : passed %s' %
        (SchedulerConfig().name, _thread_no(), time.time() - started_at))
def setUp(self):
    """
    Prepare ``self.circuit`` before each test.

    The scheduler configuration is set up from its ini file and handed to
    a new ``MyCircuit`` (id 1) together with the exec / delete / done
    phases. The command, command-on-host and target attributes are then
    filled in and a minimal dispatcher replacement is installed.
    """
    from pulse2.scheduler.config import SchedulerConfig

    cfg = SchedulerConfig()
    cfg.setup("/etc/mmc/pulse2/scheduler/scheduler.ini")

    phases = [ExecPhase, DelPhase, DonePhase]
    self.circuit = MyCircuit(1, phases, cfg)

    cohq = self.circuit.cohq
    cohq.cmd.id = 1
    cohq.coh.id = 1
    # network-related target fields carry two "||"-joined values
    cohq.target.target_name = "my_hostname"
    cohq.target.target_ipaddr = "55.12.120.83||127.0.0.1"
    cohq.target.target_macaddr = "00:01:00:52:2d:01||00:01:00:52:2d:01"
    cohq.target.target_network = "255.255.0.0||255.0.0.0"

    # some needed objects
    class Statistics(object):
        stats = []

        def update(cls, id):
            pass

    # the dispatcher is a class created on the fly, carrying only the
    # "release" and "statistics" attributes
    members = {"release": lambda x: x, "statistics": Statistics()}
    self.circuit.install_dispatcher(
        type("MscContainer", (object,), members))
def get_ip_resolve(cls):
    """
    Build an ``IPResolve`` from the scheduler configuration.

    The resolve order, the preferred networks and the netbios path are
    read from ``SchedulerConfig()`` and passed to the ``IPResolve``
    constructor, whose instance is returned.
    """
    return IPResolve(SchedulerConfig().resolv_order,
                     SchedulerConfig().preferred_network,
                     netbios_path=SchedulerConfig().netbios_path)
def getProxy(url):
    """
    Return a ``Pulse2XMLRPCProxy`` bound to ``url``.

    The ``verifypeer``, ``cacert`` and ``localcert`` settings are taken
    from the scheduler configuration and handed over as keyword arguments.
    """
    proxy_options = {
        'verifypeer': SchedulerConfig().verifypeer,
        'cacert': SchedulerConfig().cacert,
        'localcert': SchedulerConfig().localcert,
    }
    return Pulse2XMLRPCProxy(url, **proxy_options)
def getLaunchersBalance():
    """
    return balancing status for launchers

    Works on a copy of ``SchedulerConfig().launchers_uri``: launchers are
    popped one after another and asked for 'get_balance' through
    ``callOnLauncher``. Every truthy answer is stored in a dict keyed by
    launcher name; a failing launcher is logged and skipped.

    Returns the dict itself when no launcher is configured, otherwise the
    object returned by ``callOnLauncher`` (presumably a Deferred) whose
    callback chain ends with that dict.
    """
    def _ask_next(stats, launchers):
        # query one of the remaining launchers, or hand back what has
        # been collected so far
        if not launchers:
            # no more launcher left, give up
            return stats
        (name, uri) = launchers.popitem()
        d = callOnLauncher(None, uri, 'get_balance')
        d.addCallback(_callback, stats, launchers, name).\
            addErrback(_eb, stats, launchers, name)
        return d

    def _eb(reason, stats, launchers, current_launcher):
        # the current launcher failed: log it, then move on
        logging.getLogger().error(
            "scheduler %s: while talking to launcher %s: %s" %
            (SchedulerConfig().name, current_launcher,
             reason.getErrorMessage()))
        return _ask_next(stats, launchers)

    def _callback(result, stats, launchers, current_launcher):
        # we just got a result from a launcher, let's stack it
        if result:
            stats[current_launcher] = result
        # if there is at least one launcher to process, do it
        return _ask_next(stats, launchers)

    return _callback(None, {}, SchedulerConfig().launchers_uri.copy(), None)
def getServerCheck(target):
    """
    Return ``getCheck`` applied to the configured server check and to the
    identifying data of ``target``.

    The data handed to ``getCheck`` is a dict with the target's UUID,
    short name, IPs and MACs, as returned by the target's own getters.
    """
    wanted_check = SchedulerConfig().server_check
    target_data = {
        'uuid': target.getUUID(),
        'shortname': target.getShortName(),
        'ips': target.getIps(),
        'macs': target.getMacs(),
    }
    return getCheck(wanted_check, target_data)
def _finalize(result): if type(result) == list: # got expected struct (launcher, host, port) = result if host == '': host = SchedulerConfig().launchers[launcher]['host'] return (host, port) else: return False
def checkLoops(): ret = True try : if startLoopTS.delta() > 3 * SchedulerConfig().awake_time: # sounds the alarm if more than 3 start iteration were missed logging.getLogger().warn('scheduler %s: CHECK: NOK: seems the START loop is running into trouble; this may be due to load / network issue; please check your network environment !' % (SchedulerConfig().name)) ret = False if stopLoopTS.delta() > 3 * SchedulerConfig().awake_time: # sounds the alarm if more than 3 stop iteration were missed logging.getLogger().warn('scheduler %s: CHECK: NOK: seems the STOP loop is running into trouble; this may be due to load / network issue; please check your network environment !' % (SchedulerConfig().name)) ret = False if preemptLoopTS.delta() > SchedulerConfig().awake_time: # sounds the alarm if no preempt was done in awake-time interval logging.getLogger().warn('scheduler %s: CHECK: NOK: seems the PREEMPT loop is running into trouble; this may be due to load / network issue; please check your network environment !' % (SchedulerConfig().name)) ret = False if logLoopTS.delta() > SchedulerConfig().awake_time: # sounds the alarm if no log was done in awake-time interval logging.getLogger().warn('scheduler %s: CHECK: NOK: seems the HEALTH loop is running into trouble; this may be due to load issue; please check your scheduler settings !' % (SchedulerConfig().name)) ret = False except Exception, e: logging.getLogger().warn('scheduler %s: CHECK: NOK: got the following error : %s' % (SchedulerConfig().name, e)) ret = False
def _finalback(stats): used_slots = 0 if len(stats.keys()) == 0: raise Exception("Every launchers seems to be dead !!!") # remove full launchers for k, v in stats.items(): if 'slots' in v: v = v[ 'slots'] # to ensure backward compatibility with pre-20090224 launchers used_slots += v['slotused'] if v['slottotal'] == v['slotused']: del stats[k] # give up if we may go beyond limit if used_slots >= SchedulerConfig().max_slots: raise Exception("Gone beyond our max of %s slots used" % SchedulerConfig().max_slots) if len(stats.keys()) == 0: raise Exception("No free slots on launchers") return SchedulerConfig().launchers_uri[_extract_best_candidate(stats)]
def _eb(reason, stats, launchers, current_launcher):
    """
    Errback used when ``current_launcher`` could not be queried.

    The failure is logged; then either another launcher is popped from
    ``launchers`` and asked for 'get_health' (with ``_callback`` and this
    errback chained on the call), or, when none is left, ``_finalback`` is
    applied to the ``stats`` gathered so far.
    """
    logging.getLogger().error(
        "scheduler %s: while talking to launcher %s: %s" %
        (SchedulerConfig().name, current_launcher,
         reason.getErrorMessage()))

    if not launchers:
        # no more launcher left, give up
        return _finalback(stats)

    (name, uri) = launchers.popitem()
    d = callOnLauncher(None, uri, 'get_health')
    d.addCallback(_callback, stats, launchers, name).\
        addErrback(_eb, stats, launchers, name)
    return d
def _finalize(result): if type(result) == list: # got expected struct (launcher, host, port, key) = result if key == '-': # Key not provided => TCP Proxy logging.getLogger().info( 'VNC Proxy: launcher "%s" created new TCP Proxy to "%s:%s"' % (launcher, host, str(port))) else: # Key provided => Websocket Proxy logging.getLogger().info( 'VNC Proxy: launcher "%s" created new WebSocket Proxy to "%s:%s" with key "%s"' % (str(launcher), str(host), str(port), str(key))) if host == '': host = SchedulerConfig().launchers[launcher]['host'] return (host, port, key) else: return False
def callOnLauncher(coh_id, launcher, method, *args):
    """
    Call ``method(*args)`` on the launcher whose URI is ``launcher``.

    When ``coh_id`` is truthy, the matching ``CommandsOnHost`` row is
    loaded and told (``setCurrentLauncher``) the name of the launcher
    whose configured URI equals ``launcher``, if there is one.

    Returns what ``callRemote(...).addErrback(...)`` gives back on the
    XML-RPC proxy (presumably a Deferred). The errback only logs a warning
    and returns nothing.
    """
    def _eb(reason):
        logging.getLogger().warn(
            "scheduler %s: while sending command to launcher %s : %s" %
            (SchedulerConfig().name, launcher, reason.getErrorMessage()))

    # coh_id to keep a track of the command, set to None if we don't want to keep a track
    if coh_id:
        # FIXME: we may want to log launcher_name instead of launcher_uri
        session = sqlalchemy.orm.create_session()
        myCommandOnHost = session.query(CommandsOnHost).get(coh_id)
        session.close()
        # reverse lookup: find the name registered for this URI
        for (launcher_name, launcher_uri) in SchedulerConfig().launchers_uri.items():
            if launcher_uri == launcher:
                myCommandOnHost.setCurrentLauncher(launcher_name)
                break

    proxy = pulse2.scheduler.xmlrpc.getProxy(launcher)
    remote_call = proxy.callRemote(method, *args)
    return remote_call.addErrback(_eb)
# our modules import pulse2.scheduler.scheduling import pulse2.scheduler.threads from pulse2.scheduler.tracking.commands import CommandsOnHostTracking from pulse2.scheduler.config import SchedulerConfig def tell_i_am_alive(launcher): """ A launcher just contact us, log it """ logging.getLogger().info("Scheduler: launcher %s tells us it is alive" % launcher) return True def completed_push(launcher, (exitcode, stdout, stderr), id): if SchedulerConfig( ).lock_processed_commands and not CommandsOnHostTracking().preempt(id): return False logging.getLogger().info( "Scheduler: launcher %s tells us that push of CoH #%s is done" % (launcher, id)) try: if SchedulerConfig().multithreading: pulse2.scheduler.threads.runInThread( pulse2.scheduler.scheduling.parsePushResult, (exitcode, stdout, stderr), id) else: pulse2.scheduler.scheduling.parsePushResult( (exitcode, stdout, stderr), id) return True except Exception, e: pulse2.scheduler.scheduling.gotErrorInResult(e, id)
def checkStatus():
    """
    Run the pool check, then the loops check, and log a 'CHECK: OK' line
    when both pass.

    The loops check is not run when the pool check fails.
    """
    all_good = checkPool() and checkLoops()
    if all_good:
        logging.getLogger().info(
            'scheduler %s: CHECK: OK' % SchedulerConfig().name)
def getHealth(): # take basic informations health = basicHealth() try: # add data about the current database connections pool pool = MscDatabase().db.pool health['db'] = { 'poolsize' : str(pool.size()), 'checkedinconns' : str(pool.checkedin()), 'overflow' : str(pool.overflow()), 'checkedoutconns': str(pool.checkedout()), 'recycle' : str(pool._recycle) } except Exception, e: logging.getLogger().warn('scheduler %s: HEALTH: got the following error : %s' % (SchedulerConfig().name, e))
def checkPool(): ret = True try : pool = MscDatabase().db.pool if pool._max_overflow > -1 and pool._overflow >= pool._max_overflow : logging.getLogger().error('scheduler %s: CHECK: NOK: timeout then overflow (%d vs. %d) detected in SQL pool : check your network connectivity !' % (SchedulerConfig().name, pool._overflow, pool._max_overflow)) pool.dispose() pool = pool.recreate() ret = False except Exception, e: logging.getLogger().warn('scheduler %s: CHECK: NOK: got the following error : %s' % (SchedulerConfig().name, e)) ret = False
def getAnnounceCheck(announce):
    """
    Return the check configured for ``announce``.

    An empty string is returned when ``announce`` is falsy or is not a key
    of ``SchedulerConfig().announce_check``.
    """
    if not announce:
        return ''
    known_checks = SchedulerConfig().announce_check
    if announce in known_checks:
        return known_checks[announce]
    return ''
def __init__(self):
    """Store the root logger and the scheduler configuration on the instance."""
    # root logger, as returned by logging.getLogger() without a name
    self.logger = logging.getLogger()
    # scheduler settings object
    self.config = SchedulerConfig()
def getClientCheck(target):
    """
    Return ``getCheck`` applied to the configured client check and to
    ``target``.
    """
    client_check = SchedulerConfig().client_check
    return getCheck(client_check, target)
def getServerCheck(target):
    """
    Return ``getCheck`` applied to the configured server check and to
    ``target``.
    """
    server_check = SchedulerConfig().server_check
    return getCheck(server_check, target)
def chooseLauncher():
    """
    return a good launcher, URI form

    Works on a copy of ``SchedulerConfig().launchers_uri``: the launchers
    are asked for 'get_health' one at a time through ``callOnLauncher``.
    Truthy answers are gathered in a dict keyed by launcher name; a
    failing launcher is logged and skipped. Once all of them have been
    polled, launchers without a free slot are discarded and the URI of the
    one with the most free slots is the final result.

    That result is returned directly when no launcher is configured,
    otherwise through the object returned by ``callOnLauncher``
    (presumably a Deferred).

    An ``Exception`` is raised by the final step when no launcher
    answered, when the total of used slots reaches
    ``SchedulerConfig().max_slots``, or when no launcher has a free slot.
    """
    def _extract_best_candidate(launchers):
        # return the best launcher, return the corresponding key
        best_launcher = None
        best_score = 0
        for (name, health) in launchers.items():
            if 'slots' in health:
                # to ensure backward compatibility with pre-20090224 launchers
                health = health['slots']
            # score computed using free slots
            score = health['slottotal'] - health['slotused']
            if score > best_score:
                best_score = score
                best_launcher = name
        return best_launcher

    def _finalback(stats):
        if not stats:
            raise Exception("Every launchers seems to be dead !!!")

        used_slots = 0
        # remove full launchers; iterate over a snapshot as entries get
        # deleted on the way
        for (name, health) in list(stats.items()):
            if 'slots' in health:
                # to ensure backward compatibility with pre-20090224 launchers
                health = health['slots']
            used_slots += health['slotused']
            # ">=" so that an over-committed launcher (more slots used
            # than available) is dropped too; otherwise it could be the
            # only one left and _extract_best_candidate would yield None
            if health['slotused'] >= health['slottotal']:
                del stats[name]

        # give up if we may go beyond limit
        max_slots = SchedulerConfig().max_slots
        if used_slots >= max_slots:
            raise Exception(
                "Gone beyond our max of %s slots used" % max_slots)
        if not stats:
            raise Exception("No free slots on launchers")
        return SchedulerConfig().launchers_uri[_extract_best_candidate(stats)]

    def _eb(reason, stats, launchers, current_launcher):
        logging.getLogger().error(
            "scheduler %s: while talking to launcher %s: %s" %
            (SchedulerConfig().name, current_launcher,
             reason.getErrorMessage()))
        if not launchers:
            # no more launcher left, give up
            return _finalback(stats)
        (next_launcher_name, next_launcher_uri) = launchers.popitem()
        d = callOnLauncher(None, next_launcher_uri, 'get_health')
        d.addCallback(_callback, stats, launchers, next_launcher_name).\
            addErrback(_eb, stats, launchers, next_launcher_name)
        return d

    def _callback(result, stats, launchers, current_launcher):
        # we just got a result from a launcher, let's stack it
        if result:
            stats[current_launcher] = result
        # if there is at least one launcher to process, do it
        if not launchers:
            # no more launcher left, give up
            return _finalback(stats)
        # shuffle launchers; an explicit list is needed by random.shuffle
        remaining = list(launchers.items())
        random.shuffle(remaining)
        (next_launcher_name, next_launcher_uri) = remaining.pop()
        launchers = dict(remaining)
        d = callOnLauncher(None, next_launcher_uri, 'get_health')
        d.addCallback(_callback, stats, launchers, next_launcher_name).\
            addErrback(_eb, stats, launchers, next_launcher_name)
        return d

    return _callback(None, {}, SchedulerConfig().launchers_uri.copy(), None)
def _eb(reason):
    """
    Errback: log why the best launcher could not be chosen, then return
    ``default_error_return`` from the enclosing scope.
    """
    log = logging.getLogger()
    details = (SchedulerConfig().name, reason.getErrorMessage())
    log.error(
        "scheduler %s: while choosing the best launcher : %s" % details)
    return default_error_return
def _eb(reason):
    """
    Errback: log, as a warning, the failure met while sending a command to
    ``launcher`` (taken from the enclosing scope). Nothing is returned.
    """
    log = logging.getLogger()
    details = (SchedulerConfig().name, launcher, reason.getErrorMessage())
    log.warn(
        "scheduler %s: while sending command to launcher %s : %s" % details)