def stop():
    """Ask the Foundry on this machine to quit.

    Connects to ``localhost`` on ``smds.Foundry.FOUNDRY_PORT`` and sends a
    ``QUIT`` message.  If the connection cannot be made the call is a
    silent no-op.  Always returns None.
    """
    s = socket.socket()
    try:
        try:
            s.connect(('localhost', smds.Foundry.FOUNDRY_PORT))
        except socket.error:
            # Nothing is listening locally, so there is nothing to stop.
            pass
        else:
            smds.sendMsg(s, smds.messages['QUIT'])
    finally:
        # Release the descriptor on every path instead of waiting for GC.
        s.close()
def adjustLoads():
    """Ask slow or overdue remotes to give work back.

    Runs at most once every 90 seconds, and only when nothing is waiting
    to be assigned and at least one remote is working.  Every busy remote
    must have reported an estimated time of completion (``etoc``).

    For each remote that is not running a ``Unity`` task:

    * more than 75 s past its ETOC: send ``RELEASE`` with a delay of 0; if
      that was already done more than 75 s ago, declare the remote dead;
    * ETOC more than 120 s later than the projected finish time: send
      ``RELEASE`` with the time left until the projection, provided that
      is more than 120 s.

    Returns True if any remote was contacted or dropped, else False.
    """
    global nextAdjustTime
    if nextAdjustTime > time():
        return False
    if unassigned > 0 or working == 0:
        return False
    times = [b['etoc'] for b in bees if b['task']]
    if not times:
        # No busy remote to average over; also guards the division below.
        return False
    if 0 in times:
        return False    # must have an estimate from everyone.
    nextAdjustTime = time() + 90.0
    avg = sum(times, 0.0) / len(times)
    if avg < time():
        proj = 0
    else:
        # Presumably the finish time if the remaining work were spread
        # over every remote, idle ones included.
        proj = (avg - time()) * working / len(bees) + time()
    acted = False
    # Walk a snapshot: remoteDead() may drop entries from `bees` (the
    # definition visible in this file does), and removing from a list
    # while iterating it skips the following element.
    for b in list(bees):
        if b['task'] and b['task'].p.Unity:
            continue
        if b['etoc'] and b['etoc'] + 75 < time():
            # remote is 75s past its ETOC
            if 'bail' not in b:
                # request remote to terminate
                acted = True
                b['bail'] = time()
                smds.msg("Request %s to terminate now." % b['host'],
                         smds.MSG_DBUG)
                smds.sendMsg(b['socket'], smds.messages['RELEASE'], 0)
            elif b['bail'] + 75 < time():
                # remote did not respond to request to terminate
                acted = True
                smds.msg("%s is unresponsive." % b['host'], smds.MSG_DBUG)
                remoteDead(b)
        elif proj and b['etoc'] > proj + 120.0:
            # remote will take much longer than avg
            r = proj - time()
            if r > 120.0:
                acted = True
                smds.msg("Request %s terminate in %f s." % (b['host'], r),
                         smds.MSG_DBUG)
                smds.sendMsg(b['socket'], smds.messages['RELEASE'], r)
    return acted
def remoteDead(b):
    """Retire the remote described by *b*.

    The remote's host name goes back onto ``hosts``, its outstanding work
    is handed to ``reapWork``, the record is removed from ``bees``, and a
    ``QUIT`` is sent on its socket before the socket is closed.
    """
    hosts.append(b['host'])
    reapWork(b)
    bees.remove(b)
    host, sock = b['host'], b['socket']
    smds.sendMsg(sock, smds.messages['QUIT'])
    sock.close()
    report = "Remote %s dead (%d available)." % (host, len(bees))
    smds.msg(report, smds.MSG_DBUG)
def removeHost(host):
    """Forget everything about *host* after it has gone down.

    Every remote registered under *host* is sent ``QUIT`` and passed to
    ``remoteDead``; the host is then dropped from ``waiting`` and from
    ``hosts`` so it is not contacted again.
    """
    smds.msg("Host %s down" % host, smds.MSG_DBUG)
    # Walk a snapshot: remoteDead() may remove the record from `bees` (the
    # definition visible in this file does), which would make a live
    # iteration skip the next entry.
    for b in list(bees):
        if b['host'] == host:
            smds.sendMsg(b['socket'], smds.messages['QUIT'])
            remoteDead(b)
    if host in waiting:
        del waiting[host]
    # The visible remoteDead() appends the host to `hosts` once per
    # retired remote, so more than one copy may be present; remove all.
    while host in hosts:
        hosts.remove(host)
def shutdown():
    """Close all connections and reset the module to its pristine state.

    Closes the Foundry connection and the listening socket if they are
    set, sends ``QUIT`` to every known remote and closes its socket, then
    rebinds ``bees``, ``hosts`` and ``waiting`` to empty containers and
    clears ``initialized``.
    """
    global theFoundry, theSocket, myPort, bees, hosts, waiting, initialized
    for conn in (theFoundry, theSocket):
        if conn:
            conn.close()
    theFoundry = None
    myPort = None
    theSocket = None
    for bee in bees:
        sock = bee['socket']
        smds.sendMsg(sock, smds.messages['QUIT'])
        sock.close()
    bees, hosts, waiting = [], [], {}
    initialized = False
def returnTask(t, worktag):
    """Send the finished task *t* back to the parent, tagged *worktag*.

    The send happens while ``mx`` is held.  If it raises, the lock is
    released, the failure is logged, the parent connection is closed,
    ``smds.shutdown()`` is called and the process exits.  Returns None.
    """
    failed = False
    mx.acquire()
    try:
        smds.sendMsg(parent, smds.messages['WORK'], worktag, t.strip(True))
    except:
        failed = True
    # The lock is dropped first on both the success and the failure path.
    mx.release()
    if failed:
        smds.msg("Error sending work to master.")
        parent.close()
        smds.shutdown()
        exit()
    return None
def initialize():
    """Register with the local Foundry and start the dispatch threads.

    Does nothing when ``theFoundry`` is already set.  Otherwise:

    1. connect to ``localhost:FOUNDRY_PORT`` and send a ``WORK`` message
       carrying this process id;
    2. bind a non-blocking listening socket on the first free port at or
       above 8740, recording it in ``myPort`` / ``mySocket``;
    3. start the post-processor and dispatcher daemon threads.

    Returns False if the Foundry cannot be reached or the registration
    message cannot be sent; returns None otherwise.  A bind failure other
    than "address in use" is re-raised.
    """
    global theThread, theProcThread, theFoundry, myPort, mySocket
    from os import getpid
    # NOTE(review): the source was flattened, so the original nesting is
    # inferred.  Everything is kept under this check so that a repeated
    # call can never start a second pair of threads.
    if theFoundry is None:
        # Attempt to connect to local foundary
        s = socket.socket()
        try:
            s.connect(('localhost', FOUNDRY_PORT))
        except socket.error:
            s.close()
            return False
        if not smds.sendMsg(s, smds.messages['WORK'], getpid()):
            s.close()
            return False
        theFoundry = s
        p = 8740
        while myPort is None:
            s = socket.socket()
            try:
                s.bind(('', p))
            except socket.error as err:
                # Do not leave the unbound socket open while probing.
                s.close()
                if err.args[0] != EADDRINUSE:
                    raise
                p += 1
            else:
                myPort = p
                s.setblocking(0)
                s.listen(30)
                mySocket = s
        theProcThread = threading.Thread(None, theProcessor,
                                         "SMDS Dispatch Postprocessor")
        theProcThread.setDaemon(True)
        theProcThread.start()
        theThread = threading.Thread(None, theDispatcher,
                                     "SMDS Dispatch Engine")
        theThread.setDaemon(True)
        theThread.start()
def shutdown():
    """Stop the dispatch threads and close every connection.

    When a Foundry connection exists: sets ``halt``, notifies the ``work``
    and ``process`` conditions while holding ``mx``, sends ``QUIT`` to
    every remote and closes its socket, joins both threads, then closes
    the listening socket and the Foundry connection.  The compressed
    intensity cache is emptied in every case.
    """
    global theFoundry, intensCompressedCache
    if theFoundry:
        halt.set()
        mx.acquire()
        try:
            work.notify()
            process.notify()
            for b in bees:
                smds.sendMsg(b['socket'], smds.messages['QUIT'])
                b['socket'].close()
        finally:
            # Release even if a send fails; the joins below would
            # presumably never return while this thread holds the lock.
            mx.release()
        theThread.join()
        theProcThread.join()
        mySocket.close()
        theFoundry.close()
        theFoundry = None
    intensCompressedCache = {}
def deactivateFoundry(h):
    """Tell every registered master that foundry *h* is gone.

    Sends ``REMOVE`` with the foundry's name to each master; masters for
    which the send fails are passed to ``masterDead`` afterwards.
    """
    smds.msg("deactivateFoundry(%s)" % h['name'])
    # Collect failures first; they are only handled once the pass over
    # myMasters has finished.
    unreachable = [master for master in myMasters
                   if not smds.sendMsg(master, smds.messages['REMOVE'],
                                       h['name'])]
    for master in unreachable:
        masterDead(master)
def moreWork(workID, funcID, args):
    """Run one unit of work and send the outcome to the master.

    The function is looked up in ``store`` by *funcID*; on a miss it is
    fetched with ``retrieveData`` and rebuilt via ``mLoad`` and
    ``FunctionType``.  A normal result is returned with a ``WORK``
    message.  If the call raises anything but ``SystemExit``, the
    formatted traceback is returned with a ``RELEASE`` message instead.
    The process exits if the reply cannot be sent.
    """
    smds.msg("moreWork(%s, %s)" % (funcID, str(args)), smds.MSG_DBUG)
    if funcID not in store:
        # NOTE(review): this rebuilds executable code from data supplied
        # by the parent process - confirm the peer is trusted.
        code = mLoad(retrieveData(funcID))
        store[funcID] = FunctionType(code, env)
    func = store[funcID]
    err = True
    try:
        ret = func(*args)
        err = False
    except SystemExit:
        raise
    except:
        from traceback import format_exc
        ret = format_exc()
    smds.msg("Returning %s to the master." % str(ret), smds.MSG_DBUG)
    if err:
        kind = 'RELEASE'
    else:
        kind = 'WORK'
    delivered = smds.sendMsg(theSocket, smds.messages[kind], workID, ret)
    if delivered:
        return
    smds.msg("Error sending work to master.", smds.MSG_WARN)
    exit()
def registerMaster(s, id):
    """Register the master on socket *s* and tell it the usable foundries.

    While holding ``mx``: refreshes the foundry connections, sends one
    ``ADD`` per available foundry (this host first when ``includeMe`` is
    set), records the master in ``myMasters`` under ``str(id)`` and sets a
    300 s timeout on ``theSocket``.  If any send fails the master is
    passed to ``masterDead`` and registration is abandoned.
    """
    global includeMe
    id = str(id)
    mx.acquire()
    try:
        smds.msg("registerMaster(%s)" % id)
        refreshFoundries(True)
        if includeMe:
            if not smds.sendMsg(s, smds.messages['ADD'], myName):
                masterDead(s, id)
                return
        for h in [x for x in hosts if x['usable'] and x['socket']]:
            if not smds.sendMsg(s, smds.messages['ADD'], h['name']):
                masterDead(s, id)
                return
        myMasters[s] = id
        theSocket.settimeout(300.0)
    finally:
        # Also runs on the early returns above, which previously left the
        # lock held.
        mx.release()
def dataRequest(b, name):
    """Answer remote *b*'s request for the stored item called *name*.

    A request for a name that is not in ``store`` is logged and the
    remote is dropped.  Otherwise the item is sent in an ``INTENS``
    message; the remote is dropped if that send fails.
    """
    if name not in store:
        smds.msg("Unknown data request <%s> by %s." % (name, b['host']))
        remoteDead(b)
        return
    smds.msg("Data [%s] request by %s" % (name, b['host']), smds.MSG_DBUG)
    if not smds.sendMsg(b['socket'], smds.messages['INTENS'], name,
                        store[name]):
        remoteDead(b)
def releaseWork(etoc):
    """Release work from the local engine and report it to the parent.

    Calls ``smds.Engine.releaseWork(etoc)``; a positive result is sent to
    the parent in a ``RELEASE`` message.  If that send fails, ``mx`` is
    released, the parent connection is closed, ``smds.shutdown()`` is
    called and the process exits.
    """
    released = smds.Engine.releaseWork(etoc)
    if not released > 0:
        return
    if smds.sendMsg(parent, smds.messages['RELEASE'], released):
        return
    # The parent is unreachable: unwind and terminate.
    mx.release()
    smds.msg("Error writing to master.")
    parent.close()
    smds.shutdown()
    exit()
def incomingConnections():
    """Accept every pending connection on ``theSocket``.

    Loops until ``accept()`` fails.  Each accepted peer is made blocking
    and identified by reverse DNS; its host is removed from ``waiting``
    and ``hosts``.  A host that already has an entry in ``bees`` is sent
    ``QUIT`` and disconnected; any other host is appended to ``bees`` as
    a new idle remote.
    """
    while True:
        try:
            (c, addr) = theSocket.accept()
        except Exception:
            # No further pending connection (or the listener is gone).
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            break
        c.setblocking(1)
        host = socket.gethostbyaddr(addr[0])[0]
        if host in waiting:
            del waiting[host]
        if host in hosts:
            hosts.remove(host)
        if host in [b['host'] for b in bees]:
            # Duplicate registration: reject it and release the socket
            # right away instead of leaving it for the garbage collector.
            smds.sendMsg(c, smds.messages['QUIT'])
            c.close()
        else:
            bees.append({
                'host': host,
                'socket': c,
                'work': None,
            })
            smds.msg("New Bee: %s (%d/%d avail)" %
                     (host, len(bees) - working, len(bees)), smds.MSG_DBUG)
def refreshFoundries(fast=False):
    """Try to connect to every usable foundry that has no socket yet.

    Runs ``checkConnections()`` first.  Each attempt uses a 1 s timeout
    when *fast* is true and 10 s otherwise.  After a successful connect a
    ``FOUNDRY`` message carrying this host's name is sent; if that
    succeeds the socket is stored on the host record and
    ``activateFoundry`` is called.  Hosts that cannot be reached are
    left untouched.
    """
    checkConnections()
    if fast:
        timeout = 1.0
    else:
        timeout = 10.0
    for h in [x for x in hosts if (x['usable'] and not x['socket'])]:
        s = socket.socket()
        s.settimeout(timeout)
        try:
            s.connect((h['name'], FOUNDRY_PORT))
        except socket.error:
            # Unreachable for now; release the socket rather than leak it.
            s.close()
            continue
        if smds.sendMsg(s, smds.messages['FOUNDRY'], socket.gethostname()):
            h['socket'] = s
            activateFoundry(h)
        else:
            # Handshake failed, so the connection is of no further use.
            s.close()
def sendWork() :
    """Hand out unassigned work to idle remotes.

    Does nothing (returns False) before ``workWaitTime``.  Otherwise each
    idle remote, while unassigned work remains, receives a "stint" cut
    from the first queued task that still has unassigned work.  The size
    is the remote's share of the available computing power applied to
    ``unassigned``; a task flagged ``Unity`` is sent whole.

    Returns True if at least one remote was tried, else False.
    """
    global queue, working, unassigned
    if time() < workWaitTime : return False
    r = False
    avail = [ b for b in bees if b['task'] == None ]
    # Total power of the idle remotes: the sum of the known rates, scaled
    # up so that remotes without a rate count as average.  With no known
    # rate at all every remote counts as 1.0.
    availPower = 0.0
    i = 0
    for b in avail :
        if b['rate'] : availPower += b['rate'] ; i += 1
    if i == 0 : availPower = float(len(avail))
    else : availPower *= float(len(avail))/float(i)
    # avgPower is only bound when there is an idle remote; the loop below
    # does not run otherwise.
    if len(avail) : avgPower = availPower / float(len(avail))
    i = 0   # from here on, i indexes into queue
    while (len(avail) > 0 and unassigned > 0) :
        r = True
        # Get next bee
        b = avail.pop()
        b['etoc'] = 0
        # power = this remote's fraction of the power still available;
        # its contribution is then removed from the pool.
        if b['rate'] :
            power = b['rate']/availPower
            availPower -= b['rate']
        else :
            power = avgPower/availPower
            availPower -= avgPower
        # Get next work unit
        # NOTE(review): assumes some queued task still has unassigned work
        # whenever unassigned > 0; otherwise this indexes past the end.
        while (queue[i].rem-queue[i].assigned == 0) : i += 1
        t = queue[i]
        if t.p.Unity : stint = t.rem
        else :
            stint = int(unassigned * power + 0.5)
            if stint < 1 : stint = 1
            # Leaving 10 or fewer behind (or overshooting) hands out the
            # whole remainder of this task instead.
            if t.rem-t.assigned-stint <= 10 : stint = t.rem-t.assigned
        # solo: this remote gets the entire, so far untouched, task.
        b['solo'] = (stint == t.rem and t.completed == 0 and t.assigned == 0)
        b['stint'] = stint
        if smds.sendMsg(b['socket'], smds.messages['WORK'], t.strip(False), stint, b['solo'], worktag) :
            t.status = smds.Task.Status_Running
            t.assigned += stint
            unassigned -= stint
            b['task'] = t
            b['worktag'] = worktag
            working += 1
            smds.msg("Sent %d from <%s> to %s, %d/%d assigned; %d in queue." % (stint, t.ID, b['host'], t.assigned+t.completed, t.rem+t.completed, unassigned), smds.MSG_DBUG)
        else :
            # The send failed, so the remote is considered lost.
            remoteDead(b)
    return r
def activateFoundry(h):
    """Announce foundry *h* to every registered master.

    Does nothing when the foundry is not marked usable.  Otherwise sends
    ``ADD`` with the foundry's name to each master while holding ``mx``;
    masters for which the send fails are passed to ``masterDead``
    afterwards.
    """
    mx.acquire()
    try:
        if not h['usable']:
            return
        smds.msg("activateFoundry(%s)" % h['name'])
        defunct = []
        for m in myMasters:
            if not smds.sendMsg(m, smds.messages['ADD'], h['name']):
                defunct.append(m)
        for m in defunct:
            masterDead(m)
    finally:
        # Single release point.  The early-exit path used to call the
        # misspelled ``mx.relase()``, which raised AttributeError and
        # left the lock held.
        mx.release()
def retrieveData(name):
    """Fetch the data item *name* from the parent and return it.

    Sends an ``INTENS`` request on ``theSocket`` and reads one reply.
    The process terminates if the connection is lost, if the parent
    answers ``QUIT`` or any other unexpected message, or if the reply is
    for a different name.  Any other error is logged with its traceback
    before exiting.

    Returns the second element of the reply's arguments.
    """
    smds.msg("Requesting <%s> from parent." % name, smds.MSG_DBUG)
    try:
        smds.sendMsg(theSocket, smds.messages['INTENS'], name)
        packet = smds.recvMsg(theSocket)
        if not packet:
            theSocket.close()
            exit("Error reading from parent.")
        (msg, args) = packet
        if msg == smds.messages['QUIT']:
            quit()
        if msg != smds.messages['INTENS']:
            # str() takes one object; the pair must be wrapped in a tuple.
            smds.msg("Unknown message from parent: %s.\nGiving up."
                     % str((msg, args)), smds.MSG_WARN)
            quit()
        if args[0] != name:
            smds.msg("Unknown data <%s> from parent. Giving up."
                     % (args[0],), smds.MSG_WARN)
            quit()
    except SystemExit:
        # Deliberate exits above must not be reported as retrieval errors.
        raise
    except Exception:
        from traceback import format_exc
        smds.msg("Error retrieving data <%s> from master." % name,
                 smds.MSG_WARN)
        print(format_exc())
        exit()
    return args[1]
def moreWork(t, stint, solo, worktag):
    """Accept task *t* from the parent and queue it for execution.

    The task is marked ready with *stint* units remaining.  Its
    ``intens``, ``cef`` and ``pot`` parameters (whichever the parameter
    class has) are replaced by the cached objects of the same name.  If
    any of them is not cached yet, the missing names are requested from
    the parent, the call's arguments are parked in ``waiting`` and the
    task is not queued.
    """
    global waiting
    t.status = smds.Task.Status_Ready
    t.solo = solo
    t.rem = stint
    missing = []
    slots = (
        (smds.Params.Base_intens, 'intens'),
        (smds.Params.Base_triplet, 'cef'),
        (smds.Params.Base_pot, 'pot'),
    )
    for base, attr in slots:
        if not isinstance(t.p, base):
            continue
        key = getattr(t.p, attr).name
        if key in intensCache:
            setattr(t.p, attr, intensCache[key])
        else:
            missing.append(key)
    if missing:
        smds.sendMsg(parent, smds.messages['INTENS'], missing)
        # Parked until the data arrives; presumably replayed by the
        # handler of the parent's reply.
        waiting = (t, stint, solo, worktag)
        return
    smds.addToQueue(t, lambda task: returnTask(task, worktag))
def sendWork():
    """Pair queued work items with idle remotes until either runs out.

    Each pairing increments ``working`` and ``workID``, records the item
    on the remote's ``'work'`` entry and sends it in a ``WORK`` message
    together with ``theFunc``.  A remote whose send fails is passed to
    ``remoteDead``.
    """
    global working, workID
    idle = [bee for bee in bees if bee['work'] is None]
    while len(queue) > 0 and len(idle) > 0:
        bee = idle.pop()
        item, args = queue.pop()
        working += 1
        workID += 1
        bee['work'] = (item, args, workID)
        sent = smds.sendMsg(bee['socket'], smds.messages['WORK'], workID,
                            theFunc, args)
        if sent:
            smds.msg("Sent %s%s <%d> to %s (%d remaining in queue)" %
                     (theFunc, str(args), workID, bee['host'], len(queue)),
                     smds.MSG_DBUG)
        else:
            remoteDead(bee)
def checkTime():
    """Report the engine's estimated time of completion to the parent.

    While work remains (engine ETOC in the future), an ``ETOC`` message
    with the seconds remaining and the engine rate is sent whenever the
    absolute ETOC has moved by more than 90 s since the last report.
    Once the ETOC is no longer in the future, the remembered value is
    reset to 0.  A failed send releases ``mx``, shuts down and exits.
    """
    global lastEtoc
    remaining = smds.Engine.etoc() - time()
    if remaining > 0:
        if abs(smds.Engine.etoc() - lastEtoc) > 90.0:
            smds.msg("Notifying master of etoc %f" % remaining, smds.MSG_DBUG)
            delivered = smds.sendMsg(parent, smds.messages['ETOC'],
                                     remaining, smds.Engine.rate())
            if not delivered:
                mx.release()
                smds.msg("Error writing to master.", smds.MSG_WARN)
                parent.close()
                smds.shutdown()
                exit()
            lastEtoc = smds.Engine.etoc()
    elif remaining <= 0 and lastEtoc > 0:
        lastEtoc = 0
def touchHosts():
    """Invite every known host to start a remote for this process.

    Hosts that have been in ``waiting`` for more than 60 s are moved back
    to ``hosts``.  Each host in ``hosts`` is then contacted on the Foundry
    port with a 2 s timeout and sent a ``CALC`` message carrying this
    host's name and ``myPort``.  On success the host moves to ``waiting``
    with the current time.  Unreachable hosts stay in ``hosts``.
    """
    global waiting
    # Snapshot the keys: entries are deleted inside the loop.
    for host in list(waiting):
        if time() - waiting[host] > 60.0:
            del waiting[host]
            hosts.append(host)
    # Snapshot again: hosts that accept the invitation are removed.
    for host in list(hosts):
        s = socket.socket()
        try:
            s.settimeout(2.0)
            try:
                s.connect((host, smds.Foundry.FOUNDRY_PORT))
            except socket.error:
                # Not reachable right now; it stays in `hosts`.
                pass
            else:
                if smds.sendMsg(s, smds.messages['CALC'],
                                socket.gethostname(), myPort):
                    waiting[host] = time()
                    hosts.remove(host)
        finally:
            # The connection only carries the invitation.
            s.close()
def fetchIntens(b, names):
    """Send the intensity objects called *names* to remote *b*.

    Each object is pickled (protocol 2) and zlib-compressed once, and the
    result is kept in ``intensCompressedCache``.  If compression runs out
    of memory the compressed cache is discarded and the object is
    compressed again.  A request naming an unknown object is logged and
    ignored as a whole.  The remote is dropped if the ``INTENS`` reply
    cannot be sent.
    """
    global intensCompressedCache
    from cPickle import dumps
    from zlib import compress

    def _packed(key):
        # Serialise and compress one cached intensity object.
        return compress(dumps(intensCache[key], 2))

    payload = []
    for name in names:
        if name not in intensCache:
            smds.msg("Unknown intens request [%s] from %s." %
                     (name, b['host']), smds.MSG_WARN)
            return
        smds.msg("Intens [%s] request by %s" % (name, b['host']))
        if name not in intensCompressedCache:
            try:
                intensCompressedCache[name] = _packed(name)
            except MemoryError:
                # Free the compressed copies and retry once.
                intensCompressedCache = {}
                intensCompressedCache[name] = _packed(name)
        payload.append(intensCompressedCache[name])
    delivered = smds.sendMsg(b['socket'], smds.messages['INTENS'], names,
                             payload)
    if not delivered:
        remoteDead(b)
def initialize():
    """Prepare this process to hand out calculations.

    * registers ``shutdown`` to run at interpreter exit and makes SIGTERM
      (and SIGBREAK where it exists) exit the process;
    * picks random starting values for ``funcID`` and ``workID``;
    * connects to the Foundry on ``localhost`` and registers this process
      id, unless a Foundry connection already exists;
    * binds a non-blocking listening socket on the first free port at or
      above 8740 and records it in ``myPort`` / ``theSocket``.

    Raises ``socket.error`` if the local Foundry cannot be reached or a
    bind fails for a reason other than "address in use", and ``IOError``
    if the registration message cannot be sent.
    """
    global theFoundry, theSocket, myPort, funcID, workID, initialized
    from os import getpid
    from errno import EADDRINUSE
    from atexit import register
    from signal import signal, SIGTERM
    from sys import exit
    from random import randint
    register(shutdown)
    signal(SIGTERM, lambda x, y: exit())
    try:
        # Best effort: only some platforms define SIGBREAK.
        from signal import SIGBREAK
        signal(SIGBREAK, lambda x, y: exit())
    except Exception:
        pass
    funcID = randint(0, 1000000)
    workID = randint(0, 1000)
    # NOTE(review): the flag is set before the connection attempts below,
    # so it stays True if one of them raises - confirm this is intended.
    initialized = True
    if theFoundry is None:
        # Attempt to connect to local foundary
        s = socket.socket()
        try:
            s.connect(('localhost', smds.Foundry.FOUNDRY_PORT))
        except socket.error:
            s.close()
            raise
        if not smds.sendMsg(s, smds.messages['WORK'], getpid()):
            s.close()
            raise IOError("Could not register with local Foundry.")
        theFoundry = s
    p = 8740
    while myPort is None:
        s = socket.socket()
        try:
            s.bind(('', p))
        except socket.error as err:
            # Do not leave the unbound socket open while probing.
            s.close()
            if err.args[0] != EADDRINUSE:
                raise
            p += 1
        else:
            myPort = p
            s.setblocking(0)
            s.listen(30)
            theSocket = s
    smds.msg("smds.Calc initialized.", smds.MSG_DBUG)
def touchHosts():
    """Invite every known host to start a remote for this dispatcher.

    Hosts that have been in ``waiting`` for more than 60 s are moved back
    to ``hosts``.  Each host in ``hosts`` is then contacted on
    ``FOUNDRY_PORT`` with a 2 s timeout.  After a successful connect
    ``seed`` is incremented and a ``START`` message with this host's
    name, ``myPort`` and the new seed is sent.  On success the host moves
    to ``waiting`` and ``workWaitTime`` is pushed 10 s into the future.

    Returns True if at least one host accepted the invitation.
    """
    global waiting, seed, workWaitTime
    r = False
    # Snapshot the keys: entries are deleted inside the loop.
    for host in list(waiting):
        if time() - waiting[host] > 60.0:
            del waiting[host]
            hosts.append(host)
    # Snapshot again: hosts that accept the invitation are removed.
    for host in list(hosts):
        s = socket.socket()
        try:
            s.settimeout(2.0)
            try:
                s.connect((host, FOUNDRY_PORT))
            except socket.error:
                # Not reachable right now; it stays in `hosts`.
                pass
            else:
                seed += 1
                if smds.sendMsg(s, smds.messages['START'],
                                socket.gethostname(), myPort, seed):
                    waiting[host] = time()
                    hosts.remove(host)
                    workWaitTime = time() + 10.0
                    r = True
        finally:
            # The connection only carries the invitation.
            s.close()
    return r