def addToQueue(task, callback):
    """Add a task to the work queue.

    task     -- an smds.Task.Task instance whose status is Ready (or Done).
    callback -- a callable taking one argument (the task); it is called
                when the task is complete.  The task's status may indicate
                that an error occurred by the time the callback runs.

    A task that is already Done, or that the cache can satisfy, has its
    callback invoked immediately and is never queued.

    Raises TypeError if task is not a Task or callback is not callable,
    and ValueError if the task is neither Done nor Ready.
    """
    global callbacks, unassigned
    if not isinstance(task, smds.Task.Task):
        raise TypeError(
            "Only objects of type smds.Task.Task can be added to the queue.")
    if not callable(callback):
        raise TypeError("Callback is not callable in Dispatch.addToQueue.")
    if task.status == smds.Task.Status_Done:
        callback(task)
        return
    if task.status != smds.Task.Status_Ready:
        raise ValueError("Task <%s> is not ready." % (task.ID,))
    if smds.theCache and smds.theCache.fetch(task):
        callback(task)
        return

    mx.acquire()
    try:
        if task in queue or task in out_queue:
            smds.msg("Task <%s> already on queue." % task.ID)
        else:
            smds.msg("Adding task <%s> to queue." % task.ID)
            task.status = smds.Task.Status_Waiting
            task.assigned = 0
            queue.append(task)
            callbacks[task] = callback
            unassigned += task.rem
        # Mark the queue as not clear and notify whoever waits on `work`.
        clear.clear()
        work.notify()
    finally:
        # Always hand the lock back, even if something above raised.
        mx.release()
def prepIntens(intens, Pad):
    """Prepare (and memoize in intensPrep) the arrays derived from intens.

    intens -- intensity profile object; this function reads its name,
              numBins_x, numBins_z, binWidth_x, binWidth_z and getData().
    Pad    -- if true, each grid dimension is rounded up to a power of two
              and the cache key is prefixed with "__padded_"; otherwise
              each dimension is 2*numBins + 2.

    Returns (name, (O, Ohat, r2)) where O is the profile data padded to
    the grid as float32, Ohat is its N-dimensional FFT, and r2 is the
    float32 squared distance of every grid cell from the grid centre.
    """
    from math import ceil, log
    from numpy import arange
    # numpy.oldnumeric was removed from NumPy; its fftnd was numpy.fft.fftn.
    from numpy.fft import fftn

    if Pad:
        dyadicize = lambda N: 2**int(ceil(log(N)/log(2)))
        name = "__padded_" + intens.name
    else:
        dyadicize = lambda N: N+1
        name = intens.name
    if name in intensPrep:
        return (name, intensPrep[name])

    smds.msg("Preparing intens <%s>" % name, smds.MSG_DBUG)
    # NOTE(review): the y extent and y bin width are taken from the x
    # fields, as in the original -- presumably the profile is symmetric
    # in x and y; confirm.
    (Nx, Ny, Nz) = [dyadicize(2*x+1) for x in
                    (intens.numBins_x, intens.numBins_x, intens.numBins_z)]

    # Squared per-axis offsets; `//` keeps the floor division the range
    # bounds relied on.  Broadcasting the three 1-D vectors produces the
    # same (Nx, Ny, Nz) grid without building three full-size temporaries.
    x2 = (arange(-Nx//2, Nx//2) * intens.binWidth_x) ** 2     # X
    y2 = (arange(-Ny//2, Ny//2) * intens.binWidth_x) ** 2     # Y
    z2 = (arange(-Nz//2, Nz//2) * intens.binWidth_z) ** 2     # Z
    r2 = (x2[:, None, None] + y2[None, :, None] + z2[None, None, :])
    r2 = r2.astype('f')

    O = pad(intens.getData().astype('f'), (Nx, Ny, Nz))
    del intens      # drop the local reference before the FFT
    Ohat = fftn(O)
    intensPrep[name] = (O, Ohat, r2)
    return (name, intensPrep[name])
def removeHost(host):
    """Forget a host: kill its bees and drop it from waiting and hosts.

    host -- host name as stored in b['host'], waiting and hosts.
    """
    smds.msg("Host %s down" % host, smds.MSG_DBUG)
    # remoteDead() removes the bee from `bees`, so walk a snapshot;
    # removing from a list while iterating over it skips elements.
    for b in list(bees):
        if b['host'] == host:
            remoteDead(b)
    if host in waiting:
        del waiting[host]
    # remoteDead() appends the host back to `hosts` once per dead bee,
    # so remove every occurrence rather than just the first.
    while host in hosts:
        hosts.remove(host)
def theProcessor():
    """Thread body: finish tasks from out_queue and fire their callbacks.

    Runs until `halt` is set.  `mx` is held while the shared queues are
    inspected and released while a task is analysed and its callback runs.
    """
    global out_queue
    mx.acquire()
    while True:
        if len(out_queue) == 0 and len(queue) == 0:
            clear.set()
        # Wait in a loop: a wakeup does not guarantee that out_queue is
        # non-empty, and indexing an empty list here would raise while
        # mx is held.  Not waiting when halt is already set also avoids
        # sleeping through a shutdown request.
        while len(out_queue) == 0 and not halt.isSet():
            process.wait()
        if halt.isSet():
            mx.release()
            return
        t = out_queue[0]
        out_queue = out_queue[1:]
        callback = callbacks.pop(t)
        mx.release()

        if t.status == smds.Task.Status_Processing:
            for a in t.anal:
                if a.online:
                    a.finalize()
                else:
                    a.analyze(t)
            if smds.theCache:
                smds.theCache.put(t)
        try:
            callback(t)
        except:
            # Kept deliberately broad, as in the original: whatever the
            # callback raises is logged and the processor keeps running.
            smds.msg(
                "SMDS Engine caught exception executing callback %s for Task <%s>:\n"
                % (str(callback), t.ID) + format_exc(), smds.MSG_WARN)
        mx.acquire()
def addHost(host):
    """Append host to hosts unless it is already known.

    A host counts as known when it is listed in hosts, is a key of
    waiting, or is the 'host' of one of the bees.
    """
    already_known = (
        host in hosts
        or host in waiting
        or host in [bee['host'] for bee in bees]
    )
    if already_known:
        return
    hosts.append(host)
    smds.msg("Adding host %s" % host, smds.MSG_DBUG)
def addToQueue(task, callback):
    """Add a task to the work queue.

    task     -- an smds.Task.Task instance whose status is Ready (or Done).
    callback -- a callable taking one argument (the task); it is called
                when the task is complete.  The task's status may indicate
                that an error occurred by the time the callback runs.

    A task that is already Done, or that the cache can satisfy, has its
    callback invoked immediately and is never queued.

    Raises TypeError if task is not a Task or callback is not callable,
    and ValueError if the task is neither Done nor Ready.
    """
    if not isinstance(task, smds.Task.Task):
        raise TypeError(
            "Only objects of type smds.Task.Task can be added to the queue.")
    if not callable(callback):
        raise TypeError("Callback is not callable in Engine.addToQueue.")
    if task.status == smds.Task.Status_Done:
        callback(task)
        return
    if task.status != smds.Task.Status_Ready:
        raise ValueError("Task <%s> is not ready." % (task.ID,))
    if smds.theCache and smds.theCache.fetch(task):
        callback(task)
        return

    smds.msg("Adding task <%s> to queue." % task.ID)
    task.status = smds.Task.Status_Waiting
    mx.acquire()
    try:
        queue.append((task, callback))
        clear.clear()
        work.notify()
    finally:
        # Always hand the lock back, even if something above raised.
        mx.release()
def removeHost(host):
    """Forget a host: tell its bees to quit and drop it everywhere.

    Each bee on `host` is sent QUIT and passed to remoteDead(); the host
    is then removed from waiting and hosts.
    """
    smds.msg("Host %s down" % host, smds.MSG_DBUG)
    # remoteDead() removes the bee from `bees`, so walk a snapshot;
    # removing from a list while iterating over it skips elements.
    for b in list(bees):
        if b['host'] == host:
            smds.sendMsg(b['socket'], smds.messages['QUIT'])
            remoteDead(b)
    if host in waiting:
        del waiting[host]
    # remoteDead() appends the host back to `hosts` once per dead bee,
    # so remove every occurrence rather than just the first.
    while host in hosts:
        hosts.remove(host)
def remoteDead(b):
    """Retire bee b: return its host to hosts, reap its work, close it.

    b -- bee record (dict) with at least 'host' and 'socket' entries.
    """
    host = b['host']
    hosts.append(host)
    reapWork(b)
    bees.remove(b)

    # Say goodbye on the connection, then close it.
    sock = b['socket']
    smds.sendMsg(sock, smds.messages['QUIT'])
    sock.close()

    remaining = len(bees)
    smds.msg("Remote %s dead (%d available)." % (host, remaining),
             smds.MSG_DBUG)
def deactivateFoundry(h):
    """Send REMOVE for foundry h['name'] to every master.

    Masters for which the send fails are handed to masterDead() once the
    whole pass is complete.
    """
    foundry = h['name']
    smds.msg("deactivateFoundry(%s)" % foundry)
    # Collect failures first; masterDead() is only called after the
    # iteration over myMasters has finished.
    unreachable = [
        master for master in myMasters
        if not smds.sendMsg(master, smds.messages['REMOVE'], foundry)
    ]
    for master in unreachable:
        masterDead(master)
def registerData(name, data):
    """registerData(name, data)

    Store data for retrieval by remote processors.  Names must be unique;
    registering a name that is already in the store raises ValueError.
    In registered functions the data can be retrieved by calling
    fetchData(name).
    """
    if name in store:
        raise ValueError("An object named <%s> already exists." % name)
    smds.msg("Registering data <%s>" % name, smds.MSG_DBUG)
    # Serialize with protocol 2, then compress for storage.
    serialized = pDump(data, 2)
    store[name] = compress(serialized)
def remoteDead(b):
    """Retire bee b, putting any work it was holding back on the queue.

    b -- bee record (dict) with 'host' and 'work' entries.
    """
    global working
    smds.msg("Remote %s dead." % b['host'], smds.MSG_DBUG)
    outstanding = b['work']
    if outstanding:
        # Requeue the work item without its last element.
        queue.append(outstanding[:-1])
        working -= 1
    bees.remove(b)
    hosts.append(b['host'])
def releaseWork(etoc):
    """Release work from the engine and report the amount to the parent.

    Calls smds.Engine.releaseWork(etoc); a positive result is sent to
    `parent` as a RELEASE message.  If that send fails, mx is released,
    the parent connection is closed and the process shuts down.
    """
    released = smds.Engine.releaseWork(etoc)
    if not released > 0:
        return
    if smds.sendMsg(parent, smds.messages['RELEASE'], released):
        return
    # The master could not be reached: give up and exit.
    mx.release()    # presumably held by the caller -- TODO confirm
    smds.msg("Error writing to master.")
    parent.close()
    smds.shutdown()
    exit()
def dataRequest(b, name):
    """Answer bee b's request for the stored data item `name`.

    If the name is in the store, it is sent to the bee in an INTENS
    message.  An unknown name or a failed send gets the bee declared
    dead via remoteDead().
    """
    if name not in store:
        # Log message typo fixed ("Unnown" -> "Unknown").
        smds.msg("Unknown data request <%s> by %s." % (name, b['host']))
        remoteDead(b)
    else:
        smds.msg("Data [%s] request by %s" % (name, b['host']),
                 smds.MSG_DBUG)
        if not smds.sendMsg(b['socket'], smds.messages['INTENS'],
                            name, store[name]):
            remoteDead(b)
def workError(b, workID, ret):
    """Handle an error report from bee b for work item workID.

    A report that does not match the bee's current work item is logged
    as spurious and ignored.  Otherwise the error text `ret` is logged,
    the bee's work slot is cleared and `working` is decremented.
    """
    global working
    current = b['work']
    is_spurious = (not current) or current[2] != workID
    if is_spurious:
        smds.msg("Spurious message from " + b['host'], smds.MSG_DBUG)
        return
    report = "Error from %s on argument %d:\n%s" % (b['host'], current[0], ret)
    smds.msg(report, smds.MSG_WARN)
    b['work'] = None
    working -= 1
def __init__(self, fn=''):
    """Record fn as the name and try to load it.

    self.okay is True when self.load() succeeds and False when it raises
    an Exception.  A warning is logged on failure only when fn is
    non-empty.
    """
    self.name = fn
    try:
        self.load()
    except Exception as err:
        self.okay = False
        if fn != '':
            details = (fn, type(err).__name__, str(err))
            smds.msg("Error loading intensity profile <%s>!\n%s: %s" % details,
                     smds.MSG_WARN)
    else:
        self.okay = True
def registerFoundry(s, name):
    """Attach socket s to the host record for the foundry called name.

    The name is resolved to its canonical host name first.  An existing
    record just gets its socket replaced; otherwise a new, not-yet-usable
    record is appended to hosts.  Usable foundries are activated.
    """
    (host, aliases, addrs) = socket.gethostbyaddr(socket.gethostbyname(name))
    smds.msg("registerFoundry(%s) as %s" % (name, host))
    for h in hosts:
        if h['name'] == host:
            # Known host: only the connection changes.
            h['socket'] = s
            break
    else:
        h = {'name': host, 'socket': s, 'usable': False}
        hosts.append(h)
    if h['usable']:
        activateFoundry(h)
def returnTask(t, worktag):
    """Send the finished task t back to the parent as a WORK message.

    t       -- task; t.strip(True) is what gets transmitted.
    worktag -- tag passed along with the task.

    The send is done while holding mx.  If it fails -- by raising or by
    returning a false value, which is how smds.sendMsg reports failure
    elsewhere in this file -- the parent connection is closed and the
    process shuts down.  Returns None.
    """
    mx.acquire()
    try:
        sent = smds.sendMsg(parent, smds.messages['WORK'], worktag,
                            t.strip(True))
    except Exception:
        sent = False
    finally:
        # Released on every path, including unexpected exceptions.
        mx.release()
    if not sent:
        smds.msg("Error sending work to master.")
        parent.close()
        smds.shutdown()
        exit()
    return None
def reportETOC(b, etoc, rate):
    """Record bee b's reported time to completion and processing rate.

    etoc -- seconds from now until the bee expects to finish.
    rate -- iterable of rates; their sum is stored as b['rate'].

    A report from a bee with no task is logged as a warning and ignored.
    Any 'bail' entry on the bee is dropped.
    """
    if not b['task']:
        smds.msg("%s reported a time to completion but isn't working on anything!"
                 % b['host'], smds.MSG_WARN)
        return
    b['etoc'] = time() + etoc
    b['rate'] = sum(rate)
    b.pop('bail', None)

    finish_at = strftime("%H:%M.%S", localtime(b['etoc']))
    rate_text = ', '.join(["%f" % x for x in rate])
    smds.msg("ETOC for %s at %s (%s bins/s)." % (b['host'], finish_at, rate_text),
             smds.MSG_DBUG)
def reapWork(b):
    """Take back the stint that bee b was assigned and mark b idle.

    The bee's stint is subtracted from its task's assigned count and
    added back to `unassigned`; `working` is decremented.  A task left
    with nothing assigned returns to the Waiting status.  Does nothing
    if the bee has no task.
    """
    global working, unassigned
    t = b['task']
    # Identity test: does not depend on how the task implements equality.
    if t is None:
        return
    t.assigned -= b['stint']
    smds.msg("Reaping %d from <%s>, %d/%d assigned, %d completed" %
             (b['stint'], t.ID, t.assigned+t.completed, t.rem+t.completed,
              t.completed), smds.MSG_DBUG)
    if t.assigned == 0:
        t.status = smds.Task.Status_Waiting
    unassigned += b['stint']
    working -= 1
    b['task'] = None
def sendWork():
    """Hand out unassigned work to idle bees in proportion to their rate.

    Returns False immediately while time() is before workWaitTime.
    Otherwise returns True if at least one hand-out was attempted and
    False if there was nothing to do.

    Each idle bee gets a share of `unassigned` proportional to its known
    rate; bees with no rate yet are treated as having the average rate.
    A bee whose WORK message cannot be sent is passed to remoteDead().
    """
    global queue, working, unassigned
    if time() < workWaitTime:
        return False
    r = False
    # Identity test: does not depend on how the task implements equality.
    avail = [b for b in bees if b['task'] is None]

    # Total power of the idle bees.  Bees without a rate are assumed to
    # run at the average of those with one (or at 1.0 if none has one).
    rates = [b['rate'] for b in avail if b['rate']]
    availPower = sum(rates, 0.0)
    if not rates:
        availPower = float(len(avail))
    else:
        availPower *= float(len(avail)) / float(len(rates))
    if len(avail):
        avgPower = availPower / float(len(avail))

    i = 0
    while len(avail) > 0 and unassigned > 0:
        r = True
        # Get next bee
        b = avail.pop()
        b['etoc'] = 0
        if b['rate']:
            power = b['rate'] / availPower
            availPower -= b['rate']
        else:
            power = avgPower / availPower
            availPower -= avgPower

        # Get next work unit: skip tasks that are fully assigned.
        while queue[i].rem - queue[i].assigned == 0:
            i += 1
        t = queue[i]
        if t.p.Unity:
            stint = t.rem
        else:
            stint = int(unassigned * power + 0.5)
            if stint < 1:
                stint = 1
            # Do not leave a remainder of 10 or fewer behind; hand over
            # everything that is left on the task instead.
            if t.rem - t.assigned - stint <= 10:
                stint = t.rem - t.assigned
        b['solo'] = (stint == t.rem and t.completed == 0 and t.assigned == 0)
        b['stint'] = stint

        if smds.sendMsg(b['socket'], smds.messages['WORK'], t.strip(False),
                        stint, b['solo'], worktag):
            t.status = smds.Task.Status_Running
            t.assigned += stint
            unassigned -= stint
            b['task'] = t
            b['worktag'] = worktag
            working += 1
            smds.msg("Sent %d from <%s> to %s, %d/%d assigned; %d in queue." %
                     (stint, t.ID, b['host'], t.assigned+t.completed,
                      t.rem+t.completed, unassigned), smds.MSG_DBUG)
        else:
            remoteDead(b)
    return r
def activateFoundry(h):
    """Announce foundry h['name'] to every master with an ADD message.

    Does nothing if the foundry is not marked usable.  Masters for which
    the send fails are handed to masterDead() after the pass.  The whole
    operation runs while holding mx.
    """
    mx.acquire()
    try:
        if not h['usable']:
            # The original called the misspelled `mx.relase()` here,
            # which raised AttributeError and left mx locked.
            return
        smds.msg("activateFoundry(%s)" % h['name'])
        defunct = []
        for m in myMasters:
            if not smds.sendMsg(m, smds.messages['ADD'], h['name']):
                defunct.append(m)
        for m in defunct:
            masterDead(m)
    finally:
        mx.release()
def run():
    """Foundry main loop: accept connections and dispatch their messages.

    Registers shutdown() with atexit, installs handlers that call exit()
    on SIGTERM (and SIGBREAK where it exists), loads the host list, binds
    the listening socket to FOUNDRY_PORT and starts the background
    thread.  It then loops forever, reading one message from each
    accepted connection and passing it to the matching handler.
    """
    global theSocket, theThread
    from atexit import register
    from sys import exit
    from signal import signal, SIGTERM
    from traceback import format_exc

    register(shutdown)
    signal(SIGTERM, lambda x, y: exit())
    try:
        # SIGBREAK is not available on every platform.
        from signal import SIGBREAK
        signal(SIGBREAK, lambda x, y: exit())
    except (ImportError, ValueError):
        pass

    loadHosts()
    s = theSocket
    s.bind(('', FOUNDRY_PORT))
    s.listen(5)
    theThread = threading.Thread(None, bkg, "SMDS Foundry Background Thread")
    theThread.setDaemon(True)
    theThread.start()

    while True:
        try:
            (c, addr) = s.accept()
        except Exception:
            # A failed accept (for example a timeout) is not fatal; the
            # housekeeping below still runs.  This must not be a bare
            # `except`: that would swallow the SystemExit raised by the
            # signal handlers above and keep the process alive.
            pass
        else:
            # got a new connection
            c.setblocking(1)
            packet = smds.recvMsg(c)
            if packet:
                (msg, args) = packet
                if msg == smds.messages['QUIT']:
                    quit()
                # Messages 3 and 7 get the connection as first argument.
                # NOTE(review): presumably the two register messages;
                # confirm against smds.messages.
                if msg == 3 or msg == 7:
                    args = (c,) + args
                if msg > maxHandler:
                    h = nullHandler
                    args = (msg,) + args
                else:
                    h = handlers[msg]
                try:
                    h(*args)
                except Exception:
                    smds.msg("Bad message from %s" % addr[0], smds.MSG_WARN)
                    print(format_exc())
                if msg == smds.messages['RELEASE']:
                    quit()
            else:
                smds.msg("Bad connection from %s" % addr[0], smds.MSG_WARN)
        # `work` is set exactly while at least one master is registered.
        if len(myMasters):
            work.set()
        else:
            work.clear()
        if platform != 'win32':
            cleanChildren()
def removeHost(name):
    """Stop offering the foundry on host `name` to the masters.

    The name is resolved to its canonical host name.  If that is this
    machine, includeMe is cleared; otherwise the matching host record is
    marked unusable (under mx).  Either way the foundry is deactivated.
    An unknown host is ignored.
    """
    global includeMe
    (host, aliases, addrs) = socket.gethostbyaddr(socket.gethostbyname(name))
    smds.msg("removeHost(%s) as %s" % (name, host))
    if host == myName:
        includeMe = False
        deactivateFoundry({'name': myName})
        return
    for entry in hosts:
        if entry['name'] != host:
            continue
        mx.acquire()
        entry['usable'] = False
        mx.release()
        deactivateFoundry(entry)
        return
def sendWork():
    """Pair queued parameter sets with idle bees and send them out.

    Each hand-out pops one (index, args) item from the queue, gives it a
    fresh workID and records it on the bee.  A bee whose WORK message
    cannot be sent is passed to remoteDead().
    """
    global working, workID
    idle = [bee for bee in bees if bee['work'] is None]
    while queue and idle:
        bee = idle.pop()
        (index, params) = queue.pop()
        working += 1
        workID += 1
        bee['work'] = (index, params, workID)
        delivered = smds.sendMsg(bee['socket'], smds.messages['WORK'],
                                 workID, theFunc, params)
        if delivered:
            smds.msg("Sent %s%s <%d> to %s (%d remaining in queue)" %
                     (theFunc, str(params), workID, bee['host'], len(queue)),
                     smds.MSG_DBUG)
        else:
            remoteDead(bee)
def registerFunction(func):
    """registerFunction(func) -> distributedFunc

    Return func wrapped for distributed computation.  The function's
    code object is marshalled into the store under a generated id, and
    the returned callable forwards its argument to execute() with that
    id.

    As documented originally: the numpy and math modules are imported
    automatically for the function, data stored with registerData() can
    be retrieved by calling fetchData(name), and the returned function
    takes a list of parameter tuples and returns the list of results,
    one per tuple.
    """
    global funcID
    if not initialized:
        initialize()
    key = '_func' + str(funcID)
    funcID += 1
    smds.msg("Registering function <%s>" % key, smds.MSG_DBUG)
    code_blob = mDump(func.func_code, 1)
    store[key] = code_blob
    return lambda args: execute(key, args)
def checkTime():
    """Tell the parent about the engine's ETOC when it has moved enough.

    When the remaining time has run out, a positive lastEtoc is reset to
    0.  While time remains and the engine's ETOC differs from lastEtoc
    by more than 90 seconds, an ETOC message with the remaining time and
    the engine rate is sent and lastEtoc is updated.  A failed send
    releases mx, closes the parent connection and shuts down.
    """
    global lastEtoc
    etoc = smds.Engine.etoc() - time()
    if etoc <= 0 and lastEtoc > 0:
        lastEtoc = 0
        return
    drifted = etoc > 0 and abs(smds.Engine.etoc() - lastEtoc) > 90.0
    if not drifted:
        return
    smds.msg("Notifying master of etoc %f" % etoc, smds.MSG_DBUG)
    delivered = smds.sendMsg(parent, smds.messages['ETOC'], etoc,
                             smds.Engine.rate())
    if not delivered:
        mx.release()    # presumably held by the caller -- TODO confirm
        smds.msg("Error writing to master.", smds.MSG_WARN)
        parent.close()
        smds.shutdown()
        exit()
    lastEtoc = smds.Engine.etoc()
def registerMaster(s, id):
    """Register the master on socket s and tell it about every foundry.

    s  -- the master's socket; becomes the key in myMasters.
    id -- the master's identifier; stored as a string.

    After refreshing the foundries, an ADD message is sent for this
    machine (when includeMe is set) and for each usable host that has a
    socket.  If any send fails the master is handed to masterDead() and
    is not registered.  On success the listening socket's timeout is set
    to 300 seconds.  The whole operation runs while holding mx.
    """
    global includeMe
    id = str(id)
    mx.acquire()
    try:
        smds.msg("registerMaster(%s)" % id)
        refreshFoundries(True)
        if includeMe:
            if not smds.sendMsg(s, smds.messages['ADD'], myName):
                masterDead(s, id)
                return
        for h in [x for x in hosts if x['usable'] and x['socket']]:
            if not smds.sendMsg(s, smds.messages['ADD'], h['name']):
                masterDead(s, id)
                return
        myMasters[s] = id
        theSocket.settimeout(300.0)
    finally:
        # The original returned early on a failed send without releasing
        # mx; the finally clause releases it on every exit path.
        mx.release()
def execute(funcID, args):
    """Run the registered function funcID over every parameter set.

    args -- indexable sequence of parameter sets.

    Queues one (index, parameters) item per set, then keeps servicing
    messages, hosts, incoming connections and work hand-outs until the
    queue is empty and nothing is still being worked on.  Returns the
    results list, which has one slot per parameter set.

    Raises ValueError if the module is not initialized or funcID is not
    in the store.
    """
    global queue, working, done, theFunc
    if not (initialized and funcID in store):
        raise ValueError("Unknown function.")
    count = len(args)
    smds.msg("Executing <%s> with %d parameter sets." % (funcID, count),
             smds.MSG_DBUG)
    theFunc = funcID
    done = [None] * count
    queue = [(index, args[index]) for index in range(count)]
    while len(queue) or working:
        processMessages()
        touchHosts()
        incomingConnections()
        sendWork()
    # Hand back the results and reset the per-run globals.
    results = done
    done = None
    theFunc = None
    return results
def incomingConnections():
    """Accept every pending connection and register each as a new bee.

    Connections are accepted until accept() fails.  The connecting host
    is removed from waiting and hosts.  If that host already has a bee,
    the new connection is told to QUIT and closed; otherwise a bee record
    with no work is appended to bees.
    """
    while True:
        try:
            (c, addr) = theSocket.accept()
        except socket.error:
            # Nothing (more) to accept.  Catching only socket errors, not
            # everything, keeps KeyboardInterrupt from being swallowed.
            return
        c.setblocking(1)
        (host, aliases, addrs) = socket.gethostbyaddr(addr[0])
        if host in waiting:
            del waiting[host]
        if host in hosts:
            hosts.remove(host)
        if host in [b['host'] for b in bees]:
            # Only one bee per host: turn the newcomer away and release
            # its socket explicitly instead of leaving it to the GC.
            smds.sendMsg(c, smds.messages['QUIT'])
            c.close()
        else:
            bees.append({
                'host': host,
                'socket': c,
                'work': None,
            })
            smds.msg("New Bee: %s (%d/%d avail)" %
                     (host, len(bees)-working, len(bees)), smds.MSG_DBUG)
def addHost(name):
    """Mark the foundry on host `name` as usable.

    The name is resolved to its canonical host name.  If that is this
    machine, includeMe is set.  A known host is marked usable, and is
    activated if it was previously unusable and has a socket.  An
    unknown host gets a new usable record with no socket.
    """
    global includeMe
    (host, aliases, addrs) = socket.gethostbyaddr(socket.gethostbyname(name))
    smds.msg("addHost(%s) as %s " % (name, host))
    if host == myName:
        includeMe = True
        return
    for entry in hosts:
        if entry['name'] != host:
            continue
        # Only a connected foundry that was switched off needs to be
        # announced again.
        needs_activation = (not entry['usable']) and entry['socket']
        entry['usable'] = True
        if needs_activation:
            activateFoundry(entry)
        return
    hosts.append({
        'name': host,
        'socket': None,
        'usable': True,
    })