def _graduateCmd(self, cmd, cluster, fail, custom):
    # The cluster callback needs to pass this object through so that we
    # know when a cluster is finished; otherwise we can't mark ourselves
    # as needing work.  Alternatively, "need for work" could be defined
    # by polling and checking some mix of processing load and queue length.
    # custom is of the form:
    # {'filename.nc': ["('http://host:8082/pathname/munged.nc', 1234)"]}
    # FIXME: don't forget to do the unregistering.
    if custom:
        def unbundle(x):
            props = x[1][0]
            # Want to do eval(x[1][0]), but it's unsafe.
            props = props[1:-1].split(', ') # Drop the parens and split.
            return (x[0], props[0][1:-1], int(props[1]))
        cmd.actualOutputs = [unbundle(x) for x in custom.items()]
        log.debug("Remote cmd produced %s" % (str(cmd.actualOutputs)))
        self.actual.update([(x[0], x[1]) for x in cmd.actualOutputs])
    else:
        cmd.actualOutputs = []
        log.warning("Remote cmd produced no outputs")
    # Do cluster bookkeeping.
    cluster.exec_finishCount += 1
    if cluster.exec_finishCount == cluster.exec_criticalCount:
        # Request deferred discard.
        cluster.exec_finishFunc()
        self.runningClusters.discard(cluster) # discard suppresses errors.
        files = chain(*imap(lambda c: c.outputs, cluster.deferred))
        self.rpc.discardFiles(list(files))
        self.finishedClusters.add(cluster)
    pass
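# The string-splitting in unbundle() above breaks if a URL ever contains
# ", ".  A safer alternative (hedged sketch, not wired in; Python 2.6+) is
# ast.literal_eval, which parses the tuple literal without eval()'s risks:
import ast

def _unbundleSafe(item):
    """item is ('filename.nc', ["('http://host/munged.nc', 1234)"])."""
    (logical, propList) = item
    (url, size) = ast.literal_eval(propList[0])
    return (logical, url, int(size))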
def listenTwisted(self, extInit=lambda : None): from twisted.internet import reactor root = tResource.Resource() tStatic.loadMimeTypes() # load from /etc/mime.types # setup static file paths map(lambda x: root.putChild(x[0],tStatic.File(x[1])), self.staticPaths) # setup exportable interface print "publish",self.soapPath root.putChild(self.soapPath, self._makeWrapper(self.funcExports, "soap", tSoap.SOAPPublisher)) if self.xmlPath: print "publish",self.xmlPath root.putChild(self.xmlPath, self._makeWrapper(self.funcExports, "xmlrpc", tXmlrpc.XMLRPC)) map(lambda x: root.putChild(x[0],x[1]), self.customChildren) # init listening reactor.listenTCP(self.soapPort, tServer.Site(root)) log.debug("Starting SWAMP interface at: %s"% self.url) print "Starting SWAMP interface at: %s"% self.url extInit() subproc.usingTwisted = True reactor.run() pass
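# Hedged usage sketch (service construction hypothetical): once exports and
# static paths are registered, listenTwisted() publishes them and then blocks
# in the Twisted reactor until shutdown, so extInit() must return promptly:
#   svc = SoapService(...)
#   svc.listenTwisted(extInit=lambda: log.info("interface up"))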
def _publishIfOutput(self, obj):
    """obj can be either a logical filename or a command,
    or a list of either"""
    if getattr(obj, "actualOutputs", False): # duck-typing
        actfiles = obj.actualOutputs
    elif getattr(obj, "outputs", False):
        log.error("_publishIfOutput expected cmd, but got %s" % str(obj))
        # Don't know how to publish.
        return
    else:
        # A remote cmd will have empty actualOutputs (which evaluates to
        # False); nothing to publish in that case.
        return
    log.debug("raw outs are %s" % (str(actfiles)))
    files = filter(lambda f: f[0] in self.logOuts, actfiles)
    log.debug("filtered is %s" % (str(files)))
    if files and (len(files[0]) > 3):
        localfiles = imap(itemgetter(3), files)
    else:
        localfiles = itertools.repeat(None)
    # FIXME: unmap the local files from the old mapping once published.
    targetfiles = map(lambda ft: (ft[0], ft[1],
                                  self.outMap.mapWriteFile(ft[0])),
                      files)
    # Fork a thread for this in the future.
    self._publishHelper(izip(targetfiles, localfiles))
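# Shapes flowing into _publishHelper above: each zipped element is
#   ((logical, actualUrlOrPath, targetWritePath), localPathOrNone)
# e.g. (values illustrative):
#   (('out.nc', 'http://host:8082/s/out.nc', '/pub/out.nc'), None)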
def _fetchLogicals(self, logicals, srcs):
    fetched = []
    if len(logicals) == 0:
        return []
    log.info("need fetch for %s from %s" % (str(logicals), str(srcs)))
    d = dict(srcs)
    for lf in logicals:
        self._fetchLock.acquire()
        try:
            # try/finally guarantees the lock is released on every exit
            # path (the original leaked it on the missing-source continue).
            if self.filemap.existsForRead(lf):
                log.debug("satisfied by other thread")
                continue
            start = time.time()
            self.fetchFile = lf
            phy = self.filemap.mapWriteFile(lf)
            if lf not in d:
                log.error("Missing source for %s" % lf)
                continue
            log.debug("fetching %s from %s" % (lf, d[lf]))
            self._fetchPhysical(phy, d[lf])
            fetched.append((lf, phy))
            self.fetchFile = None
            end = time.time()
            diff = end - start
            statistics.tracker().logTransfer(d[lf], diff)
        finally:
            self._fetchLock.release()
    return fetched
def discardLogical(self, f):
    if f in self.private:
        log.debug("linked remove " + str(f))
        self.private.remove(f)
        return self.parent.discardLogical(self.pref + f)
    else:
        log.debug("tried to discard unmapped file " + f)
    pass
def assign(lhs, rhs): if len(rhs) == 1: rhs = rhs[0] self.varMap[lhs] = rhs #print "assigning %s = %s" % (lhs,rhs) log.debug("assigning %s = %s" % (lhs,rhs)) return
def newScriptedFlow(self, script, paramList=None):
    self.tokenLock.acquire()
    self.token += 1
    token = self.token + 0 # copy the value while the lock is held
    self.tokenLock.release()
    # Log only the token; scripts can be large.
    #log.info("Received new workflow (%d) {%s}" % (token, script))
    log.info("Received new workflow (%d)" % token)
    self._threadedLaunch(script, paramList, token)
    log.debug("return from thread launch (%d)" % (token))
    return token
def _graduateCmd(self, cmdTuple):
    self.stateLock.acquire()
    # Fix internal structures to be consistent:
    cmd = cmdTuple[0]
    cluster = cmdTuple[1]
    # Update cluster status.
    cluster.exec_finishedCmds.add(cmd)
    self.finished.add(cmd)
    ## DEBUG. REMOVE later.
    for x in chain(cmd.actualOutputs, cmd.rFetchedFiles):
        self.actual[x[0]] = x[1]
    if cluster.exec_outputPatch:
        cmd.actualOutputs = map(lambda t: (t[0],
                                           cluster.exec_outputPatch(t[1]),
                                           os.stat(t[1]).st_size, t[1]),
                                chain(cmd.actualOutputs, cmd.rFetchedFiles))
    log.debug("Local graduation with outputs: %s" % (str(cmd.actualOutputs)))
    # Put children on the root queue if they are ready.
    newready = set()
    for c in cmd.children:
        if c not in cluster: # don't dispatch outside my cluster
            continue
        ready = all(f in self.actual for f in c.inputs)
        if ready:
            newready.add(c)
            if not all(p in self.finished for p in c.parents):
                partialprod = map(lambda f: f in self.actual, c.inputs)
                log.error("%s ----CONFLICT--- inputs %s 'ready' (%s in %s)"
                          " but parents not finished (graduating cmd %s)"
                          % (id(c), str(c.inputs), str(partialprod),
                             str(self.actual.keys()), id(cmd)))
    # Protect enqueuing since threads can race here (2 parents -> 1 child).
    self.cmdsEnqueuedLock.acquire()
    enq = newready.difference(self.cmdsEnqueued)
    self.cmdsEnqueued.update(enq)
    self.cmdsEnqueuedLock.release()
    map(lambda c: self._enqueue(c, cluster), enq)
    # Report results.
    if hasattr(cmd, 'callbackUrl'):
        self._touchUrl(cmd.callbackUrl[0], cmd.actualOutputs)
    else:
        log.debug("deferring callback for cmd %s" % cmd.cmd)
    if len(cluster.exec_finishedCmds) == len(cluster.cmds):
        # Call cluster graduation.
        func = self.runningClusters.pop(cluster)
        func()
        self.finishedClusters.add(cluster)
    self.stateLock.release()
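# exec_outputPatch, when set, rewrites a worker-local output path into a
# fetchable URL, and each patched tuple gains the local path as a 4th
# element: (logical, patchedUrl, sizeBytes, localPath).  Hedged sketch of
# such a patch function (URL prefix hypothetical):
import os

def _makeOutputPatch(prefix="http://host:8082/pub"):
    def patch(localPath):
        return prefix + "/" + os.path.basename(localPath)
    return patch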
def graduate(self, cmd, gradHook, executor, fail, custom):
    # The dispatcher isn't really in charge of dependency checking,
    # so it doesn't really need to know when things are finished.
    gradHook(cmd, fail, custom) # Service the executor's hook first.
    self.count += 1
    if fail:
        origline = ' '.join([cmd.cmd]
                            + map(lambda t: ' '.join(t), cmd.argList)
                            + cmd.leftover)
        s = "Bad return code %s from cmdline %s %d outs=%s" % (
            str(fail), origline, cmd.referenceLineNum, str(cmd.outputs))
        log.error(s)
        # For nicer handling, we should find the original command line
        # and pass it back as the failing line (+ line number).
        # It would be nice to trap the stderr for that command, but that
        # can be done later, since it's a different set of pipes to connect.
        self.result = "Error at line %d : %s" % (cmd.referenceLineNum,
                                                 origline)
        self.resultEvent.set()
        return
    else:
        # Figure out which one finished, and graduate it.
        log.debug("graduating %s %d" % (cmd.cmd, cmd.referenceLineNum))
        self.finished.add(cmd)
        # Are any clusters made ready?  Check this cluster's descendants:
        # for each, see if all their parent cmds are finished.  For now,
        # don't dispatch a cluster until all its parents are ready.
        # If it's a leaf cmd, publish its results.
        # Apply reaper logic (same as before): delete consumed files.
        if self.okayToReap:
            self._reapCommands([cmd])
        # A location token is (executor, etoken).
        map(lambda o: appendList(self.execLocation, o[0], (executor, o[1])),
            cmd.actualOutputs)
        self.gradHook(cmd)
        if self.idle():
            self._cleanupExecs()
            self.result = True
            self.resultEvent.set()
        return
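# appendList is a SWAMP helper used as a multimap insert; from its use here
# it behaves like the following sketch (an assumption, not the real helper):
def appendList(aDict, key, val):
    aDict.setdefault(key, []).append(val)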
def _touchUrl(self, url, actualOutputs):
    if callable(url): # allow a callback function in place of a URL
        return url(None)
    try:
        pkg = dict([(x[0], (x[1], x[2])) for x in actualOutputs])
        data = urllib.urlencode(pkg)
        log.debug("Touching URL: %s" % url)
        f = urllib2.urlopen(url + "?" + data)
        f.read() # read result, discard for now
    except (KeyError, IndexError, urllib2.URLError):
        return False
    return True
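# What the callback URL receives (values illustrative): with
#   actualOutputs = [('out.nc', 'http://host:8082/s/out.nc', 1234)]
# pkg becomes {'out.nc': ('http://host:8082/s/out.nc', 1234)}, and
# urlencode stringifies the tuple value, producing exactly the
# "('url', size)" strings that the remote side's unbundle() parses back.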
def postReadFixup(self): if self.execSlaveNodes > 0: urlStr = "slave%dUrl" slotStr = "slave%dSlots" self.slave = [] for i in range(1, self.execSlaveNodes+1): u = self.config.get("exec", urlStr % i) s = int(self.config.get("exec", slotStr % i)) self.slave.append((u, s)) log.debug("Added slave: url=%s slots=%d" %(u,s)) pass pass
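# The numbered keys consumed above live in the [exec] section of the config
# file.  Illustrative snippet (hostnames, and the option naming the slave
# count, are assumptions):
#   [exec]
#   slaveNodes = 2
#   slave1Url = http://worker1:8082/
#   slave1Slots = 4
#   slave2Url = http://worker2:8082/
#   slave2Slots = 2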
def registerWorker(self, certificate, offer):
    # For now, accept all certificates.
    (workerUrl, workerSlots) = (offer[0], offer[1])
    log.debug("Received offer from %s with %d slots"
              % (workerUrl, workerSlots))
    result = self.swampInterface.addWorker(workerUrl, workerSlots)
    if not result: # don't consume a token for a failed registration
        log.error("Error registering worker " + workerUrl)
        return None
    token = self._nextWorkerToken
    self._nextWorkerToken += 1
    self._workers[token] = result
    return token
def _bulkGraduate(self, cmds, executor): """Perform barebones graduation of commands to maintain bookkeeping invariants""" self.count += len(cmds) self.finished.update(cmds) log.debug("Bulk graduating: %s" % ( ", ".join(imap(lambda c: c.cmd+str(c.referenceLineNum), cmds)))) if self.okayToReap: self._reapCommands(cmds) map(lambda o: appendList(self.execLocation, o, (executor,o)), chain(*imap(lambda c: c.outputs,cmds))) # Service our hook (our parent scheduler) map(self.gradHook, cmds)
def releaseFiles(self, files):
    if not files:
        return
    log.debug("ready to delete " + str(files) + " from "
              + str(self.executors))
    # Collect by executors.
    map(lambda e: e.discardFilesIfHosted(files), self.executors)
    try:
        # Clean up execLocation.
        map(self.execLocation.pop, files)
    except KeyError:
        log.error("error in execLocation.pop for %s (keys: %s)"
                  % (str(files), str(self.execLocation.keys())))
    pass
def executeSerialAll(self, executor=None):
    def run(cmd):
        if executor:
            tok = executor.launch(cmd)
            retcode = executor.join(tok)
            return retcode
        return None # no executor: nothing ran, treated as a failure below
    for c in self.cmdList:
        ret = run(c)
        if ret != 0:
            log.debug("ret was " + str(ret))
            log.error("error running command %s" % (c))
            self.result = "ret was %s, error running %s" % (str(ret), c)
            break
def _publishHelper(self, filetuples):
    for t in filetuples:
        log.debug("publish " + str(t))
        ((logical, actual, target), local) = t
        if local:
            actual = local
        if (not local) and isRemote(actual):
            # Download, then add to local map (in db?).
            #log.debug("Download start %s -> %s" % (actual, target))
            urllib.urlretrieve(actual, target)
            # Don't forget to discard.
            log.debug("Fetch-published " + actual)
        else:
            # It's local!  This will break if we request a read on the
            # file.  Remove the file from the old mapping (discard).
            log.debug("start file move")
            shutil.move(actual, target)
            # FIXME: ping the new mapper so that it's aware of the file.
            log.debug("Published " + actual)
        self._publishedFiles.append((t[0], os.stat(target).st_size))
    pass
def run(self): if not self.result(): log.debug("Starting parallel dispatcher") self.scheduler.executeParallelAll(self.remoteExec) else: # refuse to run if we have a failure logged. log.debug("refusing to dispatch: " + str(self.result())) pass self.stat.stop() self.stat.outputFiles(self._publishedFiles) self.stat.inputFiles(map(lambda x: (x, os.stat(self.config.execSourcePath +"/"+x).st_size), self._commandFactory.scriptIns)) self.stat.commandList(self.scheduler.cmdList) self.stat.finish() pass
def read(self): log.info("Reading configfile %s" % (self.filepath)) map(self.config.read, self.filepath) # read all configs in-order. for m in Config.CFGMAP: val = m[3] # preload with default if self.config.has_option(m[1], m[2]): val = self.config.get(m[1],m[2]) if val in Config.REMAP: val = Config.REMAP[val] elif not isinstance(val, type(m[3])): val = type(m[3])(val.split()[0]) # coerce type to match default setattr(self, m[0], val) log.debug( "set config %s to %s"%(m[0], str(val))) pass self.postReadFixup() pass
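# Each CFGMAP row appears to be (attribute, section, option, default), with
# the default's type driving coercion.  Illustrative (hypothetical) row:
#   ("execSlaveNodes", "exec", "slaveNodes", 0)
# With "slaveNodes = 4" under [exec], read() sets
#   self.execSlaveNodes = int("4".split()[0])  ->  4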
def parseScript(self, script, factory): """Parse and accept/reject commands in a script, where the script is a single string containing script lines""" self._factory = factory lineCount = 0 for line in script.splitlines(): self._parseScriptLine(factory, line) lineCount += 1 if (lineCount % 500) == 0: log.debug("%d lines parsed" % (lineCount)) #log.debug("factory cmd_By_log_in: " + str(factory.commandByLogicalIn)) #log.debug("factory uselist " + str(factory.scrFileUseCount)) #print "root context has,", self._context assert self._context == self._rootContext #should have popped back to top. self._context.evaluateAll(self) pass
def __init__(self, url, slots):
    """
    url:   SOAP url for a SWAMP slave
    slots: max number of running slots

    RemoteExecutor adapts a remote worker's execution resources so that
    they may be used by a parallel dispatcher.
    """
    self.url = url
    self.slots = slots
    self.rpc = SOAPProxy(url)
    log.debug("reset slave at %s with %d slots" % (url, slots))
    try:
        self.rpc.reset()
    except Exception:
        import traceback, sys
        tb_list = traceback.format_exception(*sys.exc_info())
        msg = "".join(tb_list)
        raise StandardError("can't connect to %s: %s" % (url, msg))
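# Hedged usage sketch: the (url, slots) pairs gathered by postReadFixup()
# become RemoteExecutors at startup (slots bounds per-worker concurrency):
#   executors = [RemoteExecutor(u, s) for (u, s) in config.slave]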
def runReadyTask(self): """run the first ready task at the head of the list. precondition: Task list is locked (i.e. condition is acquired) There exists at least one ready job postcondition: Task list is locked (i.e. condition is acquired) One job has been run from the top of the list. """ # move top of readylist to running assert self.ready log.debug("running one task") self.running = self.ready.pop(0) # release lock! self.freeTaskCondition.release() # execute self.running.run() # re-acquire lock to log termination self.freeTaskCondition.acquire() # move running to done. self.done.append(self.running) self.running = None
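# Typical caller, matching the pre/postconditions above (hedged sketch):
#     self.freeTaskCondition.acquire()
#     try:
#         while not self.ready:
#             self.freeTaskCondition.wait()
#         self.runReadyTask()
#     finally:
#         self.freeTaskCondition.release()
# Note that runReadyTask() releases the condition while the task runs and
# re-acquires it afterward, so other threads can enqueue work meanwhile.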
def _parseScriptLine4(self, line): """Parse a single script line. We will build a parse tree, deferring all evaluation until later. """ self.lineNum += 1 cline = self.stripComments(line) if self.continuation: cline = self.continuation + cline self.continuation = None if cline.endswith("\\"): # handle line continuation self.continuation = cline[:-1] # excise backslash return self._context if not cline: return self._context (r, nextContext) = self._context.addLine((cline, self.lineNum, line)) self._context = nextContext if not r: log.debug("parse error: " + line) return self._context
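# Continuation example: the two physical lines
#   ncwa -a time \
#   in.nc out.nc
# arrive as separate calls; the first stashes "ncwa -a time " in
# self.continuation, and the second is parsed as the joined statement
# "ncwa -a time in.nc out.nc", carrying the later line's number.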
def _parseScript(self, script): try: log.debug("Starting parse") self.parser.parseScript(script, self._commandFactory) log.debug("Finish parse") self.scheduler.finish() log.debug("finish scheduler prep") self.scrAndLogOuts = self._commandFactory.realOuts() self.logOuts = map(lambda x: x[1], self.scrAndLogOuts) log.debug("outs are " + str(self.scrAndLogOuts)) #self.fail = "Testing: Real operation disabled" #print self.stat._dagGraph(self.scheduler.cmdList) except StandardError, e: self.fail = str(e)
def evaluate(self, evalContext):
    """ Do "real" statement evaluation (generate a command).
    return: a list of command objects
    evalContext: object with variableParser, stdAccept, and handleFunc
    """
    # Apply variable handling.
    line = evalContext.variableParser.apply(self._line)
    if not isinstance(line, str):
        return []
    command = evalContext.stdAccept((line, self._lineNum))
    if not command:
        log.debug("reject:" + self._line)
    else:
        if isinstance(command, types.InstanceType):
            command.referenceLineNum = self._lineNum
            command.original = self._original
            command.expanded = self._line
        if evalContext.handleFunc is not None:
            evalContext.handleFunc(command)
    return [command]
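# Note: types.InstanceType matches only old-style class instances, so the
# attribute tagging above is silently skipped if the command classes ever
# become new-style.  A duck-typed alternative (sketch) tags whenever a
# command object was produced:
#   if command:
#       command.referenceLineNum = self._lineNum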
def _launchLocal(self, cmd, locations=[]):
    if not all(p in self.finished for p in cmd.parents):
        log.error("%s was queued, but isn't ready!" % id(cmd))
    # Make sure our inputs are ready.
    missing = filter(lambda f: not self.filemap.existsForRead(f),
                     cmd.inputs)
    cmd.rFetchedFiles = []
    if locations:
        cmd.inputSrcs = locations
    if missing:
        fetched = self._fetchLogicals(missing, cmd.inputSrcs)
        cmd.rFetchedFiles = fetched
    self._verifyLogicals(set(cmd.inputs).difference(missing))
    cmdLine = cmd.makeCommandLine(self.filemap.mapReadFile,
                                  self.filemap.mapWriteFile)
    # Make room for outputs (shouldn't be needed).
    self._clearFiles(map(lambda t: t[1], cmd.actualOutputs))
    log.debug("Launch %s " % (" ".join(cmdLine)))
    (code, out) = subproc.call(self.binaryFinder(cmd), cmdLine)
    cmd.exec_output = out
    return code
def addWorker(self, url, slots):
    log.debug("trying to add new worker: %s with %d" % (url, slots))
    worker = RemoteExecutor(url, slots) # avoid shadowing the re module
    self.executor.append(worker)
    assert worker in self.executor
    return worker
def selfDestruct(self):
    # %s, not %d: taskId is passed through str().
    log.debug("Task %s is self-destructing" % (str(self.scheduler.taskId)))
def discardFlow(self, token): task = self.jobs[token] task.cleanPhysicals() self.discardedJobs[token] = self.jobs.pop(token) log.debug("discarding for token %d" %(token)) pass
def _failCmd(self, cmdTuple, code): log.debug("Fail %s code: %s" %(cmdTuple[0], str(code))) if hasattr(cmdTuple[0], 'callbackUrl'): self._touchUrl(cmdTuple[0].callbackUrl[1],[]) else: log.debug("Deferring failure: NOT IMPLEMENTED.")
def _discardHosted(self, files): log.debug("req discard of %s on %s" %(str(files), self.url)) map(self.actual.pop, files) self.rpc.discardFiles(files)