def _fetchLogicals(self, logicals, srcs):
    """Fetch the listed logical files from their sources.

    logicals -- list of logical filenames that must be made readable locally
    srcs -- iterable of (logical, source) pairs naming where each file lives
    Returns a list of (logical, physical) pairs for files actually fetched.

    Holds self._fetchLock around each per-file check-and-fetch so that
    concurrent threads do not fetch the same file twice.
    """
    fetched = []
    if not logicals:
        return []
    log.info("need fetch for %s from %s" % (str(logicals), str(srcs)))
    d = dict(srcs)
    for lf in logicals:
        self._fetchLock.acquire()
        # BUG FIX: the original released the lock manually and the
        # "missing source" continue skipped the release, leaving
        # _fetchLock held forever.  try/finally guarantees release on
        # every path (early continue, fetch error, normal completion).
        try:
            if self.filemap.existsForRead(lf):
                # Another thread fetched it while we waited for the lock.
                log.debug("satisfied by other thread")
                continue
            start = time.time()
            self.fetchFile = lf
            phy = self.filemap.mapWriteFile(lf)
            if lf not in d:
                log.error("Missing source for %s" % lf)
                continue
            log.debug("fetching %s from %s" % (lf, d[lf]))
            self._fetchPhysical(phy, d[lf])
            fetched.append((lf, phy))
            self.fetchFile = None
            end = time.time()
            diff = end - start
            statistics.tracker().logTransfer(d[lf], diff)
        finally:
            self._fetchLock.release()
    return fetched
def _launchScript(self, script, paramList, token):
    """Submit a workflow script for execution and bind the task to token.

    The token slot is first filled with the current thread id as a
    placeholder, then overwritten with the real task once admitted.
    """
    placeholder = thread.get_ident()
    self._updateToken(token, placeholder)
    log.info("Admitting workflow for execution")
    submitted = self.swampInterface.submit(script, paramList, self.filemap)
    log.info("Admitted workflow: workflow id=%s" % submitted.taskId())
    self._updateToken(token, submitted)
    return submitted
def dropWorker(self, executor):
    """Remove a worker from the executor pool.

    executor -- the worker executor object to drop
    Returns True if the worker was found and removed, False otherwise.
    """
    if executor in self.executor:
        log.info("Removing worker " + executor.url)
        self.executor.remove(executor)
        return True
    else:
        # BUG FIX: the original returned True here too, so callers could
        # not tell that the removal actually failed.
        log.warning("Tried, but couldn't remove " + executor.url)
        return False
def _actualToPub(self, f):
    """Translate an actual filesystem path into its published (export) form.

    Tries each (publishPath, internalPath) pair in order; the first
    internal path that prefixes f wins.  Falls back to the first export
    prefix when no mapping applies.
    """
    for ((ppath, ipath), prefix) in izip(self.publishedPaths, self.exportPrefix):
        pieces = f.split(ipath + os.sep, 1)
        if len(pieces) > 1:
            return prefix + pieces[1]
    log.info("Got request for %s which is not available" % f)
    return self.exportPrefix[0]
def newScriptedFlow(self, script, paramList=None):
    """Allocate a token for a new workflow and launch it asynchronously.

    Returns the integer token that identifies the workflow.
    """
    # Bump the shared counter under the lock so tokens are unique.
    self.tokenLock.acquire()
    self.token += 1
    token = self.token
    self.tokenLock.release()
    # The script body is deliberately omitted from the log message.
    log.info("Received new workflow (%d) {%s}" % (token, ""))
    self._threadedLaunch(script, paramList, token)
    log.debug("return from thread launch (%d)" % (token))
    return token
def mapWriteFile(self, f, altPrefix=None):
    """Map logical name f to a writable physical path and record it.

    altPrefix -- optional prefix overriding self.writePrefix
    Falls back to self.bulkPrefix when the chosen target has no space
    left.  Records the mapping in both directions and returns the
    physical path.
    """
    prefix = self.writePrefix if altPrefix is None else altPrefix
    target = prefix + f
    if not self.spaceLeft(target):
        # Primary area is full; divert this file to bulk storage.
        target = self.bulkPrefix + f
        log.info("mapping %s to bulk at %s" % (f, target))
    self.logical[target] = f
    self.physical[f] = target
    return target
def read(self):
    """Read the config file(s) and populate attributes from Config.CFGMAP.

    Each CFGMAP entry m is (attrName, section, option, default); the
    attribute is set from the file value when present, else the default.
    """
    log.info("Reading configfile %s" % (self.filepath))
    # NOTE(review): map() is eager in Python 2; presumably self.filepath is a
    # list of paths read in order so later files override earlier ones — confirm.
    map(self.config.read, self.filepath) # read all configs in-order.
    for m in Config.CFGMAP:
        val = m[3] # preload with default
        if self.config.has_option(m[1], m[2]):
            # ConfigParser always returns strings; remap well-known tokens
            # (e.g. boolean-like words) first, otherwise coerce to the
            # default's type using the first whitespace-separated word.
            val = self.config.get(m[1], m[2])
            if val in Config.REMAP:
                val = Config.REMAP[val]
            elif not isinstance(val, type(m[3])):
                val = type(m[3])(val.split()[0]) # coerce type to match default
        setattr(self, m[0], val)
        log.debug("set config %s to %s" % (m[0], str(val)))
        pass
    self.postReadFixup()
    pass
def finish(self):
    """Mark as finished, and perform whatever else we need to do to close
    things down, e.g. calculate durations, flush to disk, etc.

    Logs and prints a human-readable summary of the run's statistics.
    """
    # Finalize timing fields if nothing stopped the clock yet.
    if not self.finishTime:
        self.stop()
    report = [
        "flush script " + str(self.runTime) + " seconds",
        "compute time " + str(self.computeTime) + " seconds",
        "parse time " + str(self.parseTime) + " seconds",
        "internal xfer time " + str(self.transferTime) + " seconds",
        "output size " + str(self.outputSize),
        "input size " + str(self.inputSize),
        "intermediate size " + str(self.intermedSize),
        "overall tree width " + str(self.dagWidth),
        "local slots " + str(self.task.config.execLocalSlots)]
    joined = "\n".join(report)
    log.info(joined)
    # FIX: use the function-call form of print.  For a single argument this
    # prints identically under Python 2 (parentheses are just grouping) and
    # is valid syntax under Python 3, unlike the old `print joined` statement.
    print(joined)
def testSwampInterface(): from swamp.execution import LocalExecutor from swamp.execution import FakeExecutor #logging.basicConfig(level=logging.DEBUG) wholelist = open("full_resamp.swamp").readlines() test = [ "".join(wholelist[:10]), "".join(wholelist[:6000]), "".join(wholelist), testScript4] c = Config("swamp.conf") c.read() fe = FakeExecutor() le = LocalExecutor.newInstance(c) #si = SwampInterface(fe) si = SwampInterface(c, le) log.info("after configread at " + time.ctime()) #evilly force the interface to use a remote executor assert len(si.remote) > 0 si.executor = si.remote[0] taskid = si.submit(test[1]) log.info("finish at " + time.ctime()) print "submitted with taskid=", taskid
def reset(self):
    """Reset state between runs; refuses to run at production level.

    Clean-up of leftovers from earlier runs: jobs still in progress are
    deliberately ignored, and physical-file cleanup is currently disabled.
    """
    level = self.config.serviceLevel
    if level == "production":
        log.info("refusing to do hard reset: unsafe for production")
        return
    assert level in ["debug", "testing"]
    log.info("Reset requested--disabled")
    # Physical file cleanup is intentionally commented out for now:
    #self.fileMapper.cleanPhysicals()
    log.info("Reset finish")
def submit(self, script, paramList, outputMapper):
    """Parse a script into a SwampTask and queue it on the main thread.

    script -- workflow script text
    paramList -- parameters passed to the task customizer
    outputMapper -- file mapper used for the task's outputs
    Returns the created SwampTask.
    """
    customizer = self._makeCustomizer(paramList)
    task = SwampTask(self.executor, self.config,
                     script, outputMapper, customizer)
    log.info("after parse: " + time.ctime())
    self.mainThread.acceptTask(task)
    return task
def _setupLogging(self, config):
    """Route swamp logging to the location and level named in config."""
    location = config.logLocation
    swamp.setupLog(location, config.logLevel)
    log.info("Swamp master logging at " + location)