Beispiel #1
0
    def _graduateCmd(self, cmd, cluster, fail, custom):
        # Cluster callback needs to passthrough this object,
        # so that we know when a cluster is finished, otherwise
        # we can't mark ourselves as needing work.
        # Alternatively, "need for work" can be defined by polling
        # and checking some mix of processing load and queue length.

        # custom is of the form: 
        #{'filename.nc': ["('http://host:8082/pathname/munged.nc', 1234)"]}
        # FIXME: don't forget to do the unregistering.
        if custom:
            def unbundle(x):
                props = x[1][0] # Want to do eval(x[1][0]), but it's unsafe.
                props = props[1:-1].split(', ') # Drop the parens and split.
                
                return (x[0], props[0][1:-1], int(props[1]))
            cmd.actualOutputs = [unbundle(x) for x in custom.items()]
            log.debug("Remote cmd produced %s" %(str(cmd.actualOutputs)))
            self.actual.update([(x[0],x[1]) for x in cmd.actualOutputs])
        else:
            cmd.actualOutputs = []
            log.warning("Remote cmd produced no outputs")
        
        # Do cluster bookkeeping    
        cluster.exec_finishCount += 1
        if cluster.exec_finishCount == cluster.exec_criticalCount:
            # Request deferred discard.
            cluster.exec_finishFunc()
            self.runningClusters.discard(cluster) #discard supresses errors.
            files = chain(*imap(lambda c: c.outputs, cluster.deferred))
            self.rpc.discardFiles([x for x in files])

            self.finishedClusters.add(cluster)
        pass
Beispiel #2
0
    def listenTwisted(self, extInit=lambda : None):
        """Publish the configured resources and run the Twisted reactor.

        Registers the static paths, the SOAP wrapper, the XML-RPC wrapper
        (only when self.xmlPath is set) and the custom children on one root
        resource, listens on self.soapPort and then enters reactor.run().

        extInit: zero-argument callable invoked after the port is opened and
            right before the reactor starts; defaults to a no-op.
        """
        from twisted.internet import reactor
        site = tResource.Resource()
        tStatic.loadMimeTypes() # load from /etc/mime.types

        # Static file paths: each entry is (child name, filesystem path).
        for entry in self.staticPaths:
            site.putChild(entry[0], tStatic.File(entry[1]))

        # Exportable interface: SOAP always, XML-RPC only when configured.
        print("publish %s" % (self.soapPath,))
        soapWrapper = self._makeWrapper(self.funcExports, "soap",
                                        tSoap.SOAPPublisher)
        site.putChild(self.soapPath, soapWrapper)
        if self.xmlPath:
            print("publish %s" % (self.xmlPath,))
            xmlWrapper = self._makeWrapper(self.funcExports, "xmlrpc",
                                           tXmlrpc.XMLRPC)
            site.putChild(self.xmlPath, xmlWrapper)

        # Custom children: each entry is (child name, resource).
        for entry in self.customChildren:
            site.putChild(entry[0], entry[1])

        # Start listening.
        reactor.listenTCP(self.soapPort, tServer.Site(site))

        banner = "Starting SWAMP interface at: %s"% self.url
        log.debug(banner)
        print(banner)
        extInit()
        subproc.usingTwisted = True
        reactor.run()
Beispiel #3
0
    def _publishIfOutput(self, obj):
        """object can be either a logical filename or a command,
        or a list of either"""
        if getattr(obj, "actualOutputs", False): # duck-typing
            actfiles = obj.actualOutputs
        elif getattr(obj, "outputs", False):  
            log.error("publishifoutput expected cmd, but got %s"%str(obj))
            #don't know how to publish.
            return
        else: 
            # Remote cmd will have empty actualOutputs 
            # (which evaluates to False)
            return # I don't need to publish if actualOutputs is empty
            pass
        log.debug("raw outs are %s" %(str(actfiles)))
        files = filter(lambda f: f[0] in self.logOuts, actfiles)
        log.debug("filtered is %s" %(str(files)))
        if files and (len(files[0]) > 3):
            localfiles = imap(itemgetter(3), files)
        else:
            localfiles = itertools.repeat(None)
        #Unmap local files.
#        map(self.outMap.discardLogical,
 #           imap(itemgetter(0), ifilter(lambda t: t[1], izip(ft,localfiles))))
        targetfiles = map(lambda ft: (ft[0], ft[1],
                                      self.outMap.mapWriteFile(ft[0])),
                          files)        
        # fork a thread for this in the future.
        self._publishHelper(izip(targetfiles, localfiles))
Beispiel #4
0
    def _fetchLogicals(self, logicals, srcs):
        fetched = []
        if len(logicals) == 0:
            return []
        log.info("need fetch for %s from %s" %(str(logicals),str(srcs)))
        d = dict(srcs)
        for lf in logicals:
            self._fetchLock.acquire()
            if self.filemap.existsForRead(lf):
                self._fetchLock.release()
                log.debug("satisfied by other thread")
                continue
            start = time.time()
        
            self.fetchFile = lf
            phy = self.filemap.mapWriteFile(lf)
            if lf not in d:
                log.error("Missing source for %s" %lf)
                continue
            log.debug("fetching %s from %s" % (lf, d[lf]))
            self._fetchPhysical(phy, d[lf])
            fetched.append((lf, phy))
            self.fetchFile = None
            end = time.time()
            diff = end-start
            statistics.tracker().logTransfer(d[lf], diff)
            self._fetchLock.release()

        return fetched
Beispiel #5
0
 def discardLogical(self, f):
     """Forget the private file f and discard it on the parent map.

     When f is in self.private it is removed there and the call is
     forwarded to self.parent with self.pref prepended to the name; the
     parent's result is returned.  Otherwise the attempt is only logged and
     None is returned.
     """
     if f not in self.private:
         log.debug("tried to discard unmapped file " + f)
         return
     log.debug( "linked remove"+str(f))
     self.private.remove(f)
     return self.parent.discardLogical(self.pref + f)
Beispiel #6
0
        def assign(lhs, rhs):
            """Bind lhs in self.varMap to rhs.

            A one-element rhs is unwrapped to its single item first; any
            other rhs is stored as given.  Returns None.
            """
            value = rhs
            if len(rhs) == 1:
                value = rhs[0]
            self.varMap[lhs] = value
            log.debug("assigning %s = %s" % (lhs,value))
Beispiel #7
0
 def newScriptedFlow(self, script, paramList=None):
     """Allocate the next workflow token and launch script under it.

     script: script text handed to self._threadedLaunch.
     paramList: optional parameter list, passed through unchanged.

     Returns the newly allocated token.
     """
     # The counter is bumped under tokenLock so that concurrent callers
     # get distinct tokens.
     lock = self.tokenLock
     lock.acquire()
     self.token += 1
     issued = self.token
     lock.release()
     # The script text itself is not logged; its slot is left empty.
     log.info("Received new workflow (%d) {%s}" % (issued, ""))
     self._threadedLaunch(script, paramList, issued)
     log.debug("return from thread launch (%d)" % (issued))
     return issued
Beispiel #8
0
    def _graduateCmd(self, cmdTuple):

        self.stateLock.acquire()
        # fix internal structures to be consistent:
        cmd = cmdTuple[0]
        cluster = cmdTuple[1]
        # Update cluster status
        cluster.exec_finishedCmds.add(cmd)
        self.finished.add(cmd) ## DEBUG. REMOVE later.

        for x in chain(cmd.actualOutputs, cmd.rFetchedFiles):
            self.actual[x[0]] = x[1]
        if cluster.exec_outputPatch:
            cmd.actualOutputs = map(lambda t:
                                    (t[0], cluster.exec_outputPatch(t[1]),
                                            os.stat(t[1]).st_size,t[1]),
                                    chain(cmd.actualOutputs, cmd.rFetchedFiles))
        log.debug("Local graduation with outputs: %s" %(str(cmd.actualOutputs)))
        # put children on root queue if ready
        newready = set()
        for c in cmd.children:
            if c not in cluster: # don't dispatch outside my cluster
                continue
            ready = reduce(lambda x,y: x and y,
                           map(lambda f: f in self.actual, c.inputs),
                           True)
            #print "inputs",c.inputs, "availfiles",self.actual.keys(),ready
            #print "ready?", ready
            if ready:
                newready.add(c)
                
                if not reduce(lambda a,b: a and b, map(lambda p: p in self.finished,  c.parents), True):
                    partialprod =  map(lambda f: f in self.actual, c.inputs)
                    print "Are ", c.inputs, "in", self.actual.keys(), partialprod, id(c)
                    print id(c), "----CONFLICT--- files 'ready' but parents not", id(cmd)

        # Protect enqueuing since threads can race here
        # (2 parents-> 1 child)
        self.cmdsEnqueuedLock.acquire()
        enq = newready.difference(self.cmdsEnqueued)
        self.cmdsEnqueued.update(enq)
        self.cmdsEnqueuedLock.release()
        map(lambda c: self._enqueue(c,cluster), enq)
        
        # report results
        if hasattr(cmd, 'callbackUrl'):
            self._touchUrl(cmd.callbackUrl[0], cmd.actualOutputs)
        else:
            log.debug("deferring callback for cmd %s" % cmd.cmd)
        if len(cluster.exec_finishedCmds) == len(cluster.cmds):
            # call cluster graduation.
            func = self.runningClusters.pop(cluster)
            func()
            self.finishedClusters.add(cluster)
        self.stateLock.release()
Beispiel #9
0
    def graduate(self, cmd, gradHook, executor, fail, custom):
        """Process the completion (or failure) of cmd on executor.

        cmd: the command that finished.
        gradHook: the executor's hook; called first as
            gradHook(cmd, fail, custom).
        executor: executor that ran cmd; recorded as the location of each of
            cmd's actual outputs.
        fail: true when the command failed.
        custom: passed through to gradHook only.

        On failure self.result is set to an error message naming the line and
        the reconstructed command line, and self.resultEvent is set.  On
        success the command is added to self.finished, reaped when
        self.okayToReap is set, its outputs are recorded in
        self.execLocation, self.gradHook is called, and when self.idle()
        reports true the executors are cleaned up, self.result becomes True
        and self.resultEvent is set.  Returns None in both cases.
        """
        # The dispatcher isn't really in charge of dependency checking, so it
        # doesn't really need to know when things are finished.
        gradHook(cmd, fail, custom) # Service the hook function first (better later?)
        self.count += 1
        if fail:
            words = [cmd.cmd] + [' '.join(t) for t in cmd.argList] + cmd.leftover
            origline = ' '.join(words)
            s = "Bad return code %s from cmdline %s %d outs=%s" % (
                "", origline, cmd.referenceLineNum, str(cmd.outputs))
            log.error(s)
            # For nicer handling, we should find the original command line
            # and pass it back as the failing line (+ line number).
            # It would be nice to trap the stderr for that command, but that
            # can be done later, since it's a different set of pipes
            # to connect.
            self.result = "Error at line %d : %s" %(cmd.referenceLineNum,
                                                    origline)
            self.resultEvent.set()
            return

        log.debug("graduating %s %d" %(cmd.cmd, cmd.referenceLineNum))
        self.finished.add(cmd)
        # For now, a cluster is not dispatched until all its parents are
        # ready; readiness of descendants is handled through the hooks.

        # delete consumed files.
        if self.okayToReap:
            self._reapCommands([cmd])
        # Remember which executor holds each output: token is (executor, etoken)
        for o in cmd.actualOutputs:
            appendList(self.execLocation, o[0], (executor, o[1]))

        self.gradHook(cmd)
        if self.idle():
            self._cleanupExecs()
            self.result = True
            self.resultEvent.set()
        return
Beispiel #10
0
 def _touchUrl(self, url, actualOutputs):
     if isinstance(url, type(lambda : True)):
         return url(None)
     try:
         pkg = dict([(x[0],(x[1],x[2])) for x in actualOutputs])
         data = urllib.urlencode(pkg)
         log.debug("Touching URL: %s" %url)
         f = urllib2.urlopen(url + "?"+data)
         f.read() # read result, discard for now
     except KeyError:
         return False
     return True
Beispiel #11
0
 def postReadFixup(self):
     """Build self.slave from the numbered slave entries of the "exec" section.

     When self.execSlaveNodes is positive, self.slave becomes a list of
     (url, slots) tuples read from the options slave<N>Url and
     slave<N>Slots for N = 1 .. execSlaveNodes.  Otherwise nothing is done.
     """
     if not (self.execSlaveNodes > 0):
         return
     self.slave = []
     for offset in range(self.execSlaveNodes):
         label = "slave%d" % (offset + 1)  # options are numbered from 1
         u = self.config.get("exec", label + "Url")
         s = int(self.config.get("exec", label + "Slots"))
         self.slave.append((u, s))
         log.debug("Added slave: url=%s slots=%d" %(u,s))
Beispiel #12
0
    def registerWorker(self, certificate, offer):
        """Register the worker described by offer and return its token.

        certificate: currently unused; all certificates are accepted.
        offer: sequence whose first two items are the worker URL and its
            number of slots.

        Returns the token under which the worker is stored in self._workers,
        or None when self.swampInterface.addWorker returned a false value.
        A failed registration consumes no token and stores nothing.
        """
        # for now, accept all certificates.
        (workerUrl, workerSlots) = (offer[0], offer[1])
        log.debug("Received offer from %s with %d slots" %(workerUrl,workerSlots))
        result = self.swampInterface.addWorker(workerUrl, workerSlots)
        if not result:
            log.error("Error registering worker " + workerUrl)
            return None

        token = self._nextWorkerToken
        self._nextWorkerToken += 1
        self._workers[token] = result
        return token
Beispiel #13
0
 def _bulkGraduate(self, cmds, executor):
     """Graduate several commands at once with minimal bookkeeping.

     cmds: sized collection of commands to mark as finished.
     executor: recorded in self.execLocation as the holder of every
         output of every command.

     Updates self.count and self.finished, reaps the commands when
     self.okayToReap is set, and finally calls self.gradHook per command.
     """
     self.count += len(cmds)
     self.finished.update(cmds)
     labels = [c.cmd+str(c.referenceLineNum) for c in cmds]
     log.debug("Bulk graduating: %s" % (", ".join(labels)))
     if self.okayToReap:
         self._reapCommands(cmds)
     for c in cmds:
         for o in c.outputs:
             appendList(self.execLocation, o, (executor,o))
     # Service our hook (our parent scheduler)
     for c in cmds:
         self.gradHook(c)
Beispiel #14
0
    def releaseFiles(self, files):
        """Discard files on every executor and drop them from execLocation.

        files: collection of file names; nothing happens when it is empty.

        Every executor in self.executors is asked to discard the files it
        hosts.  Afterwards each file is removed from self.execLocation; a
        file without an entry does not stop the cleanup of the remaining
        ones, it is only reported on stdout.
        """
        if not files:
            return
        log.debug("ready to delete " + str(files)+ "from " + str(self.executors))
        # collect by executors
        for e in self.executors:
            e.discardFilesIfHosted(files)

        # cleanup execLocation
        untracked = False
        for f in files:
            try:
                self.execLocation.pop(f)
            except KeyError:
                untracked = True
        if untracked:
            print("error execlocationpop %s" % (files,))
            print("keys: %s" % (self.execLocation.keys(),))
Beispiel #15
0
 def executeSerialAll(self, executor=None):
     """Run the commands of self.cmdList one after another on executor.

     Each command is launched and joined before the next one starts.  The
     first command whose return code is not 0 stops the run and is
     described in self.result.  Without an executor nothing is launched
     and the return code of a command is None.
     """
     for command in self.cmdList:
         status = None
         if executor:
             status = executor.join(executor.launch(command))
         if status != 0:
             log.debug( "ret was "+str(status))
             log.error("error running command %s" % (command))
             self.result = "ret was %s, error running %s" %(str(status), command)
             break
Beispiel #16
0
    def _publishHelper(self, filetuples):
        for t in filetuples:
            log.debug("publish " + str(t))
            t0 = t[0]
            actual = t0[1]
            target = t0[2]
            local = t[1]
            if local:
                actual = local
            if (not local) and isRemote(actual):
                #download, then add to local map (in db?)
                #log.debug("Download start %s -> %s" % (actual, target))
                urllib.urlretrieve(actual, target)
                #Don't forget to discard.
                log.debug("Fetch-published "+actual)
            else: #it's local!
                # this will break if we request a read on the file
                #
                # remove the file from the old mapping (discard)
                log.debug("start file move")
                shutil.move(actual, target)
                # FIXME: ping the new mapper so that it's aware of the file.
                log.debug("Published " + actual)
            self._publishedFiles.append((t[0], os.stat(target).st_size))

        pass
Beispiel #17
0
 def run(self):
     """Dispatch the scheduled commands and then write out the statistics.

     The parallel dispatcher is only started when self.result() is false;
     a recorded failure makes this method skip execution.  In both cases
     the statistics object is stopped, fed the published files, the
     sizes of the script inputs and the command list, and finished.
     """
     if self.result():
         # refuse to run if we have a failure logged.
         log.debug("refusing to dispatch: " + str(self.result()))
     else:
         log.debug("Starting parallel dispatcher")
         self.scheduler.executeParallelAll(self.remoteExec)
     self.stat.stop()
     self.stat.outputFiles(self._publishedFiles)
     inputSizes = [(name,
                    os.stat(self.config.execSourcePath +"/"+name).st_size)
                   for name in self._commandFactory.scriptIns]
     self.stat.inputFiles(inputSizes)
     self.stat.commandList(self.scheduler.cmdList)
     self.stat.finish()
Beispiel #18
0
 def read(self):
     """Load the config files and copy the mapped options onto self.

     Every entry of Config.CFGMAP is read as
     (attribute name, section, option, default).  The attribute is set to
     the default unless the option is present; a present value is replaced
     through Config.REMAP when listed there, and otherwise coerced to the
     default's type from its first whitespace-separated word when the
     types differ.  Ends by calling self.postReadFixup().
     """
     log.info("Reading configfile %s" % (self.filepath))
     for path in self.filepath: # read all configs in-order.
         self.config.read(path)
     for entry in Config.CFGMAP:
         default = entry[3]
         val = default # preload with default
         if self.config.has_option(entry[1], entry[2]):
             val = self.config.get(entry[1],entry[2])
             if val in Config.REMAP:
                 val = Config.REMAP[val]
             elif not isinstance(val, type(default)):
                 # coerce type to match default
                 val = type(default)(val.split()[0])
         setattr(self, entry[0], val)
         log.debug( "set config %s to %s"%(entry[0], str(val)))
     self.postReadFixup()
Beispiel #19
0
  def parseScript(self, script, factory):
      """Parse every line of script and then evaluate the collected context.

      script: a single string containing the script lines.
      factory: stored as self._factory and handed to
          self._parseScriptLine together with each line.

      Progress is logged every 500 lines.  After the last line the current
      context is asserted to be the root context again, and the context
      is evaluated with this object as its argument.
      """
      self._factory = factory
      for (index, line) in enumerate(script.splitlines()):
          self._parseScriptLine(factory, line)
          parsed = index + 1
          if (parsed % 500) == 0:
              log.debug("%d lines parsed" % (parsed))

      assert self._context == self._rootContext #should have popped back to top.
      self._context.evaluateAll(self)
Beispiel #20
0
    def __init__(self, url, slots):
        """Connect to a SWAMP slave and reset it.

        url: SOAP url for SWAMP slave
        slots: max number of running slots

        RemoteExecutor adapts a remote worker's execution resources
        so that they may be used by a parallel dispatcher.

        Raises StandardError, carrying the formatted traceback, when the
        reset call on the slave fails.
        """
        self.url = url
        self.slots = slots
        self.rpc = SOAPProxy(url)
        log.debug("reset slave at %s with %d slots" %(url,slots))
        try:
            self.rpc.reset()
        except Exception:
            import sys
            import traceback
            detail = "".join(traceback.format_exception(*sys.exc_info()))
            raise StandardError("can't connect to "+url+str(detail))
Beispiel #21
0
        def runReadyTask(self):
            """Run the first ready task at the head of the list.

            precondition:  Task list is locked (i.e. condition is acquired)
                           There exists at least one ready job
            postcondition: Task list is locked (i.e. condition is acquired)
                           One job has been run from the top of the list.

            The lock is released only while the task itself runs and is
            re-acquired even when the task raises, so the postcondition on
            the lock also holds when the exception reaches the caller.  A
            task that raised is not moved to self.done and stays in
            self.running.
            """
            # move top of readylist to running
            assert self.ready
            log.debug("running one task")

            self.running = self.ready.pop(0)
            # release lock while the task executes
            self.freeTaskCondition.release()
            try:
                self.running.run()
            finally:
                # re-acquire lock to log termination
                self.freeTaskCondition.acquire()
            # move running to done.
            self.done.append(self.running)
            self.running = None
Beispiel #22
0
    def _parseScriptLine4(self, line):
        """Parse a single script line.
        We will build a parse tree, deferring all evaluation until later.
        """

        self.lineNum += 1
        cline = self.stripComments(line)
        if self.continuation:
            cline = self.continuation + cline
            self.continuation = None
        if cline.endswith("\\"): # handle line continuation
            self.continuation = cline[:-1] # excise backslash
            return self._context
        if not cline:
            return self._context
        
        (r, nextContext) = self._context.addLine((cline, self.lineNum, line))
        self._context = nextContext
        if not r:
            log.debug("parse error: " + line) 
        return self._context
Beispiel #23
0
 def _parseScript(self, script):
     try:
         log.debug("Starting parse")
         self.parser.parseScript(script, self._commandFactory)
         log.debug("Finish parse")
         self.scheduler.finish()
         log.debug("finish scheduler prep")
         self.scrAndLogOuts = self._commandFactory.realOuts()
         self.logOuts = map(lambda x: x[1], self.scrAndLogOuts)
         log.debug("outs are " + str(self.scrAndLogOuts))
         #self.fail = "Testing: Real operation disabled"
         #print self.stat._dagGraph(self.scheduler.cmdList)
     except StandardError, e:
         self.fail = str(e)
Beispiel #24
0
        def evaluate(self, evalContext):
            """Do the "real" evaluation of this statement.

            evalContext: object with variableParser, stdAccept, and handleFunc

            The stored line is run through the variable parser; when the
            result is not a str, an empty list is returned.  Otherwise the
            line is offered to stdAccept and the result is returned as a
            one-element list.  An instance result is tagged with the line
            number, the original text and the stored line; a true result is
            also passed to handleFunc when one is set, a false one is
            logged as rejected.
            """
            # apply variable handling
            expanded = evalContext.variableParser.apply(self._line)
            if not isinstance(expanded, str):
                return []

            command = evalContext.stdAccept((expanded, self._lineNum))
            if isinstance(command, types.InstanceType):
                command.referenceLineNum = self._lineNum
                command.original = self._original
                command.expanded = self._line

            if command:
                if evalContext.handleFunc is not None:
                    evalContext.handleFunc(command)
            else:
                log.debug("reject:"+ self._line)
            return [command]
Beispiel #25
0
 def _launchLocal(self, cmd, locations=None):
     """Make cmd's inputs available locally, run it and return its exit code.

     cmd: command to run.  Its rFetchedFiles attribute is reset and then
         set to whatever had to be fetched; its exec_output attribute
         receives the output reported by subproc.call.
     locations: optional replacement for cmd.inputSrcs; ignored when false.

     Inputs that are not readable through self.filemap are fetched from
     cmd.inputSrcs, and in that case the remaining inputs are handed to
     self._verifyLogicals.
     """
     if not all([p in self.finished for p in cmd.parents]):
         print("%s was queued, but isn't ready!" % (id(cmd),))
     # make sure our inputs are ready
     missing = [f for f in cmd.inputs
                if not self.filemap.existsForRead(f)]
     cmd.rFetchedFiles = []
     if locations:
         cmd.inputSrcs = locations
     if missing:
         cmd.rFetchedFiles = self._fetchLogicals(missing, cmd.inputSrcs)
         # Called for its effect only; the result is not used.
         self._verifyLogicals(set(cmd.inputs).difference(missing))
     cmdLine = cmd.makeCommandLine(self.filemap.mapReadFile,
                                   self.filemap.mapWriteFile)
     # Make room for outputs (shouldn't be needed)
     self._clearFiles([t[1] for t in cmd.actualOutputs])
     log.debug("Launch %s " %(" ".join(cmdLine)))
     (code,out) = subproc.call(self.binaryFinder(cmd), cmdLine)
     cmd.exec_output = out

     return code
Beispiel #26
0
 def addWorker(self, url, slots):
     """Create a RemoteExecutor for url and add it to self.executor.

     url: passed to RemoteExecutor as the worker's address.
     slots: passed to RemoteExecutor as the worker's slot count.

     Returns the new RemoteExecutor.
     """
     log.debug("trying to add new worker: %s with %d" %(url,slots))
     worker = RemoteExecutor(url,slots)
     self.executor.append(worker)
     assert worker in self.executor
     return worker
Beispiel #27
0
 def selfDestruct(self):
     """Log that the task owning self.scheduler is self-destructing."""
     # %s rather than %d: the id is formatted as text, so any id type works
     # (the former "%d" % str(...) raised TypeError on every call).
     log.debug("Task %s is self-destructing" %(str(self.scheduler.taskId)))
Beispiel #28
0
 def discardFlow(self, token):
     """Clean up the job stored under token and move it to discardedJobs.

     token: key of the job in self.jobs; an unknown token raises KeyError.
     """
     self.jobs[token].cleanPhysicals()
     retired = self.jobs.pop(token)
     self.discardedJobs[token] = retired
     log.debug("discarding for token %d" %(token))
Beispiel #29
0
 def _failCmd(self, cmdTuple, code):
     log.debug("Fail %s code: %s" %(cmdTuple[0], str(code)))
     if hasattr(cmdTuple[0], 'callbackUrl'):
         self._touchUrl(cmdTuple[0].callbackUrl[1],[])
     else:
         log.debug("Deferring failure: NOT IMPLEMENTED.")
Beispiel #30
0
 def _discardHosted(self, files):
     log.debug("req discard of %s on %s" %(str(files), self.url))
     map(self.actual.pop, files)
     self.rpc.discardFiles(files)