Example #1
0
 def getCommandOutputData(cmdID, workerServer):
     """Pull the output of command cmdID from workerServer.

     Issues a pull-asset request for the command-output asset. When the
     reply has type 'application/x-tar', clearAssetRequest(cmdID) is sent
     through the same ServerMessage and the reply object is returned.
     For any other reply type the problem is logged and None is returned.
     """
     log.log(cpc.util.log.TRACE,
             "Trying to pull command output from %s" % workerServer)
     puller = ServerMessage(workerServer)
     reply = puller.pullAssetRequest(cmdID, Asset.cmdOutput())

     if reply.getType() == "application/x-tar":
         puller.clearAssetRequest(cmdID)
         log.log(cpc.util.log.TRACE,
                 "Successfully pulled command output data from %s." %
                 workerServer)
         # The reply object is handed back instead of reply.getRawData():
         # per the original author's note, the mmap closes as it is
         # returned.
         return reply

     log.error("Incorrect response type: %s, should be %s" %
               (reply.getType(), 'application/x-tar'))
     if reply.getType() == "text/json":
         # Read the message body first, then let ProcessedResponse
         # interpret the reply (same order as before).
         errorText = reply.message.read(len(reply.message))
         if not ProcessedResponse(reply).isOK():
             log.error('Response from worker server not OK: %s' % errorText)
     return None
Example #2
0
 def _fetchRemoteRunFiles(self, rc):
     """Copy the result files of a remote run directory into the local
        command directory.

        Returns True when a tar archive was received and extracted, and
        False when rc has no data or the reply is not a tar archive.
        Exceptions from the request or the extraction are not caught."""
     if not rc.haveData:
         return False

     log.debug("Fetching remote results directory %s to %s" %
               (rc.runDir, rc.cmd.getDir()))
     # The data lives on another server, so it has to be requested with a
     # server-to-server command.
     fetcher = ServerMessage(rc.workerServer)
     reply = fetcher.deadWorkerFetchRequest(rc.workerDir, rc.runDir)
     if reply.getType() != "application/x-tar":
         return False

     # Unpack the returned archive into the command directory.
     archive = reply.getRawData()
     log.debug("extracting file for %s to dir %s" %
               (rc.cmd.id, rc.cmd.getDir()))
     cpc.util.file.extractSafely(rc.cmd.getDir(), fileobj=archive)
     return True
Example #3
0
 def _fetchRemoteRunFiles(self, rc):
     """Retrieve the result files of a remote run directory and unpack
        them into the local command directory.

        Returns True if a tar archive was received and extracted; returns
        False if rc.haveData is false or the reply has another type.
        Failures in the request or the extraction propagate as exceptions."""
     if not rc.haveData:
         return False

     localDirMsg = (rc.runDir, rc.cmd.getDir())
     log.debug("Fetching remote results directory %s to %s" % localDirMsg)
     # Remote data can only be obtained through a server-to-server command.
     serverMsg = ServerMessage(rc.workerServer)
     fetched = serverMsg.deadWorkerFetchRequest(rc.workerDir, rc.runDir)
     gotTar = fetched.getType() == "application/x-tar"
     if gotTar:
         # Extract the returned tar data into the command's directory.
         tarData = fetched.getRawData()
         log.debug("extracting file for %s to dir %s" %
                   (rc.cmd.id, rc.cmd.getDir()))
         cpc.util.file.extractSafely(rc.cmd.getDir(), fileobj=tarData)
     return gotTar
Example #4
0
    def run(self, serverState, request, response):
        """Answer a worker-ready request with matching work, if there is any.

        The worker's reported platforms and executables are read from the
        'worker' request parameter and matched against the command queue.

        * If commands were matched, they are added to the running command
          list and sent back as a gzipped tar attached to the response with
          type 'application/x-tar'.
        * Otherwise the request is forwarded to the nodes returned by
          conf.getNodes().getNodesByPriority() that are not yet part of the
          request's topology. The first tar reply is relayed to the worker;
          if no node has work, "No command" is added to the response.

        serverState -- the server state (queue, running list, configuration)
        request     -- the incoming request (parameters and headers)
        response    -- the response object that is filled in
        """
        # first read platform capabilities and executables
        rdr = cpc.command.platform_exec_reader.PlatformExecutableReader()
        workerData = request.getParam('worker')
        if request.hasParam('worker-id'):
            workerID = request.getParam('worker-id')
        else:
            workerID = '(none)'
        log.debug("Worker platform + executables: %s" % workerData)
        rdr.readString(workerData, "Worker-reported platform + executables")
        # match queued commands to executables.
        cwm = CommandWorkerMatcher(rdr.getPlatforms(),
                                   rdr.getExecutableList(),
                                   rdr.getWorkerRequirements())
        cmds = cwm.getWork(serverState.getCmdQueue())
        if not cwm.isDepleted():
            # now sleep for 5 seconds to give the dataflow time to react to
            # any new state.
            time.sleep(5)
            cmds.extend(cwm.getWork(serverState.getCmdQueue()))
        # now check the forwarded variables
        conf = serverState.conf
        originatingServer = None
        heartbeatInterval = None
        # A missing attribute raises AttributeError, not NameError, so the
        # lookup uses a default: a handler without 'forwarded' is treated as
        # not forwarded.
        if getattr(self, 'forwarded', False):
            # check whether there is an originating server. If not, we're it
            if 'originating-server-id' in request.headers:
                originatingServer = request.headers['originating-server-id']
            # check the expected heartbeat time.
            log.debug("Forwarded message")
            if request.hasParam('heartbeat-interval'):
                heartbeatInterval = int(
                    request.getParam('heartbeat-interval'))
                log.debug("Forwarded heartbeat interval is %d" %
                          heartbeatInterval)

        if originatingServer is None:
            # If the originating server property has not been set,  the
            # request hasn't been forwarded, therefore we are the originating
            # server
            selfNode = Node.getSelfNode(conf)
            originatingServer = selfNode.getId()
            # we only store worker state in the server the worker connects to
            serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                                       workerID,
                                       request.headers['originating-client'])
        if heartbeatInterval is None:
            heartbeatInterval = conf.getHeartbeatTime()
        log.debug("worker identified %s" %
                  request.headers['originating-client'])

        if cmds:
            # first add them to the running list so they never get lost
            runningCmdList = serverState.getRunningCmdList()
            runningCmdList.add(cmds, originatingServer, heartbeatInterval)
            # construct the tar file with the workloads.
            tff = tempfile.TemporaryFile()
            try:
                tf = tarfile.open(fileobj=tff, mode="w:gz")
                # make the commands ready
                for cmd in cmds:
                    log.debug("Adding command id %s to tar file." % cmd.id)
                    cmddir = cmd.getDir()
                    if not os.path.exists(cmddir):
                        log.debug("cmddir %s did not exist. "
                                  "Created directory." % cmd.id)
                        os.mkdir(cmddir)
                    arcdir = "%s" % (cmd.id)
                    log.debug("cmddir=%s" % cmddir)
                    # write the command description to the command's
                    # directory; 'with' closes the file even if writing fails
                    with open(os.path.join(cmddir, "command.xml"),
                              "w") as outf:
                        cmd.writeWorkerXML(outf)
                    tf.add(cmddir, arcname=arcdir, recursive=True)
                tf.close()
                tff.seek(0)
            except Exception:
                # don't leak the temporary file when packing fails
                tff.close()
                raise
            # now send it back
            response.setFile(tff, 'application/x-tar')
            # the file is closed after the response is sent.
            log.info("Did direct worker-ready")
        else:
            nodes = conf.getNodes().getNodesByPriority()

            topology = Nodes()
            if request.hasParam('topology'):
                topology = json.loads(request.getParam('topology'),
                                      object_hook=json_serializer.fromJson)

            # record this server in the topology so that servers further
            # down the chain do not forward the request back to it
            thisNode = Node.getSelfNode(conf)
            thisNode.nodes = conf.getNodes()
            topology.addNode(thisNode)

            hasJob = False  # temporary flag that should be removed
            for node in nodes:
                if topology.exists(node.getId()):
                    continue
                clnt = ServerMessage(node.getId())
                clientResponse = clnt.workerReadyForwardedRequest(
                    workerID,
                    workerData,
                    topology,
                    originatingServer,
                    heartbeatInterval,
                    request.headers['originating-client'])

                if clientResponse.getType() != 'application/x-tar':
                    continue

                log.log(cpc.util.log.TRACE,
                        'got work from %s' %
                        (clientResponse.headers['originating-server-id']))
                hasJob = True
                # we need to rewrap the message

                # TODO stupid intermediary step because the mmap from
                # clientResponse is prematurely closed
                tmp = tempfile.TemporaryFile('w+b')
                message = clientResponse.getRawData()
                tmp.write(message.read(len(message)))
                tmp.seek(0)

                response.setFile(tmp, 'application/x-tar')
                response.headers['originating-server-id'] = \
                    clientResponse.headers['originating-server-id']
                # OPTIMIZE leads to a lot of folding and unfolding of
                # packages

                # Stop at the first node that hands out work. Asking further
                # nodes would presumably replace this payload through a
                # second setFile() call, so the commands in it would never
                # reach the worker and the temporary file would be orphaned.
                break
            if not hasJob:
                response.add("No command")
            log.info("Did delegated worker-ready")
Example #5
0
    def run(self, serverState, request, response):
        """Process a worker heartbeat and pass its items to their servers.

        The heartbeat items are parsed from the 'heartbeat_items' request
        parameter and grouped by the server name each item reports. Items
        for this server are handed to the running command list's ping();
        items for other servers are sent on with a forwarded heartbeat
        request. Items reported as faulty are collected and returned to the
        worker.

        The response data is the heartbeat time for protocol version <= 1,
        or a dict with 'heartbeat-time', 'random-file' and, on error,
        'faulty' for version > 1.

        serverState -- the server state (worker data, running list, conf)
        request     -- the incoming heartbeat request
        response    -- the response object that is filled in
        """
        workerID = request.getParam('worker_id')
        workerDir = request.getParam('worker_dir')
        iteration = request.getParam('iteration')
        itemsXML = request.getParam('heartbeat_items')
        version = 0
        if request.hasParam('version'):
            version = int(request.getParam('version'))
        hwr = cpc.command.heartbeat.HeartbeatItemReader()
        hwr.readString(itemsXML, "worker heartbeat items")
        heartbeatItems = hwr.getItems()
        # The worker data list
        workerDataList = serverState.getWorkerDataList()
        haveADir = False
        # Order the heartbeat items by destination server
        destList = {}
        Nhandled = 0
        for item in heartbeatItems:
            dest = item.getServerName()
            item.checkRunDir()
            if item.getHaveRunDir():
                haveADir = True
            destList.setdefault(dest, []).append(item)
            Nhandled += 1
        # The final heartbeat always removes the worker directory; earlier
        # ones add it only when at least one item has a run directory.
        if iteration == "final":
            workerDataList.remove(workerDir)
        elif haveADir:
            workerDataList.add(workerDir)
        # get my own name to compare
        selfNode = Node.getSelfNode(serverState.conf)
        selfName = selfNode.getId()

        # updating the status at every heartbeat. This is how we know that
        # the worker is still talking to the server
        serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                                   workerID,
                                   request.headers['originating-client'])
        # now iterate over the destinations, and send them their heartbeat
        # items.
        # Once we have many workers, this would be a place to pool heartbeat
        # items and send them as one big request.
        faultyItems = []
        # items() (rather than the Python 2-only iteritems()) keeps the loop
        # working on both Python 2 and Python 3.
        for dest, items in destList.items():
            if dest == selfName:
                # ping() is passed faultyItems as an argument; its return
                # value was never used.
                serverState.getRunningCmdList().ping(workerID, workerDir,
                                                     iteration, items, True,
                                                     faultyItems)
            else:
                msg = ServerMessage(dest)
                # NOTE(review): workerID and selfName are interpolated into
                # XML attributes without escaping -- confirm they cannot
                # contain quotes or markup.
                co = StringIO()
                co.write('<heartbeat worker_id="%s" worker_server_id="%s">' %
                         (workerID, selfName))
                for item in items:
                    item.writeXML(co)
                co.write('</heartbeat>')
                resp = msg.heartbeatForwardedRequest(workerID, workerDir,
                                                     selfName, iteration,
                                                     co.getvalue())
                presp = ProcessedResponse(resp)
                if presp.getStatus() != "OK":
                    log.info("Heartbeat response from %s not OK" % dest)
                    # the data of a failed response holds the faulty items
                    faultyItems.extend(presp.getData())
        if version > 1:
            retData = {'heartbeat-time': serverState.conf.getHeartbeatTime(),
                       'random-file': workerDataList.getRnd(workerDir)}
        else:
            retData = serverState.conf.getHeartbeatTime()
        if len(faultyItems) == 0:
            response.add('', data=retData)
        else:
            if version > 1:
                retData['faulty'] = faultyItems
            # TODO: per-workload error reporting
            response.add('Heatbeat NOT OK', status="ERROR", data=retData)
        log.info("Handled %d heartbeat signal items." % (Nhandled))
Example #6
0
    def runLocal(self, serverState, request, response):
        """Handle a command-finished request for command 'cmd_id'.

        If the project server named in the request is another server, the
        run data (if any) is registered as a local command-output asset and
        the finished signal is forwarded to that server. Otherwise the
        command is finished locally, first pulling the output data from the
        worker server when the request announces run data without carrying
        the file itself.

        serverState -- the server state (assets, running command list, conf)
        request     -- the incoming request (parameters and files)
        response    -- the response object (not modified here)

        Raises CommandFinishError when a non-forwarded request does not name
        a project server.
        """
        cmdID = request.getParam('cmd_id')

        selfNode = Node.getSelfNode(serverState.conf)
        selfName = selfNode.getId()

        # get the source server if set. If not set, it means that this server
        # is the worker server.
        if request.hasParam('worker_server'):
            workerServer = request.getParam('worker_server')
        else:
            workerServer = selfName

        # get the destination server if set
        if request.hasParam('project_server'):
            projServer = request.getParam('project_server')
        elif self.forwarded:
            # for backward compatibility, we assume that we are the project
            # server if it's forwarded.
            projServer = selfName
        else:
            # not forwarded and no project server: there's something wrong.
            raise CommandFinishError(
                       "no project server set in command finished request.")

        returncode = None
        if request.hasParam('return_code'):
            returncode = int(request.getParam('return_code'))
        cputime = 0
        if request.hasParam('used_cpu_time'):
            cputime = float(request.getParam('used_cpu_time'))

        runfile = None
        if request.haveFile('run_data'):
            runfile = request.getFile('run_data')
        elif request.haveFile('rundata'):
            # backward compatibility
            runfile = request.getFile('rundata')

        if projServer != selfName:
            # forward the request using remote assets. Note that the workers
            # usually don't take this path anyway and forward directly to the
            # project server. This might change in the future.
            # TODO: some sort of verification  to check whether this was in
            #       fact the client that we sent the command to
            serverState.getLocalAssets().addCmdOutputAsset(cmdID,
                                                           projServer, runfile)
            # forward CommandFinished-signal to project server; the last
            # argument tells it whether there is run data to pull.
            msg = ServerMessage(projServer)
            msg.commandFinishedForwardedRequest(cmdID,
                                                workerServer,
                                                projServer,
                                                returncode,
                                                cputime,
                                                runfile is not None)
        else:
            # handle the input locally.
            # get the remote asset if it exists: the request carries no file
            # but its 'run_data' parameter is a non-zero integer.
            if (workerServer is not None and
                    runfile is None and
                    (request.hasParam('run_data') and
                     int(request.getParam('run_data')) != 0)):
                # remote asset tracking
                log.info("Pulling asset from %s" % workerServer)
                serverState.getRemoteAssets().addAsset(cmdID, workerServer)
                # for now, get the command data output immediately
                rundata = Tracker.getCommandOutputData(cmdID, workerServer)
                # identity test: None is a singleton and '!=' could be
                # redirected by an overloaded comparison on the response
                if rundata is not None:
                    runfile = rundata.getRawData()
            # now handle the finished command.
            runningCmdList = serverState.getRunningCmdList()
            runningCmdList.handleFinished(cmdID, returncode, cputime, runfile)