Example #1
0
 def run(self, serverState, request, response):
     """Remove the local asset named by the request's 'cmd_id' parameter.

     A plain message is added to the response when removeAsset() returns a
     true value; otherwise a message with status "ERROR" is added. The
     closing info line is logged in both cases.
     """
     assetID = request.getParam('cmd_id')
     removed = serverState.getLocalAssets().removeAsset(assetID)
     if not removed:
         response.add("Local asset with cmdID=%s NOT removed successfully."
                      % assetID, status="ERROR")
     else:
         response.add("Local asset with cmdID=%s removed successfully."
                      % assetID)
     log.info("Cleared asset %s" % (assetID))
Example #2
0
 def run(self, serverState, request, response):
     """Drop a locally stored asset and report the outcome in the response.

     The asset is identified by the 'cmd_id' request parameter. The result
     of removeAsset() selects between a success message and a message with
     status "ERROR"; an info line is logged either way.
     """
     cmd_id = request.getParam('cmd_id')
     localAssets = serverState.getLocalAssets()
     if localAssets.removeAsset(cmd_id):
         text = "Local asset with cmdID=%s removed successfully." % cmd_id
         response.add(text)
     else:
         text = "Local asset with cmdID=%s NOT removed successfully." % cmd_id
         response.add(text, status="ERROR")
     log.info("Cleared asset %s" % (cmd_id))
Example #3
0
def serveHTTPSWithNoCertReq(serverState):
    """Serve HTTPS requests that need no client certificate until stopped.

    Builds an HTTPSServerNoCertAuthentication from a fresh ServerConf() and
    the given serverState, logs the bound socket address and then blocks in
    serve_forever().

    On KeyboardInterrupt, and on any other Exception, a message is printed
    and serverState.doQuit() is called; the exception is not re-raised.
    """
    try:
        httpd = HTTPSServerNoCertAuthentication(
            request_handler.handlerForRequestWithNoCertReq, ServerConf(),
            serverState)
        sa = httpd.socket.getsockname()
        log.info("Serving HTTPS for client communication on %s port %s..."
                 % (sa[0], sa[1]))
        httpd.serve_forever()

    except KeyboardInterrupt:
        # print(...) with one argument gives the same output whether print
        # is a statement or a function.
        print("Interrupted")
        serverState.doQuit()
    except Exception:
        # TODO better error handling of server errors during startup
        # Every failure lands here, not only a busy port, so record the
        # real cause (with traceback) before printing the legacy message.
        log.exception("HTTPS server for client communication failed")
        print("HTTPS port %s already taken"
              % ServerConf().getClientSecurePort())
        serverState.doQuit()
Example #4
0
 def run(self, serverState, request, response):
     """Return the data of a local asset as a tar file in the response.

     The asset is looked up by the 'cmd_id' and 'asset_type' request
     parameters. If the lookup or getData() raises, an ERROR-status message
     naming this server's host name is added to the response instead. The
     closing info line is logged in both cases.
     """
     cmdID=request.getParam('cmd_id')
     assetType=request.getParam('asset_type')
     try:
         runfile=serverState.getLocalAssets().getAsset(cmdID,
                                                       assetType).getData()
     except Exception:
         # Deliberately not a bare except: KeyboardInterrupt and SystemExit
         # must propagate instead of being reported as a missing asset.
         log.error("Local asset cmdid=%s not found!"%cmdID)
         response.add("Command output data from cmdID %s not found on this server (%s)."%
                      (cmdID,serverState.conf.getHostName()),
                      status="ERROR")
     else:
         # the command output asset is only looked up for the trace line
         asset=serverState.getLocalAssets().getCmdOutputAsset(cmdID)
         log.log(cpc.util.log.TRACE,"Local asset cmdid=%s \nproject server=%s"%
                                    (asset.cmdID, asset.projectServer))
         response.setFile(runfile,'application/x-tar')
     log.info("Pulled asset %s/%s"%(cmdID, assetType))
Example #5
0
 def run(self, serverState, request, response):
     """Process heartbeat items that were forwarded to this server.

     Reads 'worker_id', 'worker_dir', 'iteration' and 'heartbeat_items'
     from the request, parses the items and pings the running command
     list with them. If ping() leaves the faulty-item list empty, the
     configured heartbeat time is returned as data; otherwise the faulty
     items are returned with status "ERROR".
     """
     workerID = request.getParam('worker_id')
     workerDir = request.getParam('worker_dir')
     iteration = request.getParam('iteration')
     itemsXML = request.getParam('heartbeat_items')
     log.log(cpc.util.log.TRACE, 'items: %s' % itemsXML)

     reader = cpc.command.heartbeat.HeartbeatItemReader()
     reader.readString(itemsXML, "worker heartbeat items")

     # ping() receives this list as its last argument; it is checked below
     rejected = []
     handledCount = len(reader.getItems())
     serverState.getRunningCmdList().ping(workerID, workerDir, iteration,
                                          reader.getItems(), False, rejected)
     if rejected:
         response.add('Heatbeat NOT OK', status="ERROR", data=rejected)
     else:
         response.add('', data=serverState.conf.getHeartbeatTime())
     log.info("Handled %d forwarded heartbeat signal items." % (handledCount))
Example #6
0
def serveHTTPSWithNoCertReq(serverState):
    """Run the HTTPS server for client traffic without certificate requests.

    Creates an HTTPSServerNoCertAuthentication with a new ServerConf() and
    the given serverState, logs the address it is bound to and serves until
    interrupted or until an error occurs.

    Both KeyboardInterrupt and any other Exception end with a printed
    message and a call to serverState.doQuit(); nothing is re-raised.
    """
    try:
        httpd = HTTPSServerNoCertAuthentication(
            request_handler.handlerForRequestWithNoCertReq, ServerConf(),
            serverState)
        sa = httpd.socket.getsockname()
        log.info("Serving HTTPS for client communication on %s port %s..." %
                 (sa[0], sa[1]))
        httpd.serve_forever()

    except KeyboardInterrupt:
        # Parenthesised single-argument print works both as a statement
        # and as a function call.
        print("Interrupted")
        serverState.doQuit()
    except Exception:
        # TODO better error handling of server errors during startup
        # The message below blames the port for every error, so keep the
        # actual exception and traceback in the log.
        log.exception("HTTPS server for client communication failed")
        print("HTTPS port %s already taken" %
              ServerConf().getClientSecurePort())
        serverState.doQuit()
Example #7
0
 def run(self, serverState, request, response):
     """Send the data of a locally stored asset back as a tar file.

     'cmd_id' and 'asset_type' are read from the request. When fetching
     the asset data raises, the response gets an ERROR-status message that
     includes this server's host name; otherwise the data is set as the
     response file. An info line is logged in both cases.
     """
     cmdID = request.getParam('cmd_id')
     assetType = request.getParam('asset_type')
     try:
         runfile = serverState.getLocalAssets().getAsset(
             cmdID, assetType).getData()
     except Exception:
         # Narrowed from a bare except so that KeyboardInterrupt and
         # SystemExit are not turned into a "not found" response.
         log.error("Local asset cmdid=%s not found!" % cmdID)
         response.add(
             "Command output data from cmdID %s not found on this server (%s)."
             % (cmdID, serverState.conf.getHostName()),
             status="ERROR")
     else:
         # fetched only to report the project server in the trace log
         asset = serverState.getLocalAssets().getCmdOutputAsset(cmdID)
         log.log(
             cpc.util.log.TRACE,
             "Local asset cmdid=%s \nproject server=%s" %
             (asset.cmdID, asset.projectServer))
         response.setFile(runfile, 'application/x-tar')
     log.info("Pulled asset %s/%s" % (cmdID, assetType))
Example #8
0
 def run(self, serverState, request, response):
     """Return a run directory's contents as a gzipped tar file.

     'worker_dir' and 'run_dir' are read from the request. If
     checkDirectory() returns a false value, nothing is added to the
     response. Otherwise, when run_dir is an existing directory, it is
     packed into a temporary file that becomes the response file, and the
     request's 'remove' flag is set.
     """
     # TODO: some verification that the request comes from the server that
     # owns the file
     workerDir = request.getParam('worker_dir')
     runDir = request.getParam('run_dir')
     dataList = serverState.getWorkerDataList()
     # NOTE(review): checkDirectory presumably raises when the directory is
     # not allowed -- confirm against its implementation.
     if not dataList.checkDirectory(workerDir, [runDir]):
         log.info("Did not fetch data from dead worker")
         return
     # only pack something when the directory is actually there
     if os.path.isdir(runDir):
         spool = tempfile.TemporaryFile()
         archive = tarfile.open(fileobj=spool, mode="w:gz")
         archive.add(runDir, arcname=".", recursive=True)
         archive.close()
         del archive
         # rewind so the response reads the archive from its start
         spool.seek(0)
         response.setFile(spool, 'application/x-tar')
         request.setFlag('remove', True)
     response.add('Returning data')
     log.info("Fetched data from dead worker")
Example #9
0
    def run(self, serverState, request, response):
        """Hand out queued commands to a worker that reports it is ready.

        The worker's platform/executable description (the 'worker'
        parameter) is matched against the command queue. Matching commands
        are added to the running command list and returned as a gzipped tar
        file. If nothing matches, the request is forwarded to the nodes
        from the configuration that are not yet in the request's topology,
        and the first tar response received is passed on; when no node
        returns work the response is "No command".

        Request parameters read: 'worker', and optionally 'worker-id',
        'heartbeat-interval' (forwarded requests only) and 'topology'.
        Headers read: 'originating-client' and, for forwarded requests,
        'originating-server-id'.
        """
        # first read platform capabilities and executables
        rdr=cpc.command.platform_exec_reader.PlatformExecutableReader()
        workerData=request.getParam('worker')
        if request.hasParam('worker-id'):
            workerID=request.getParam('worker-id')
        else:
            workerID='(none)'
        log.debug("Worker platform + executables: %s"%workerData)
        rdr.readString(workerData,"Worker-reported platform + executables")
        # match queued commands to executables.
        cwm=CommandWorkerMatcher(rdr.getPlatforms(),
                                 rdr.getExecutableList(),
                                 rdr.getWorkerRequirements())
        cmds=cwm.getWork(serverState.getCmdQueue())
        if not cwm.isDepleted():
            # now sleep for 5 seconds to give the dataflow time to react to any
            # new state.
            time.sleep(5)
            cmds.extend(cwm.getWork(serverState.getCmdQueue()))
        # now check the forwarded variables
        conf=serverState.conf
        originatingServer=None
        heartbeatInterval=None
        # A missing attribute raises AttributeError, which the former
        # "except NameError" could not catch. getattr with a default treats
        # an absent self.forwarded as False, as originally intended.
        if getattr(self, 'forwarded', False):
            # check whether there is an originating server. If not, we're it
            if 'originating-server-id' in request.headers:
                originatingServer = request.headers['originating-server-id']
            # check the expected heartbeat time.
            log.debug("Forwarded message")
            if request.hasParam('heartbeat-interval'):
                heartbeatInterval = int(request.getParam('heartbeat-interval'))
                log.debug("Forwarded heartbeat interval is %d"%
                          heartbeatInterval)

        if originatingServer is None:
            # If the originating server property has not been set,  the
            # request hasn't been forwarded, therefore we are the originating
            # server
            selfNode=Node.getSelfNode(conf)
            originatingServer = selfNode.getId()
            # we only store worker state in the server the worker connects to
            serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,workerID,
                                       request.headers['originating-client'])
        if heartbeatInterval is None:
            heartbeatInterval = conf.getHeartbeatTime()
        log.debug("worker identified %s"%request.headers['originating-client'] )

        if len(cmds) > 0:
            # first add them to the running list so they never get lost
            runningCmdList=serverState.getRunningCmdList()
            runningCmdList.add(cmds, originatingServer, heartbeatInterval)
            # construct the tar file with the workloads.
            tff=tempfile.TemporaryFile()
            tf=tarfile.open(fileobj=tff, mode="w:gz")
            # make the commands ready
            for cmd in cmds:
                log.debug("Adding command id %s to tar file."%cmd.id)
                # write the command description to the command's directory
                cmddir=cmd.getDir()
                if not os.path.exists(cmddir):
                    log.debug("cmddir %s did not exist. Created directory."%cmd.id)
                    os.mkdir(cmddir)
                arcdir="%s"%(cmd.id)
                log.debug("cmddir=%s"%cmddir)
                # the with-block closes command.xml even if writing fails
                with open(os.path.join(cmddir, "command.xml"), "w") as outf:
                    cmd.writeWorkerXML(outf)
                tf.add(cmddir, arcname=arcdir, recursive=True)
            tf.close()
            del(tf)
            # rewind so the response reads the archive from its start
            tff.seek(0)
            # now send it back
            response.setFile(tff,'application/x-tar')
            # the file is closed after the response is sent.
            log.info("Did direct worker-ready")
        else:
            nodes = conf.getNodes().getNodesByPriority()

            topology = Nodes()
            if request.hasParam('topology'):
                topology = json.loads(request.getParam('topology')
                                      ,object_hook = json_serializer.fromJson)

            # add ourselves to the topology that is passed along
            thisNode = Node.getSelfNode(conf)
            thisNode.nodes = conf.getNodes()
            topology.addNode(thisNode)

            hasJob =False # temporary flag that should be removed
            for node in nodes:
                if topology.exists(node.getId()) == False:
                    clnt=ServerMessage(node.getId())

                    clientResponse=clnt.workerReadyForwardedRequest(workerID,
                                        workerData,
                                        topology,
                                        originatingServer,
                                        heartbeatInterval,
                                        request.headers['originating-client'])

                    if clientResponse.getType() == 'application/x-tar':

                        log.log(cpc.util.log.TRACE,
                                'got work from %s'%
                                (clientResponse.headers[
                                     'originating-server-id']))
                        hasJob=True
                        # we need to rewrap the message

                        #TODO stupid intermediary step because the mmap form
                        # clientresponse is prematurely closed
                        tmp = tempfile.TemporaryFile('w+b')

                        message = clientResponse.getRawData()

                        tmp.write(message.read(len(message)))
                        tmp.seek(0)

                        response.setFile(tmp,'application/x-tar')
                        response.headers['originating-server-id']=\
                                  clientResponse.headers[
                                      'originating-server-id']
                        # The response has a single 'originating-server-id'
                        # header, so it can describe only one payload: stop
                        # at the first node that hands out work instead of
                        # requesting more work and overwriting this one.
                        break
                    #OPTIMIZE leads to a lot of folding and unfolding of
                    #packages
            if not hasJob:
                response.add("No command")
            log.info("Did delegated worker-ready")
Example #10
0
    def run(self, serverState, request, response):
        """Process a heartbeat from a worker connected to this server.

        The heartbeat items are grouped by the server name each item
        reports. Items for this server are pinged against the local running
        command list; the others are sent to their server as a forwarded
        heartbeat request. Items reported as faulty are collected and
        returned with status "ERROR".

        Request parameters read: 'worker_id', 'worker_dir', 'iteration',
        'heartbeat_items' and optionally 'version' (0 when absent). For
        version > 1 the returned data is a dict; otherwise it is the
        configured heartbeat time.
        """
        workerID = request.getParam('worker_id')
        workerDir = request.getParam('worker_dir')
        iteration = request.getParam('iteration')
        itemsXML = request.getParam('heartbeat_items')
        version = (int(request.getParam('version'))
                   if request.hasParam('version') else 0)

        reader = cpc.command.heartbeat.HeartbeatItemReader()
        reader.readString(itemsXML, "worker heartbeat items")
        heartbeatItems = reader.getItems()
        # The worker data list
        workerDataList = serverState.getWorkerDataList()

        # Order the heartbeat items by destination server
        itemsByDest = {}
        anyRunDir = False
        handledCount = 0
        for hbItem in heartbeatItems:
            target = hbItem.getServerName()
            hbItem.checkRunDir()
            if hbItem.getHaveRunDir():
                anyRunDir = True
            itemsByDest.setdefault(target, []).append(hbItem)
            handledCount += 1

        # the final iteration always removes the worker directory; before
        # that it is added once any item has a run directory
        if iteration == "final":
            workerDataList.remove(workerDir)
        elif anyRunDir:
            workerDataList.add(workerDir)

        # get my own name to compare
        ownName = Node.getSelfNode(serverState.conf).getId()

        # updating the status at every heartbeat. This is how we know that
        # the worker is still talking to the server
        serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                                   workerID,
                                   request.headers['originating-client'])

        # now iterate over the destinations, and send them their heartbeat
        # items.
        # Once we have many workers, this would be a place to pool heartbeat
        # items and send them as one big request.
        rejected = []
        for target, targetItems in itemsByDest.items():
            if target == ownName:
                serverState.getRunningCmdList().ping(workerID, workerDir,
                                                     iteration, targetItems,
                                                     True, rejected)
                continue
            forward = ServerMessage(target)
            xmlOut = StringIO()
            xmlOut.write('<heartbeat worker_id="%s" worker_server_id="%s">'%
                         (workerID, ownName))
            for hbItem in targetItems:
                hbItem.writeXML(xmlOut)
            xmlOut.write('</heartbeat>')
            reply = ProcessedResponse(
                forward.heartbeatForwardedRequest(workerID, workerDir,
                                                  ownName, iteration,
                                                  xmlOut.getvalue()))
            if reply.getStatus() != "OK":
                log.info("Heartbeat response from %s not OK"%target)
                rejected.extend(reply.getData())

        if version > 1:
            retData = {
                'heartbeat-time': serverState.conf.getHeartbeatTime(),
                'random-file': workerDataList.getRnd(workerDir),
            }
        else:
            retData = serverState.conf.getHeartbeatTime()

        if rejected:
            if version > 1:
                retData['faulty'] = rejected
            # TODO: per-workload error reporting
            response.add('Heatbeat NOT OK', status="ERROR", data=retData)
        else:
            response.add('', data=retData)
        log.info("Handled %d heartbeat signal items."%(handledCount))
Example #11
0
 def run(self, serverState, request, response):
     """Handle a reported run failure by delegating to runLocal().

     The 'cmd_id' parameter is read before the delegation and is used only
     for the closing log line.
     """
     failedID = request.getParam('cmd_id')
     self.runLocal(serverState, request, response)
     log.info("Run failure reported on %s" % failedID)
Example #12
0
 def run(self, serverState, request, response):
     """Handle a finished command by delegating to runLocal().

     The 'cmd_id' parameter is fetched first and only used to log which
     command finished once runLocal() has returned.
     """
     finishedID = request.getParam('cmd_id')
     self.runLocal(serverState, request, response)
     log.info("Finished command %s" % finishedID)
Example #13
0
    def runLocal(self, serverState, request, response):
        """Handle a command-finished request, locally or by forwarding it.

        If the project server named in the request is another server, the
        run data is stored as a local command output asset and a
        command-finished signal is forwarded to that server. Otherwise the
        command is finished here; when the request carries a non-zero
        'run_data' parameter but no file, the output is first pulled from
        the worker server.

        Request parameters read: 'cmd_id', and optionally 'worker_server',
        'project_server', 'return_code', 'used_cpu_time' and 'run_data'.
        Files read: 'run_data', or 'rundata' for backward compatibility.

        Raises:
            CommandFinishError: if 'project_server' is missing and
                self.forwarded is false.
        """
        cmdID=request.getParam('cmd_id')

        selfNode=Node.getSelfNode(serverState.conf)
        selfName = selfNode.getId()

        # get the source server if set. If not set, it means that this server
        # is the worker server.
        if request.hasParam('worker_server'):
            workerServer=request.getParam('worker_server')
        else:
            workerServer=selfName

        # get the destination server if set
        if request.hasParam('project_server'):
            projServer=request.getParam('project_server')
        else:
            # for backward compatibility, we assume that we are the project
            # server if it's forwarded. If not, there's something wrong.
            if not self.forwarded:
                raise CommandFinishError(
                           "no project server set in command finished request.")
            projServer=selfName

        returncode=None
        if request.hasParam('return_code'):
            returncode=int(request.getParam('return_code'))
        cputime=0
        if request.hasParam('used_cpu_time'):
            cputime=float(request.getParam('used_cpu_time'))

        runfile=None
        if request.haveFile('run_data'):
            runfile=request.getFile('run_data')
        elif request.haveFile('rundata'):
            # backward compatibility
            runfile=request.getFile('rundata')

        if projServer != selfName:
            # forward the request using remote assets. Note that the workers
            # usually don't take this path anyway and forward directly to the
            # project server. This might change in the future.
            # TODO: some sort of verification  to check whether this was in fact
            #       the client that we sent the command to
            serverState.getLocalAssets().addCmdOutputAsset(cmdID,
                                                           projServer, runfile)
            # forward CommandFinished-signal to project server; the last
            # argument tells it whether run data is available
            msg=ServerMessage(projServer)
            msg.commandFinishedForwardedRequest(cmdID,
                                                workerServer,
                                                projServer,
                                                returncode,
                                                cputime,
                                                runfile is not None)
        else:
            # handle the input locally.
            # get the remote asset if it exists
            if ( workerServer is not None and
                 runfile is None and
                 ( request.hasParam('run_data') and
                   int(request.getParam('run_data'))!=0 ) ):
                #remote asset tracking
                log.info("Pulling asset from %s"%workerServer)
                serverState.getRemoteAssets().addAsset(cmdID, workerServer)
                #for now, get the command data output immediately
                rundata = Tracker.getCommandOutputData(cmdID, workerServer)
                # identity test: None is a singleton and != may be overridden
                if rundata is not None:
                    runfile = rundata.getRawData()
            # now handle the finished command.
            runningCmdList=serverState.getRunningCmdList()
            runningCmdList.handleFinished(cmdID, returncode, cputime, runfile)
Example #14
0
def shutdownServer(self):
    """Log that the shutdown has completed; nothing else is done here."""
    message = "shutdown complete"
    log.info(message)
Example #15
0
def shutdownServer(self):
    """Write the final "shutdown complete" line to the info log."""
    doneText = "shutdown complete"
    log.info(doneText)