Exemple #1
0
 def _sendPing(self, first, last):
     """Send one heartbeat request and return the time until the next one.

     The heartbeat items of every running workload, and of the workloads
     joined to it, are written into a <heartbeat> XML document that is
     sent with WorkerMessage.workerHeartbeatRequest.

     Arguments:
         first: forwarded to the heartbeat request; also adds " first" to
                the debug log line.
         last:  forwarded to the heartbeat request; also adds " last" to
                the debug log line.

     Returns:
         The number of seconds to wait before the next ping, as an int.
         It is taken from the 'heartbeat-time' entry when the reply data
         is a dict, and from the reply data itself otherwise.

     Side effects:
         Clears self.cmdsChanged. When the reply data is a dict,
         self.randomFile is set from its 'random-file' entry and
         self._createRandomFile() is called. When the reply status is not
         "OK" and the data lists 'faulty' items, each of them is handed to
         self.worker.killWorkload.
     """
     # NOTE(review): the flag is read and cleared before runCondVar is
     # taken, as in the original code; confirm that whoever sets
     # cmdsChanged does not rely on that lock.
     changed=self.cmdsChanged
     self.cmdsChanged=False
     with self.runCondVar:
         # first write the items to xml
         cmds=self.worker._getWorkloads()
         co=StringIO()
         co.write('<heartbeat worker_id="%s">'%self.workerID)
         for item in cmds:
             if not item.running:
                 continue
             item.hbi.writeXML(co)
             for subwl in item.joinedTo:
                 subwl.hbi.writeXML(co)
         co.write("</heartbeat>")
     clnt=WorkerMessage()
     resp=clnt.workerHeartbeatRequest(self.workerID, self.workerDir,
                                      first, last, changed,
                                      co.getvalue())
     presp=ProcessedResponse(resp)
     status=presp.getStatus()
     respData=presp.getData()

     # Build the " last first update" style tag used in the debug line.
     timestr=" last" if last else ""
     if first:
         timestr+=" first"
     if changed:
         timestr+=" update"
     log.debug("Sent%s heartbeat signal. Result was %s"%
               (timestr, status))

     if status != "OK":
         # A non-OK reply may name the workloads the server considers
         # faulty; only those are killed. The worker itself keeps running
         # (the former sys.exit here was disabled).
         log.info("Error from heartbeat request. Stopping %s"%str(respData))
         if isinstance(respData, dict) and 'faulty' in respData:
             for faultyItem in respData['faulty']:
                 self.worker.killWorkload(faultyItem)

     if isinstance(respData, dict):
         rettime=int(respData['heartbeat-time'])
         self.randomFile=respData['random-file']
         self._createRandomFile()
     else:
         # The reply data is the bare heartbeat time.
         rettime=int(respData)
     log.debug("Waiting %s seconds for next ping"%(rettime))
     return rettime
Exemple #2
0
 def _sendPing(self, first, last):
     """Do the actual sending of a heartbeat and return the next interval.

     Builds a <heartbeat> XML document from the heartbeat items of all
     running workloads (plus the workloads joined to them), sends it via
     WorkerMessage.workerHeartbeatRequest and processes the reply.

     Arguments:
         first: passed on to the request; tags the debug log with " first".
         last:  passed on to the request; tags the debug log with " last".

     Returns:
         int: seconds to wait before the next ping. Read from the
         'heartbeat-time' key if the reply data is a dict, otherwise the
         reply data itself converted with int().

     Side effects:
         Resets self.cmdsChanged to False. For dict replies, stores the
         'random-file' entry in self.randomFile and calls
         self._createRandomFile(). For non-OK replies that carry a
         'faulty' list, calls self.worker.killWorkload on each entry.
     """
     # NOTE(review): cmdsChanged is read and reset outside runCondVar,
     # unchanged from the original; verify this is safe against the code
     # that sets the flag.
     changed = self.cmdsChanged
     self.cmdsChanged = False

     with self.runCondVar:
         # first write the items to xml
         cmds = self.worker._getWorkloads()
         co = StringIO()
         co.write('<heartbeat worker_id="%s">' % self.workerID)
         for item in cmds:
             if item.running:
                 item.hbi.writeXML(co)
                 for subwl in item.joinedTo:
                     subwl.hbi.writeXML(co)
         co.write("</heartbeat>")

     clnt = WorkerMessage()
     resp = clnt.workerHeartbeatRequest(self.workerID, self.workerDir,
                                        first, last, changed, co.getvalue())
     presp = ProcessedResponse(resp)
     status = presp.getStatus()
     respData = presp.getData()
     # Only a dict reply can carry 'faulty', 'heartbeat-time' and
     # 'random-file'; anything else is treated as the bare interval.
     isDictReply = isinstance(respData, dict)

     timestr = ""
     if last:
         timestr = " last"
     if first:
         timestr += " first"
     if changed:
         timestr += " update"
     log.debug("Sent%s heartbeat signal. Result was %s" %
               (timestr, status))

     if status != "OK":
         # The server rejected the heartbeat. Kill just the workloads it
         # reports as faulty; the worker process is not stopped here.
         log.info("Error from heartbeat request. Stopping %s" % str(respData))
         if isDictReply and 'faulty' in respData:
             for faultyItem in respData['faulty']:
                 self.worker.killWorkload(faultyItem)

     if isDictReply:
         rettime = int(respData['heartbeat-time'])
         self.randomFile = respData['random-file']
         self._createRandomFile()
     else:
         rettime = int(respData)
     log.debug("Waiting %s seconds for next ping" % (rettime))
     return rettime
Exemple #3
0
    def run(self, serverState, request, response):
        """Send an add-node request to another server.

        Reads the 'host' and 'client_secure_port' request parameters. If
        the configured nodes already contain that hostname or FQDN, an
        ERROR response is added and nothing is sent. Otherwise an add-node
        request is sent to the host; on an OK answer a NodeConnectRequest
        is stored in the server configuration as a sent request and the
        answer's data (with the request object under 'nodeConnectRequest')
        is added to the response.

        Arguments:
            serverState: not used by this method.
            request:     supplies the 'host' and 'client_secure_port'
                         parameters.
            response:    receives the result or the error message.
        """
        conf = ServerConf()
        host = request.getParam('host')
        client_secure_port = request.getParam('client_secure_port')

        #do we have a server with this hostname or fqdn?
        if conf.getNodes().hostnameOrFQDNExists(host):
            errorMessage = "%s is already trusted" % host
            response.add(errorMessage, status="ERROR")
            log.info(errorMessage)
            return

        serv = RawServerMessage(host, client_secure_port)
        resp = ProcessedResponse(serv.sendAddNodeRequest(host))
        if not resp.isOK():
            response.add("Remote server said: %s"%resp.getMessage(),
                        status="ERROR")
            return

        result = resp.getData()
        nodeConnectRequest = NodeConnectRequest(result['serverId'],
            int(client_secure_port),None,None,result['fqdn'],host)

        conf.addSentNodeConnectRequest(nodeConnectRequest)
        result['nodeConnectRequest']=nodeConnectRequest
        log.info("Added node %s" % host)
        response.add('', result)
Exemple #4
0
    def run(self, serverState, request, response):
        """Ask the server named in the request to add this server as a node.

        The request must carry 'host' and 'client_secure_port'. A host
        whose hostname or FQDN is already among the configured nodes is
        rejected with an ERROR response. For any other host an add-node
        request is sent; if the remote answer is OK, a NodeConnectRequest
        built from the answer's 'serverId' and 'fqdn' is registered with
        the configuration as sent and returned in the response data under
        'nodeConnectRequest'. A non-OK answer is reported back as an ERROR
        response containing the remote message.

        Arguments:
            serverState: unused here.
            request:     source of the 'host' and 'client_secure_port'
                         parameters.
            response:    object the outcome is added to.
        """
        conf = ServerConf()
        host = request.getParam('host')
        client_secure_port = request.getParam('client_secure_port')

        #do we have a server with this hostname or fqdn?
        connectedNodes = conf.getNodes()

        if not connectedNodes.hostnameOrFQDNExists(host):
            serv = RawServerMessage(host, client_secure_port)
            resp = ProcessedResponse(serv.sendAddNodeRequest(host))

            if resp.isOK():
                result = resp.getData()
                nodeConnectRequest = NodeConnectRequest(
                    result['serverId'], int(client_secure_port), None, None,
                    result['fqdn'], host)

                conf.addSentNodeConnectRequest(nodeConnectRequest)
                result['nodeConnectRequest'] = nodeConnectRequest
                log.info("Added node %s" % host)
                response.add('', result)
            else:
                response.add("Remote server said: %s" % resp.getMessage(),
                             status="ERROR")
        else:
            errorMessage = "%s is already trusted" % host
            response.add(errorMessage, status="ERROR")
            log.info(errorMessage)
    def requestNetworkTopology(topology,serverState=None):
        """
        Asks each neighbouring node for its network topology

        inputs:
            topology:Nodes The list of the topology generated so far
            serverState:ServerState
                if provided worker states are fetched.
                since this method is called by getNetworkTopology() which in turn
                is called from places where we do not pass (and don't want) the serverState
                we provide this option. Also it is not needed as the calling server always
                knows the most up to date state of its own workers.

        returns:
            the topology after this node and its connected neighbours have
            been visited. Every successful neighbour request replaces the
            topology with the data of that neighbour's response, so the
            returned object may differ from the one passed in.

        """
        # NOTE(review): no self/cls parameter although this sits at method
        # indentation; presumably a @staticmethod -- confirm the decorator.
        conf = ServerConf()
        thisNode = Node.getSelfNode(conf)
        thisNode.setNodes(conf.getNodes())
        topology.addNode(thisNode)
        if serverState:
            thisNode.workerStates = WorkerStateHandler.getConnectedWorkers(serverState.getWorkerStates())

        for node in thisNode.getNodes().nodes.itervalues():
            # nodes already in the topology need no further request
            if topology.exists(node.getId()):
                continue
            #todo notify in topology that this node is not connected?
            if not node.isConnected():
                continue
            try:
                #connect to correct node
                clnt = DirectServerMessage(node,conf=conf)
                #send along the current topology
                rawresp = clnt.networkTopology(topology)
                processedResponse = ProcessedResponse(rawresp)
                topology = processedResponse.getData()
            except ServerConnectionError as e:
                #we cannot connect to the node,
                # and its marked as unreachable
                #we must still add it to the topology
                log.error("node %s unreachable when asking for network "
                          "topology: error was %s"%(node.getId(),str(e)))
                topology.addNode(node)

        return topology
Exemple #6
0
    def run(self, serverState, request, response):
        """Handle a heartbeat request coming from a worker.

        The heartbeat items in the request are grouped by the server they
        belong to. Items for this server are passed to the running command
        list's ping(); items for other servers are forwarded to them in a
        heartbeatForwardedRequest. The worker is marked as connected on
        every call.

        Request parameters read:
            worker_id, worker_dir, iteration, heartbeat_items and,
            optionally, version (treated as 0 when absent).

        Response:
            With version > 1 the data is a dict holding 'heartbeat-time'
            and 'random-file' (plus 'faulty' when faulty items were
            collected); otherwise it is the bare heartbeat time. If any
            faulty items were collected the response status is "ERROR".

        Arguments:
            serverState: provides the worker data list, the configuration,
                         the running command list and the worker states.
            request:     the heartbeat request.
            response:    object the reply is added to.
        """
        workerID=request.getParam('worker_id')
        workerDir=request.getParam('worker_dir')
        iteration=request.getParam('iteration')
        itemsXML=request.getParam('heartbeat_items')
        version=0
        if request.hasParam('version'):
            version=int(request.getParam('version'))
        hwr=cpc.command.heartbeat.HeartbeatItemReader()
        hwr.readString(itemsXML, "worker heartbeat items")
        heartbeatItems=hwr.getItems()
        # The worker data list
        workerDataList=serverState.getWorkerDataList()
        haveADir=False
        # Order the heartbeat items by destination server
        destList={}
        Nhandled=0
        for item in heartbeatItems:
            dest=item.getServerName()
            item.checkRunDir()
            if item.getHaveRunDir():
                haveADir=True
            destList.setdefault(dest, []).append(item)
            Nhandled+=1
        # The final heartbeat always removes the worker dir; any other
        # heartbeat adds it, but only if some item has a run dir.
        if iteration=="final":
            workerDataList.remove(workerDir)
        elif haveADir:
            workerDataList.add(workerDir)
        # get my own name to compare
        selfName=Node.getSelfNode(serverState.conf).getId()

        # updating the status at every heartbeat. This is how we know that
        # the worker is still talking to the server
        serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,workerID,
                                   request.headers['originating-client'])
        # now iterate over the destinations, and send them their heartbeat
        # items.
        # Once we have many workers, this would be a place to pool heartbeat
        # items and send them as one big request.
        faultyItems=[]
        for dest, items in destList.iteritems():
            if dest == selfName:
                # faultyItems is handed to ping(); presumably ping() adds
                # the items it rejects to it -- its return value is unused.
                serverState.getRunningCmdList().ping(workerID, workerDir,
                                                     iteration, items, True,
                                                     faultyItems)
                continue
            msg=ServerMessage(dest)
            co=StringIO()
            co.write('<heartbeat worker_id="%s" worker_server_id="%s">'%
                     (workerID, selfName))
            for item in items:
                item.writeXML(co)
            co.write('</heartbeat>')
            resp = msg.heartbeatForwardedRequest(workerID, workerDir,
                                                 selfName, iteration,
                                                 co.getvalue())
            presp=ProcessedResponse(resp)
            if presp.getStatus() != "OK":
                log.info("Heartbeat response from %s not OK"%dest)
                # the data of a non-OK forwarded reply is iterated as the
                # collection of faulty items
                faultyItems.extend(presp.getData())
        heartbeatTime=serverState.conf.getHeartbeatTime()
        if version > 1:
            retData = { 'heartbeat-time' : heartbeatTime,
                        'random-file': workerDataList.getRnd(workerDir) }
        else:
            retData=heartbeatTime
        if not faultyItems:
            response.add('', data=retData)
        else:
            # only the dict-style reply (version > 1) can carry the list
            if version > 1:
                retData['faulty']=faultyItems
            # TODO: per-workload error reporting
            # (message text, including its spelling, is kept as it was)
            response.add('Heatbeat NOT OK', status="ERROR", data=retData)
        log.info("Handled %d heartbeat signal items."%(Nhandled))