def run(self, serverState, request, response):
    """Handle a heartbeat request sent by a worker.

    Reads the heartbeat items from the request, groups them by the server
    they name, pings the local running-command list for the items that
    name this server and forwards the others to their destination server.
    Items reported as faulty are collected and, if there are any, the
    response is marked as an error.

    Parameters:
        serverState: server state object; supplies the worker data list,
            the configuration, the running command list and the worker
            state registry.
        request: incoming request. The parameters 'worker_id',
            'worker_dir', 'iteration' and 'heartbeat_items' are read, as
            is the 'originating-client' header; 'version' is optional and
            defaults to 0.
        response: response object that the result is added to.

    The response data is the configured heartbeat time; for versions
    above 1 it is a dict with the keys 'heartbeat-time', 'random-file'
    and, when items were faulty, 'faulty'.
    """
    workerID = request.getParam('worker_id')
    workerDir = request.getParam('worker_dir')
    iteration = request.getParam('iteration')
    itemsXML = request.getParam('heartbeat_items')
    # Requests that carry no 'version' parameter are treated as version 0.
    version = 0
    if request.hasParam('version'):
        version = int(request.getParam('version'))

    hwr = cpc.command.heartbeat.HeartbeatItemReader()
    hwr.readString(itemsXML, "worker heartbeat items")
    heartbeatItems = hwr.getItems()

    # The worker data list
    workerDataList = serverState.getWorkerDataList()

    # Order the heartbeat items by destination server
    haveADir = False
    destList = {}
    Nhandled = 0
    for item in heartbeatItems:
        dest = item.getServerName()
        item.checkRunDir()
        if item.getHaveRunDir():
            haveADir = True
        destList.setdefault(dest, []).append(item)
        Nhandled += 1

    # NOTE(review): the removal on the final iteration is assumed to be
    # independent of haveADir -- confirm.
    if haveADir and iteration != "final":
        workerDataList.add(workerDir)
    if iteration == "final":
        workerDataList.remove(workerDir)

    # get my own name to compare
    selfNode = Node.getSelfNode(serverState.conf)
    selfName = selfNode.getId()

    # Update the status at every heartbeat. This is how we know that the
    # worker is still talking to the server.
    serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                               workerID,
                               request.headers['originating-client'])

    # Now iterate over the destinations, and send them their heartbeat
    # items.
    # Once we have many workers, this would be a place to pool heartbeat
    # items and send them as one big request.
    faultyItems = []
    # items() rather than iteritems(): identical iteration here, and it
    # does not tie the method to Python 2.
    for dest, items in destList.items():
        if dest == selfName:
            # faultyItems is handed to ping() alongside the items; the
            # return value of ping() is not used.
            serverState.getRunningCmdList().ping(workerID, workerDir,
                                                 iteration, items, True,
                                                 faultyItems)
        else:
            msg = ServerMessage(dest)
            co = StringIO()
            co.write('<heartbeat worker_id="%s" worker_server_id="%s">' %
                     (workerID, selfName))
            for item in items:
                item.writeXML(co)
            co.write('</heartbeat>')
            resp = msg.heartbeatForwardedRequest(workerID, workerDir,
                                                 selfName, iteration,
                                                 co.getvalue())
            presp = ProcessedResponse(resp)
            if presp.getStatus() != "OK":
                log.info("Heartbeat response from %s not OK" % dest)
                # NOTE(review): faulty items are assumed to be collected
                # only from non-OK responses -- confirm against the
                # forwarded-heartbeat handler.
                faultyItems.extend(presp.getData())

    if version > 1:
        retData = {
            'heartbeat-time': serverState.conf.getHeartbeatTime(),
            'random-file': workerDataList.getRnd(workerDir),
        }
    else:
        retData = serverState.conf.getHeartbeatTime()

    if not faultyItems:
        response.add('', data=retData)
    else:
        # Only the dict-shaped reply (version > 1) can carry the list of
        # faulty items; older versions just get the error status.
        if version > 1:
            retData['faulty'] = faultyItems
        # TODO: per-workload error reporting
        # The message text (including its spelling) is sent to clients
        # and is therefore left exactly as it was.
        response.add('Heatbeat NOT OK', status="ERROR", data=retData)
    log.info("Handled %d heartbeat signal items." % (Nhandled))