def testCacheObjects(self):
    node1 = Node("testhost", 8080, 9090)
    node2 = Node("testhost", 8081, 9091)
    nodes = Nodes()
    nodes.addNode(node1)
    nodes.addNode(node2)
    Cache().add("network-topology", nodes)
    cachedNodes = Cache().get("network-topology")
    self.assertEquals(2, cachedNodes.size())
def run(self, serverState, request, response):
    conf = ServerConf()
    sentConnectRequests = conf.getSentNodeConnectRequests()
    node = json.loads(request.getParam('connectRequest'),
                      object_hook=json_serializer.fromJson)
    if sentConnectRequests.exists(node.getId()):
        nodeToAdd = sentConnectRequests.get(node.getId())
        conf.addNode(Node(node.server_id,
                          node.getClientSecurePort(),
                          node.getServerSecurePort(),
                          node.getQualifiedName(),
                          nodeToAdd.getHostname()))
        #conf.addNode(nodeToAdd)
        openssl = OpenSSL(conf)
        openssl.addCa(node.key)
        sentConnectRequests.removeNode(node.getId())
        conf.set('sent_node_connect_requests', sentConnectRequests)
        # need to send back a status in the data notifying ok
        response.add('Connection to node %s established' % node.toString())
        log.info("Node connection accepted")
        # add it to the node list
    else:
        response.add('No previous node request sent for host %s' %
                     node.toString())
        log.info("Node connection not accepted")
def grant(key):
    # key is the server id
    conf = ServerConf()
    nodeConnectRequests = conf.getNodeConnectRequests()
    if nodeConnectRequests.exists(key):
        # this returns a NodeConnectRequest object
        nodeToAdd = nodeConnectRequests.get(key)
        serv = RawServerMessage(nodeToAdd.getHostname(),
                                nodeToAdd.getClientSecurePort())
        # let the requesting node know that it is accepted,
        # also sending this server's connection parameters
        resp = serv.addNodeAccepted()
        conf.addNode(Node(nodeToAdd.server_id,
                          nodeToAdd.getClientSecurePort(),
                          nodeToAdd.getServerSecurePort(),
                          nodeToAdd.getQualifiedName(),
                          nodeToAdd.getHostname()))
        # trust the key
        openssl = OpenSSL(conf)
        openssl.addCa(nodeToAdd.key)
        nodeConnectRequests.removeNode(nodeToAdd.getId())
        conf.set('node_connect_requests', nodeConnectRequests)
        return True
    else:
        return False
def initialize(self, endNodeId):
    topology = self.getNetworkTopology()
    if not topology:
        log.error("Cannot get network topology")
        return
    # this is myself:
    startNode = Node.getSelfNode(self.conf)
    self.endNode = topology.nodes.get(endNodeId)
    log.log(cpc.util.log.TRACE,
            "Finding route between %s(%s %s) and %s(%s %s)" %
            (startNode.server_id, startNode.getHostname(),
             startNode.getServerSecurePort(), self.endNode.server_id,
             self.endNode.getHostname(), self.endNode.getServerSecurePort()))
    route = Nodes.findRoute(startNode, self.endNode, topology)
    self.hostNode = route[1]  # route[0] is the current host
    self.host = self.hostNode.getHostname()
    self.port = self.hostNode.getServerSecurePort()
    self.serverId = self.hostNode.getId()
    log.log(cpc.util.log.TRACE,
            "Server-to-server connecting to %s(%s:%s)" %
            (self.serverId, self.host, self.port))
def fromJson(jsonObj):
    if 'class' in jsonObj:
        if jsonObj['class'] == 'Node':
            node = Node(jsonObj['server_id'],
                        int(jsonObj['client_secure_port']),
                        int(jsonObj['server_secure_port']),
                        jsonObj['qualified_name'],
                        jsonObj['hostname'])
            if "nodes" in jsonObj:
                node.setNodes(jsonObj['nodes'])
            if "priority" in jsonObj:
                node.setPriority(jsonObj['priority'])
            if "workerStates" in jsonObj:
                node.workerStates = jsonObj['workerStates']
            return node
        if jsonObj['class'] == 'WorkerState':
            return WorkerState(jsonObj['host'], jsonObj['state'],
                               jsonObj['workerId'])
        if jsonObj['class'] == 'Nodes':
            nodes = Nodes()
            for node in jsonObj['nodes'].itervalues():
                nodes.addNode(node)
            return nodes
        if jsonObj['class'] == 'NodeConnectRequest':
            return NodeConnectRequest(jsonObj['server_id'],
                                      jsonObj['client_secure_port'],
                                      jsonObj['server_secure_port'],
                                      jsonObj['key'],
                                      jsonObj['qualified_name'],
                                      jsonObj['hostname'])
    return jsonObj
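# Usage sketch (illustrative addition, not from the original source): fromJson
# is meant to be passed to json.loads as an object_hook, as the connect-request
# handler above does, so dicts tagged with a 'class' key come back as Node,
# Nodes, WorkerState or NodeConnectRequest objects. exampleDeserializeTopology
# and topologyJson are hypothetical names.
def exampleDeserializeTopology(topologyJson):
    topology = json.loads(topologyJson, object_hook=fromJson)
    for node in topology.nodes.itervalues():
        log.debug("topology node %s at %s:%s" %
                  (node.getId(), node.getHostname(),
                   node.getServerSecurePort()))
    return topology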
def broadcast(self, fields, files=[], headers=dict()):
    topology = ServerToServerMessage.getNetworkTopology()
    if not topology:
        log.error("Cannot find network topology.")
        return
    # we don't want to broadcast to ourselves
    node = Node.getSelfNode(ServerConf())
    topology.removeNode(node.getId())
    for node in topology.nodes.itervalues():
        self._sendMessage(node, fields, files, headers)
def requestNetworkTopology(topology, serverState=None):
    """Asks each neighbouring node for its network topology.

    inputs:
        topology: Nodes            The topology gathered so far.
        serverState: ServerState   If provided, worker states are fetched.
            This method is called by getNetworkTopology(), which in turn is
            called from places where we do not pass (and do not want) the
            serverState, so it is optional. It is also not needed in that
            case, because the calling server always knows the most
            up-to-date state of its own workers.
    """
    conf = ServerConf()
    thisNode = Node.getSelfNode(conf)
    thisNode.setNodes(conf.getNodes())
    topology.addNode(thisNode)
    if serverState:
        thisNode.workerStates = WorkerStateHandler.getConnectedWorkers(
            serverState.getWorkerStates())
    for node in thisNode.getNodes().nodes.itervalues():
        if not topology.exists(node.getId()):
            # connect to the node if it is reachable
            if node.isConnected():
                try:
                    clnt = DirectServerMessage(node, conf=conf)
                    # send along the current topology
                    rawresp = clnt.networkTopology(topology)
                    processedResponse = ProcessedResponse(rawresp)
                    topology = processedResponse.getData()
                except ServerConnectionError as e:
                    # we cannot connect to the node and it is marked as
                    # unreachable; we must still add it to the topology
                    log.error("node %s unreachable when asking for network "
                              "topology: error was %s" % (node.getId(),
                                                          str(e)))
                    topology.addNode(node)
            # TODO: notify in topology that this node is not connected?
    return topology
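# Sketch of how the recursive walk is typically seeded (an assumed caller,
# mirroring the getNetworkTopology() wrapper mentioned in the docstring; the
# real wrapper may also cache the result, e.g. in the Cache exercised by the
# test at the top of this section). exampleBuildTopology is a hypothetical
# name.
def exampleBuildTopology(serverState=None):
    # start from an empty accumulator; requestNetworkTopology adds this
    # server's own node and then asks each neighbour in turn
    return requestNetworkTopology(Nodes(), serverState)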
def run(self, serverState, request, response):
    # first read platform capabilities and executables
    rdr = cpc.command.platform_exec_reader.PlatformExecutableReader()
    workerData = request.getParam('worker')
    if request.hasParam('worker-id'):
        workerID = request.getParam('worker-id')
    else:
        workerID = '(none)'
    log.debug("Worker platform + executables: %s" % workerData)
    rdr.readString(workerData, "Worker-reported platform + executables")
    # match queued commands to executables.
    cwm = CommandWorkerMatcher(rdr.getPlatforms(),
                               rdr.getExecutableList(),
                               rdr.getWorkerRequirements())
    cmds = cwm.getWork(serverState.getCmdQueue())
    if not cwm.isDepleted():
        # now sleep for 5 seconds to give the dataflow time to react to any
        # new state.
        time.sleep(5)
        cmds.extend(cwm.getWork(serverState.getCmdQueue()))
    # now check the forwarded variables
    conf = serverState.conf
    originatingServer = None
    heartbeatInterval = None
    try:
        # check whether there is an originating server. If not, we're it.
        if self.forwarded:
            if 'originating-server-id' in request.headers:
                originatingServer = request.headers['originating-server-id']
            # check the expected heartbeat time.
            log.debug("Forwarded message")
            if request.hasParam('heartbeat-interval'):
                heartbeatInterval = int(request.getParam('heartbeat-interval'))
                log.debug("Forwarded heartbeat interval is %d" %
                          heartbeatInterval)
    except NameError:
        # self.forwarded does not exist; treat it as self.forwarded == False
        pass
    if originatingServer is None:
        # If the originating server property has not been set, the request
        # hasn't been forwarded; therefore we are the originating server.
        selfNode = Node.getSelfNode(conf)
        originatingServer = selfNode.getId()
        # we only store worker state in the server the worker connects to
        serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                                   workerID,
                                   request.headers['originating-client'])
    if heartbeatInterval is None:
        heartbeatInterval = conf.getHeartbeatTime()
    log.debug("worker identified %s" % request.headers['originating-client'])

    if len(cmds) > 0:
        # first add them to the running list so they never get lost
        runningCmdList = serverState.getRunningCmdList()
        runningCmdList.add(cmds, originatingServer, heartbeatInterval)
        # construct the tar file with the workloads.
        tff = tempfile.TemporaryFile()
        tf = tarfile.open(fileobj=tff, mode="w:gz")
        # make the commands ready
        for cmd in cmds:
            log.debug("Adding command id %s to tar file." % cmd.id)
            # write the command description to the command's directory
            task = cmd.getTask()
            #log.debug(cmd)
            project = task.getProject()
            taskDir = "task_%s" % task.getID()
            cmddir = cmd.getDir()
            if not os.path.exists(cmddir):
                log.debug("cmddir %s did not exist. Created directory." %
                          cmd.id)
                os.mkdir(cmddir)
            arcdir = "%s" % (cmd.id)
            log.debug("cmddir=%s" % cmddir)
            outf = open(os.path.join(cmddir, "command.xml"), "w")
            cmd.writeWorkerXML(outf)
            outf.close()
            tf.add(cmddir, arcname=arcdir, recursive=True)
            # set the state of the command.
        tf.close()
        del tf
        tff.seek(0)
        # now send it back
        response.setFile(tff, 'application/x-tar')
        #project.writeTasks()
        # the file is closed after the response is sent.
        log.info("Did direct worker-ready")
    else:
        nodes = conf.getNodes().getNodesByPriority()
        topology = Nodes()
        if request.hasParam('topology'):
            topology = json.loads(request.getParam('topology'),
                                  object_hook=json_serializer.fromJson)
        thisNode = Node.getSelfNode(conf)
        thisNode.nodes = conf.getNodes()
        topology.addNode(thisNode)
        hasJob = False  # temporary flag that should be removed
        for node in nodes:
            if not topology.exists(node.getId()):
                clnt = ServerMessage(node.getId())
                clientResponse = clnt.workerReadyForwardedRequest(
                    workerID, workerData, topology, originatingServer,
                    heartbeatInterval, request.headers['originating-client'])
                if clientResponse.getType() == 'application/x-tar':
                    log.log(cpc.util.log.TRACE,
                            'got work from %s' %
                            clientResponse.headers['originating-server-id'])
                    hasJob = True
                    # we need to rewrap the message
                    # TODO: intermediary step needed because the mmap from
                    # clientResponse is prematurely closed
                    tmp = tempfile.TemporaryFile('w+b')
                    message = clientResponse.getRawData()
                    tmp.write(message.read(len(message)))
                    tmp.seek(0)
                    #for key in clientResponse.headers:
                    #    print "%s:%s" % (key, clientResponse.headers[key])
                    response.setFile(tmp, 'application/x-tar')
                    response.headers['originating-server-id'] = \
                        clientResponse.headers['originating-server-id']
                # OPTIMIZE: leads to a lot of folding and unfolding of
                # packages
        if not hasJob:
            response.add("No command")
        log.info("Did delegated worker-ready")
def run(self, serverState, request, response):
    workerID = request.getParam('worker_id')
    workerDir = request.getParam('worker_dir')
    iteration = request.getParam('iteration')
    itemsXML = request.getParam('heartbeat_items')
    version = 0
    if request.hasParam('version'):
        version = int(request.getParam('version'))
    hwr = cpc.command.heartbeat.HeartbeatItemReader()
    hwr.readString(itemsXML, "worker heartbeat items")
    heartbeatItems = hwr.getItems()
    # The worker data list
    workerDataList = serverState.getWorkerDataList()
    haveADir = False
    # Order the heartbeat items by destination server
    destList = {}
    Nhandled = 0
    for item in heartbeatItems:
        dest = item.getServerName()
        item.checkRunDir()
        if item.getHaveRunDir():
            haveADir = True
        if dest in destList:
            destList[dest].append(item)
        else:
            destList[dest] = [item]
        Nhandled += 1
    if haveADir:
        if iteration != "final":
            workerDataList.add(workerDir)
    if iteration == "final":
        workerDataList.remove(workerDir)
    # get my own name to compare
    selfNode = Node.getSelfNode(serverState.conf)
    selfName = selfNode.getId()
    # update the status at every heartbeat; this is how we know that the
    # worker is still talking to the server
    serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED, workerID,
                               request.headers['originating-client'])
    # now iterate over the destinations, and send them their heartbeat items.
    # Once we have many workers, this would be a place to pool heartbeat
    # items and send them as one big request.
    faultyItems = []
    for dest, items in destList.iteritems():
        if dest == selfName:
            ret = serverState.getRunningCmdList().ping(workerID, workerDir,
                                                       iteration, items, True,
                                                       faultyItems)
        else:
            msg = ServerMessage(dest)
            co = StringIO()
            co.write('<heartbeat worker_id="%s" worker_server_id="%s">' %
                     (workerID, selfName))
            for item in items:
                item.writeXML(co)
            co.write('</heartbeat>')
            resp = msg.heartbeatForwardedRequest(workerID, workerDir, selfName,
                                                 iteration, co.getvalue())
            presp = ProcessedResponse(resp)
            if presp.getStatus() != "OK":
                log.info("Heartbeat response from %s not OK" % dest)
                retitems = presp.getData()
                for item in retitems:
                    faultyItems.append(item)
    if version > 1:
        retData = {'heartbeat-time': serverState.conf.getHeartbeatTime(),
                   'random-file': workerDataList.getRnd(workerDir)}
    else:
        retData = serverState.conf.getHeartbeatTime()
    if len(faultyItems) == 0:
        response.add('', data=retData)
    else:
        if version > 1:
            retData['faulty'] = faultyItems
        # TODO: per-workload error reporting
        response.add('Heartbeat NOT OK', status="ERROR", data=retData)
    log.info("Handled %d heartbeat signal items." % Nhandled)
def runLocal(self, serverState, request, response):
    #self.lock = threading.Lock()
    cmdID = request.getParam('cmd_id')
    selfNode = Node.getSelfNode(serverState.conf)
    selfName = selfNode.getId()
    # get the source server if set. If not set, it means that this server
    # is the worker server.
    if request.hasParam('worker_server'):
        workerServer = request.getParam('worker_server')
    else:
        workerServer = selfName
    # get the destination server if set
    if request.hasParam('project_server'):
        projServer = request.getParam('project_server')
    else:
        # for backward compatibility, we assume that we are the project
        # server if it's forwarded. If not, there's something wrong.
        projServer = selfName
        if not self.forwarded:
            raise CommandFinishError(
                "no project server set in command finished request.")
    returncode = None
    if request.hasParam('return_code'):
        returncode = int(request.getParam('return_code'))
    cputime = 0
    if request.hasParam('used_cpu_time'):
        cputime = float(request.getParam('used_cpu_time'))
    runfile = None
    if request.haveFile('run_data'):
        runfile = request.getFile('run_data')
    elif request.haveFile('rundata'):
        # backward compatibility
        runfile = request.getFile('rundata')
    if projServer != selfName:
        # forward the request using remote assets. Note that the workers
        # usually don't take this path anyway and forward directly to the
        # project server. This might change in the future.
        # TODO: some sort of verification to check whether this was in fact
        # the client that we sent the command to
        serverState.getLocalAssets().addCmdOutputAsset(cmdID, projServer,
                                                       runfile)
        # forward the CommandFinished signal to the project server
        msg = ServerMessage(projServer)
        ret = msg.commandFinishedForwardedRequest(cmdID, workerServer,
                                                  projServer, returncode,
                                                  cputime,
                                                  runfile is not None)
    else:
        # handle the input locally.
        # get the remote asset if it exists
        if (workerServer is not None and runfile is None and
                request.hasParam('run_data') and
                int(request.getParam('run_data')) != 0):
            # remote asset tracking
            log.info("Pulling asset from %s" % workerServer)
            serverState.getRemoteAssets().addAsset(cmdID, workerServer)
            # for now, get the command data output immediately
            rundata = Tracker.getCommandOutputData(cmdID, workerServer)
            if rundata is not None:
                runfile = rundata.getRawData()
        # now handle the finished command.
        runningCmdList = serverState.getRunningCmdList()
        runningCmdList.handleFinished(cmdID, returncode, cputime, runfile)
def __init__(self, server_id, client_secure_port, server_secure_port, key,
             qualified_name, hostname):
    Node.__init__(self, server_id, client_secure_port, server_secure_port,
                  qualified_name, hostname)
    self.key = key  # the public key of the server
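# Illustrative construction (hypothetical values): the argument order matches
# the NodeConnectRequest branch of fromJson above, and publicKey stands in for
# this server's public key, which ends up in self.key.
def exampleConnectRequest(publicKey):
    return NodeConnectRequest("server-1", 8080, 9090, publicKey,
                              "server-1.example.org", "server-1.example.org")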