def fromJson(jsonObj):
    """
    Turn a decoded JSON dictionary into the object named by its 'class'
    entry.

    Recognised 'class' values are 'Node', 'WorkerState', 'Nodes' and
    'NodeConnectRequest'.  Anything without a 'class' entry, or with an
    unrecognised one, is handed back unchanged.
    """
    # Nothing to convert without a class tag.
    if 'class' not in jsonObj:
        return jsonObj

    kind = jsonObj['class']

    if kind == 'Node':
        result = Node(jsonObj['server_id'],
                      int(jsonObj['client_secure_port']),
                      int(jsonObj['server_secure_port']),
                      jsonObj['qualified_name'],
                      jsonObj['hostname'])
        # The remaining fields are optional and only applied when present.
        if "nodes" in jsonObj:
            result.setNodes(jsonObj['nodes'])
        if "priority" in jsonObj:
            result.setPriority(jsonObj['priority'])
        if "workerStates" in jsonObj:
            result.workerStates = jsonObj['workerStates']
        return result

    if kind == 'WorkerState':
        return WorkerState(jsonObj['host'],
                           jsonObj['state'],
                           jsonObj['workerId'])

    if kind == 'Nodes':
        collection = Nodes()
        # The 'nodes' entry is a mapping; only its values are added.
        for member in jsonObj['nodes'].itervalues():
            collection.addNode(member)
        return collection

    if kind == 'NodeConnectRequest':
        return NodeConnectRequest(jsonObj['server_id'],
                                  jsonObj['client_secure_port'],
                                  jsonObj['server_secure_port'],
                                  jsonObj['key'],
                                  jsonObj['qualified_name'],
                                  jsonObj['hostname'])

    # Unknown class tag: leave the dictionary as it is.
    return jsonObj
def testCacheObjects(self):
    """
    Store a Nodes collection holding two nodes in the cache under the key
    "network-topology" and check that the object read back reports a size
    of 2.

    The write and the read go through two separate ``Cache()`` calls, so
    the test only passes if those calls share their storage.
    """
    node1 = Node("testhost", 8080, 9090)
    node2 = Node("testhost", 8081, 9091)

    nodes = Nodes()
    nodes.addNode(node1)
    nodes.addNode(node2)

    Cache().add("network-topology", nodes)
    cachedNodes = Cache().get("network-topology")

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(2, cachedNodes.size())
def run(self, serverState, request, response):
    """
    Handle a network-topology request coming from another server.

    Starts from the topology passed in the optional 'topology' request
    parameter (or an empty Nodes collection), lets
    ServerToServerMessage.requestNetworkTopology complete it and adds the
    result to the response.
    """
    known = Nodes()
    if request.hasParam('topology'):
        encoded = request.getParam('topology')
        known = json.loads(encoded, object_hook=json_serializer.fromJson)

    result = ServerToServerMessage.requestNetworkTopology(known, serverState)
    response.add("", result)
    log.info("Returned network topology size %d" % result.size())
def run(self, serverState, request, response):
    """
    Build the network topology on behalf of a requesting server.

    The partial topology sent along in the 'topology' parameter, when
    there is one, is decoded with json_serializer.fromJson; otherwise an
    empty Nodes collection is used.  The topology returned by
    ServerToServerMessage.requestNetworkTopology is added to the response
    and its size is logged.
    """
    topology = Nodes()
    topologySent = request.hasParam('topology')
    if topologySent:
        topology = json.loads(
            request.getParam('topology'),
            object_hook=json_serializer.fromJson)

    topology = ServerToServerMessage.requestNetworkTopology(
        topology, serverState)

    response.add("", topology)

    summary = "Returned network topology size %d" % topology.size()
    log.info(summary)
def initialize(self, endNodeId):
    """
    Work out which server to connect to in order to reach endNodeId.

    Looks up the end node in the network topology, asks Nodes.findRoute
    for a route from this server to it and stores the second entry of
    that route (the first one is this server) in self.hostNode, together
    with its hostname, server secure port and id in self.host, self.port
    and self.serverId.  The end node itself is kept in self.endNode.

    If no topology can be obtained an error is logged and nothing is set.
    """
    topology = self.getNetworkTopology()
    if not topology:
        log.error("Cannot get network topology")
        return

    # The route starts at this server.
    startNode = Node.getSelfNode(self.conf)

    # NOTE(review): .get() yields None for an unknown endNodeId, in which
    # case the trace statement below fails on the attribute access --
    # confirm callers only pass ids that are present in the topology.
    target = topology.nodes.get(endNodeId)
    self.endNode = target

    routeMsg = ("Finding route between %s(%s %s) and %s(%s %s)"
                % (startNode.server_id,
                   startNode.getHostname(),
                   startNode.getServerSecurePort(),
                   target.server_id,
                   target.getHostname(),
                   target.getServerSecurePort()))
    log.log(cpc.util.log.TRACE, routeMsg)

    route = Nodes.findRoute(startNode, target, topology)

    # route[0] is the current host, so the next hop is route[1].
    # NOTE(review): assumes findRoute returns at least two entries.
    nextHop = route[1]
    self.hostNode = nextHop
    self.host = nextHop.getHostname()
    self.port = nextHop.getServerSecurePort()
    self.serverId = nextHop.getId()

    connectMsg = ("Server-to-server connecting to %s(%s:%s)"
                  % (self.serverId, self.host, self.port))
    log.log(cpc.util.log.TRACE, connectMsg)
def fromJson(jsonObj):
    """
    Rebuild a Node, WorkerState, Nodes or NodeConnectRequest from its
    decoded JSON dictionary, selected by the dictionary's "class" entry.

    Input that has no "class" entry, or whose "class" is none of the four
    known names, is returned as it was received.
    """
    typeName = jsonObj["class"] if "class" in jsonObj else None

    if typeName == "Node":
        node = Node(
            jsonObj["server_id"],
            int(jsonObj["client_secure_port"]),
            int(jsonObj["server_secure_port"]),
            jsonObj["qualified_name"],
            jsonObj["hostname"],
        )
        # Optional attributes are copied over only when they were sent.
        if "nodes" in jsonObj:
            node.setNodes(jsonObj["nodes"])
        if "priority" in jsonObj:
            node.setPriority(jsonObj["priority"])
        if "workerStates" in jsonObj:
            node.workerStates = jsonObj["workerStates"]
        return node
    elif typeName == "WorkerState":
        return WorkerState(jsonObj["host"], jsonObj["state"],
                           jsonObj["workerId"])
    elif typeName == "Nodes":
        nodes = Nodes()
        for entry in jsonObj["nodes"].itervalues():
            nodes.addNode(entry)
        return nodes
    elif typeName == "NodeConnectRequest":
        # Constructor arguments, in positional order.
        fieldNames = (
            "server_id",
            "client_secure_port",
            "server_secure_port",
            "key",
            "qualified_name",
            "hostname",
        )
        values = [jsonObj[fieldName] for fieldName in fieldNames]
        return NodeConnectRequest(*values)

    return jsonObj
def getNetworkTopology(resetCache = False):
    """
    Return the network topology, preferring the cached copy.

    Unless resetCache is set, NetworkTopologyCache is consulted first and
    its answer is returned directly when it does not compare equal to
    False.  Otherwise a new topology is requested through
    ServerToServerMessage.requestNetworkTopology, starting from an empty
    Nodes collection, stored in the cache and returned.

    resetCache:boolean   skip the cache lookup; request the topology and
                         write the result to the cache
    """
    if resetCache == False:
        cached = NetworkTopologyCache().get()
        # The cache lookup is treated as a miss when it equals False.
        if not (cached == False):
            return cached

    empty = Nodes()
    fresh = ServerToServerMessage.requestNetworkTopology(empty)
    NetworkTopologyCache().add(fresh)
    return fresh
def run(self, serverState, request, response):
    """
    Answer a worker-ready request with work for the worker, if there is any.

    The worker's platform/executable description (request parameter
    'worker') is matched against this server's command queue.  Matching
    commands are added to the running-command list, packed into a gzipped
    tar archive (one directory per command, each with a freshly written
    command.xml) and returned through ``response``.

    If nothing matches locally, the request is forwarded to every node from
    ``conf.getNodes().getNodesByPriority()`` that is not yet part of the
    request's topology, and a tar response from such a node is relayed
    back.  When no node supplies work the response is "No command".

    serverState: gives access to the command queue, the running-command
                 list, the worker state registry and the configuration
                 (``serverState.conf``).
    request:     must carry the 'worker' parameter and the
                 'originating-client' header.  The 'worker-id' and
                 'topology' parameters are optional.  The
                 'heartbeat-interval' parameter and the
                 'originating-server-id' header are only consulted when
                 ``self.forwarded`` is true.
    response:    receives either the tar file or the "No command" message.
    """
    # first read platform capabilities and executables
    rdr = cpc.command.platform_exec_reader.PlatformExecutableReader()
    workerData = request.getParam('worker')
    if request.hasParam('worker-id'):
        workerID = request.getParam('worker-id')
    else:
        workerID = '(none)'
    log.debug("Worker platform + executables: %s" % workerData)
    rdr.readString(workerData, "Worker-reported platform + executables")

    # match queued commands to executables.
    cwm = CommandWorkerMatcher(rdr.getPlatforms(),
                               rdr.getExecutableList(),
                               rdr.getWorkerRequirements())
    cmds = cwm.getWork(serverState.getCmdQueue())
    if not cwm.isDepleted():
        # now sleep for 5 seconds to give the dataflow time to react to any
        # new state.
        time.sleep(5)
        cmds.extend(cwm.getWork(serverState.getCmdQueue()))

    # now check the forwarded variables
    conf = serverState.conf
    originatingServer = None
    heartbeatInterval = None

    # self.forwarded may not exist; treat that as self.forwarded == False.
    # A missing attribute raises AttributeError, which the previous
    # `except NameError` never caught, so getattr with a default is used.
    if getattr(self, 'forwarded', False):
        # check whether there is an originating server. If not, we're it
        if 'originating-server-id' in request.headers:
            originatingServer = request.headers['originating-server-id']
        # check the expected heartbeat time.
        log.debug("Forwarded message")
        if request.hasParam('heartbeat-interval'):
            heartbeatInterval = int(request.getParam('heartbeat-interval'))
            log.debug("Forwarded heartbeat interval is %d" %
                      heartbeatInterval)

    if originatingServer is None:
        # If the originating server property has not been set, the
        # request hasn't been forwarded, therefore we are the originating
        # server
        selfNode = Node.getSelfNode(conf)
        originatingServer = selfNode.getId()
        # we only store worker state in the server the worker connects to
        serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,
                                   workerID,
                                   request.headers['originating-client'])
    if heartbeatInterval is None:
        heartbeatInterval = conf.getHeartbeatTime()
    log.debug("worker identified %s" % request.headers['originating-client'])

    if len(cmds) > 0:
        # first add them to the running list so they never get lost
        runningCmdList = serverState.getRunningCmdList()
        runningCmdList.add(cmds, originatingServer, heartbeatInterval)

        # construct the tar file with the workloads.
        tff = tempfile.TemporaryFile()
        tf = tarfile.open(fileobj=tff, mode="w:gz")
        # make the commands ready
        for cmd in cmds:
            log.debug("Adding command id %s to tar file." % cmd.id)
            # write the command description to the command's directory
            cmddir = cmd.getDir()
            if not os.path.exists(cmddir):
                log.debug("cmddir %s did not exist. Created directory." %
                          cmd.id)
                os.mkdir(cmddir)
            arcdir = "%s" % (cmd.id)
            log.debug("cmddir=%s" % cmddir)
            # The with-block closes command.xml even if writing it fails,
            # and guarantees it is flushed before being added to the tar.
            with open(os.path.join(cmddir, "command.xml"), "w") as outf:
                cmd.writeWorkerXML(outf)
            tf.add(cmddir, arcname=arcdir, recursive=True)
        tf.close()
        del tf
        # rewind so the response reads the archive from the beginning
        tff.seek(0)
        # now send it back
        response.setFile(tff, 'application/x-tar')
        # the file is closed after the response is sent.
        log.info("Did direct worker-ready")
    else:
        nodes = conf.getNodes().getNodesByPriority()

        topology = Nodes()
        if request.hasParam('topology'):
            topology = json.loads(request.getParam('topology'),
                                  object_hook=json_serializer.fromJson)

        # Add ourselves to the topology before forwarding, so the nodes we
        # ask can see that this server is already part of the request.
        thisNode = Node.getSelfNode(conf)
        thisNode.nodes = conf.getNodes()
        topology.addNode(thisNode)

        hasJob = False  # temporary flag that should be removed
        # NOTE(review): the loop keeps asking the remaining nodes after one
        # has already returned work, and a later tar response replaces the
        # earlier one on `response` -- confirm whether it should stop at
        # the first node that supplies work.
        for node in nodes:
            if topology.exists(node.getId()) == False:
                clnt = ServerMessage(node.getId())

                clientResponse = clnt.workerReadyForwardedRequest(
                    workerID,
                    workerData,
                    topology,
                    originatingServer,
                    heartbeatInterval,
                    request.headers['originating-client'])

                if clientResponse.getType() == 'application/x-tar':
                    log.log(cpc.util.log.TRACE,
                            'got work from %s' %
                            (clientResponse.headers[
                                'originating-server-id']))
                    hasJob = True
                    # we need to rewrap the message

                    # TODO stupid intermediary step because the mmap from
                    # clientResponse is prematurely closed
                    tmp = tempfile.TemporaryFile('w+b')

                    message = clientResponse.getRawData()

                    tmp.write(message.read(len(message)))
                    tmp.seek(0)

                    response.setFile(tmp, 'application/x-tar')
                    response.headers['originating-server-id'] = \
                        clientResponse.headers['originating-server-id']
                # OPTIMIZE leads to a lot of folding and unfolding of
                # packages
        if not hasJob:
            response.add("No command")
        log.info("Did delegated worker-ready")
def initDefaults(self):
    """
    Register the server's configuration entries with their default values.

    Calls the base class initDefaults first and then adds every
    server-specific entry through self._add.  Finally the directory of the
    running executable (taken from sys.argv[0]) is stored in
    self.execBasedir, registered as 'exec_base_dir' and appended to the
    PYTHONPATH environment variable so that child processes inherit it.
    """
    conf_base.Conf.initDefaults(self)

    server_host = ''
    self._add('server_host', server_host,
              "Address the server listens on", True)
    self._add('server_fqdn', socket.getfqdn(),
              "Manually specified fqdn", True)

    self._add('server_secure_port', Conf.getDefaultServerSecurePort(),
              "Port number the server uses for communication from servers ",
              True, None, r'\d+')
    self._add('client_secure_port', Conf.getDefaultClientSecurePort(),
              "Port number the server listens on for communication from "
              "clients",
              True, None, r'\d+')

    self._add('nodes', Nodes(),
              "List of nodes connected to this server", False)
    self._add('revoked_nodes', Nodes(),
              "List of revoked nodes", False)
    self._add('node_connect_requests', Nodes(),
              "List of nodes requesting to connect to this server", False)
    self._add('sent_node_connect_requests', Nodes(),
              "List of connect requests sent", False)

    self._add('project_file', "projects.xml",
              "Projects file name (relative to conf_dir)",
              relTo='conf_dir')
    self._add('state_save_interval', 240,
              "Time in seconds between state saves", True,
              validation=r'\d+')

    self._add('import_path', "",
              "Colon-separated list of directories to search for imports, "
              "in addition to cpc/lib, .copernicus/lib and "
              ".copernicus/<hostname>/lib",
              True)
    self._add('mode', 'prod',
              "The run mode of the server", True, None, None,
              ['trace', 'debug', 'prod'])
    self._add('profiling', 'false',
              "Profile the server CPU usage using yappi (ver >= 0.82)",
              True, None, None, ['false', 'true'])

    # run options
    self._add('run_dir', None,
              "Base directory of all files produced by running projects.",
              True)

    # log options
    self._add('log_dir', "log",
              "Directory containing logs", True, relTo='conf_dir')
    self._add('server_log_file', "server.log",
              "The server log file", False, relTo='log_dir')
    self._add('error_log_file', "error.log",
              "The error log file", False, relTo='log_dir')

    # heartbeat options
    self._add('heartbeat_time', 120,
              "Time in seconds between heartbeats", True,
              validation=r'\d+')
    self._add('heartbeat_file', "heartbeatlist.xml",
              "Heartbeat monitor list", False, relTo='conf_dir')

    # Task exec queue size. If it exceeds this size, the dataflow
    # propagation blocks.
    self._add('task_queue_size', 1024,
              "Dataflow execution task queue size", True,
              validation=r'\d+')

    # static configuration
    self._add('web_root', 'web',
              "The directory where html,js and css files are located")

    # assets
    self._add('local_assets_dir', "local_assets",
              "Directory containing local assets such as command output "
              "files",
              True, relTo='conf_dir')

    # NOTE(review): the default -1 does not itself match the validation
    # pattern -- presumably validation is only applied to values set by
    # the user; confirm against _add.
    self._add('server_cores', -1,
              "Number of cores to use on the server (for OpenMP tasks).",
              userSettable=True, validation=r'\d+')

    self._add('num_persistent_connections', 5,
              "Number of persistent connection to establish for each "
              "trusted server",
              userSettable=True)
    self._add('keep_alive_interval', 60,
              "Keep alive interval of server connections,value is in "
              "minutes",
              userSettable=True)
    self._add('reconnect_interval', 300,
              "Interval between trying to reestablish failed connections ,"
              "value is in seconds",
              userSettable=True)

    # The description is built from adjacent literals; each fragment ends
    # with a space so the words do not run together when concatenated.
    self._add('server_verification', True,
              "By default servers should always require ssl certificate "
              "from both directions "
              "setting this to true will let the sending server to use the "
              "client port and disregard "
              "certificate checks. This should only be used in very rare "
              "circumstances, for example when debugging "
              "ssl incombatibilites between machines ",
              writable=False,
              userSettable=False)

    # Directory of the running executable; empty when sys.argv[0] has no
    # directory component.
    dn = os.path.dirname(sys.argv[0])
    self.execBasedir = ''
    if dn != "":
        self.execBasedir = os.path.abspath(dn)
    self._add('exec_base_dir', self.execBasedir,
              'executable base directory')

    # make child processes inherit our path
    # (the `in` operator replaces the deprecated dict.has_key)
    if 'PYTHONPATH' in os.environ:
        os.environ['PYTHONPATH'] += ":%s" % self.execBasedir
    else:
        os.environ['PYTHONPATH'] = self.execBasedir