Esempio n. 1
0
def fromJson(jsonObj):
    """
    Object hook for json.loads that rebuilds known objects from dicts.

    A dict whose 'class' entry is 'Node', 'WorkerState', 'Nodes' or
    'NodeConnectRequest' is converted into an instance of that class.
    Anything else (no 'class' entry, or an unrecognised class name) is
    returned unchanged.
    """
    if 'class' not in jsonObj:
        return jsonObj

    className = jsonObj['class']

    if className == 'Node':
        node = Node(jsonObj['server_id'],
                    int(jsonObj['client_secure_port']),
                    int(jsonObj['server_secure_port']),
                    jsonObj['qualified_name'], jsonObj['hostname'])
        # the remaining attributes are optional in the serialized form
        if "nodes" in jsonObj:
            node.setNodes(jsonObj['nodes'])
        if "priority" in jsonObj:
            node.setPriority(jsonObj['priority'])
        if "workerStates" in jsonObj:
            node.workerStates = jsonObj['workerStates']
        return node

    if className == 'WorkerState':
        return WorkerState(jsonObj['host'], jsonObj['state'],
                           jsonObj['workerId'])

    if className == 'Nodes':
        nodes = Nodes()
        # values() works on both Python 2 and Python 3 dicts, whereas
        # itervalues() only exists on Python 2.
        for node in jsonObj['nodes'].values():
            nodes.addNode(node)
        return nodes

    if className == 'NodeConnectRequest':
        return NodeConnectRequest(jsonObj['server_id'],
                                  jsonObj['client_secure_port'],
                                  jsonObj['server_secure_port'],
                                  jsonObj['key'],
                                  jsonObj['qualified_name'],
                                  jsonObj['hostname'])

    # unknown class name: hand the dict back untouched
    return jsonObj
Esempio n. 2
0
    def testCacheObjects(self):
        """
        Store a Nodes collection in the cache and read it back.

        Two Node objects are added to a Nodes collection, which is cached
        under the key "network-topology"; the object fetched from the cache
        must report a size of 2.
        """
        node1 = Node("testhost", 8080, 9090)
        node2 = Node("testhost", 8081, 9091)

        nodes = Nodes()
        nodes.addNode(node1)
        nodes.addNode(node2)

        Cache().add("network-topology", nodes)
        cachedNodes = Cache().get("network-topology")

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(2, cachedNodes.size())
Esempio n. 3
0
    def testCacheObjects(self):
        """
        Check that a Nodes collection survives a round trip through Cache.

        A collection holding two Node objects is added under the key
        "network-topology" and fetched again; the fetched object is expected
        to have size 2.
        """
        node1 = Node("testhost", 8080, 9090)
        node2 = Node("testhost", 8081, 9091)

        nodes = Nodes()
        nodes.addNode(node1)
        nodes.addNode(node2)

        Cache().add("network-topology", nodes)
        cachedNodes = Cache().get("network-topology")

        # Use assertEqual: the assertEquals spelling is a deprecated alias
        # that no longer exists in Python 3.12's unittest.
        self.assertEqual(2, cachedNodes.size())
Esempio n. 4
0
    def run(self, serverState, request, response):
        """
        Used when a server wants to generate a network topology.

        Starts from an empty Nodes collection, or from the one serialized in
        the request's 'topology' parameter when present, passes it to
        ServerToServerMessage.requestNetworkTopology together with the
        server state, and adds the result to the response.
        """
        knownNodes = Nodes()
        if request.hasParam('topology'):
            serialized = request.getParam('topology')
            # rebuild the objects from their JSON form
            knownNodes = json.loads(serialized,
                                    object_hook=json_serializer.fromJson)

        fullTopology = ServerToServerMessage.requestNetworkTopology(
            knownNodes, serverState)

        response.add("", fullTopology)
        log.info("Returned network topology size %d" % fullTopology.size())
Esempio n. 5
0
    def run(self, serverState, request, response):
        """
        Used when a server wants to generate a network topology.

        The topology received so far (request parameter 'topology', if
        present; otherwise an empty Nodes collection) is handed to
        ServerToServerMessage.requestNetworkTopology along with the server
        state. The returned topology is added to the response and its size
        is logged.
        """
        receivedTopology = Nodes()
        if request.hasParam('topology'):
            topologyJson = request.getParam('topology')
            receivedTopology = json.loads(
                topologyJson, object_hook=json_serializer.fromJson)

        result = ServerToServerMessage.requestNetworkTopology(
            receivedTopology, serverState)

        response.add("", result)
        log.info("Returned network topology size %d" % result.size())
    def initialize(self,endNodeId):
        """
        Determine which server to connect to in order to reach endNodeId.

        Fetches the network topology, looks up the end node in it, asks
        Nodes.findRoute for a route from this server to the end node and
        stores the second entry of that route (the first one is this
        server) as the connection target in self.hostNode, self.host,
        self.port and self.serverId. Logs an error and returns without
        setting anything if no topology is available.
        """
        networkMap = self.getNetworkTopology()
        if not networkMap:
            log.error("Cannot get network topology")
            return

        # the route starts at this server
        selfNode = Node.getSelfNode(self.conf)
        self.endNode = networkMap.nodes.get(endNodeId)

        endpoints = (selfNode.server_id,
                     selfNode.getHostname(),
                     selfNode.getServerSecurePort(),
                     self.endNode.server_id,
                     self.endNode.getHostname(),
                     self.endNode.getServerSecurePort())
        log.log(cpc.util.log.TRACE,
                "Finding route between %s(%s %s) and %s(%s %s)" % endpoints)

        hops = Nodes.findRoute(selfNode, self.endNode, networkMap)

        # hops[0] is the current host, so the next hop is hops[1]
        nextHop = hops[1]
        self.hostNode = nextHop
        self.host = nextHop.getHostname()
        self.port = nextHop.getServerSecurePort()
        self.serverId = nextHop.getId()
        log.log(cpc.util.log.TRACE,
                "Server-to-server connecting to %s(%s:%s)" % (
                    self.serverId, self.host, self.port))
Esempio n. 7
0
def fromJson(jsonObj):
    """
    Object hook for json.loads that rebuilds known objects from dicts.

    Dicts tagged with a "class" entry of "Node", "WorkerState", "Nodes" or
    "NodeConnectRequest" are turned into the matching object. Every other
    value (untagged, or tagged with an unknown class name) is returned as
    it was received.
    """
    if "class" not in jsonObj:
        return jsonObj

    kind = jsonObj["class"]

    if kind == "Node":
        node = Node(
            jsonObj["server_id"],
            int(jsonObj["client_secure_port"]),
            int(jsonObj["server_secure_port"]),
            jsonObj["qualified_name"],
            jsonObj["hostname"],
        )
        # optional attributes, only set when serialized
        if "nodes" in jsonObj:
            node.setNodes(jsonObj["nodes"])
        if "priority" in jsonObj:
            node.setPriority(jsonObj["priority"])
        if "workerStates" in jsonObj:
            node.workerStates = jsonObj["workerStates"]
        return node

    if kind == "WorkerState":
        return WorkerState(jsonObj["host"], jsonObj["state"], jsonObj["workerId"])

    if kind == "Nodes":
        nodes = Nodes()
        # dict.itervalues() is Python 2 only; values() behaves the same
        # here and is available on both Python 2 and Python 3.
        for node in jsonObj["nodes"].values():
            nodes.addNode(node)
        return nodes

    if kind == "NodeConnectRequest":
        return NodeConnectRequest(
            jsonObj["server_id"],
            jsonObj["client_secure_port"],
            jsonObj["server_secure_port"],
            jsonObj["key"],
            jsonObj["qualified_name"],
            jsonObj["hostname"],
        )

    # unrecognised class name: leave the dict as it is
    return jsonObj
    def getNetworkTopology(resetCache = False):
        """
        Used when a server wants to initiate a network topology request.

        Unless resetCache is set, the topology is first looked up in
        NetworkTopologyCache. When that yields False (or the cache is
        skipped), a fresh topology is requested through
        ServerToServerMessage.requestNetworkTopology, stored in the cache
        and returned.

        resetCache:boolean  bypass the cache, request the topology and
                            store the result in the cache
        """
        if resetCache == False:
            cached = NetworkTopologyCache().get()
        else:
            cached = False

        if cached == False:
            # nothing usable in the cache: build the topology from scratch
            fresh = ServerToServerMessage.requestNetworkTopology(Nodes())
            NetworkTopologyCache().add(fresh)
            return fresh

        return cached
Esempio n. 9
0
    def run(self, serverState, request, response):
        """
        Hand out work to a worker that has reported itself as ready.

        The worker's platform/executable description (request parameter
        'worker') is matched against this server's command queue. When
        commands are found they are added to the running-command list and
        sent back as a gzip-compressed tar archive containing each
        command's directory. Otherwise the request is forwarded to every
        connected node (in priority order) that is not yet in the topology
        carried by the request, and a tar response from such a node is
        relayed to the worker. If no work is found anywhere, "No command"
        is added to the response.
        """
        # first read platform capabilities and executables
        rdr=cpc.command.platform_exec_reader.PlatformExecutableReader()
        workerData=request.getParam('worker')
        if request.hasParam('worker-id'):
            workerID=request.getParam('worker-id')
        else:
            workerID='(none)'
        log.debug("Worker platform + executables: %s"%workerData)
        rdr.readString(workerData,"Worker-reported platform + executables")
        # match queued commands to executables.
        cwm=CommandWorkerMatcher(rdr.getPlatforms(),
                                 rdr.getExecutableList(),
                                 rdr.getWorkerRequirements())
        cmds=cwm.getWork(serverState.getCmdQueue())
        if not cwm.isDepleted():
            # now sleep for 5 seconds to give the dataflow time to react to any
            # new state.
            time.sleep(5)
            cmds.extend(cwm.getWork(serverState.getCmdQueue()))
        # now check the forwarded variables
        conf=serverState.conf
        originatingServer=None
        heartbeatInterval=None
        # self.forwarded may not exist on this handler. A missing attribute
        # raises AttributeError (never NameError), so read it with a default
        # and treat "missing" as if self.forwarded == False.
        if getattr(self, 'forwarded', False):
            # check whether there is an originating server. If not, we're it
            if 'originating-server-id' in request.headers:
                originatingServer = request.headers['originating-server-id']
            # check the expected heartbeat time.
            log.debug("Forwarded message")
            if request.hasParam('heartbeat-interval'):
                heartbeatInterval = int(request.getParam('heartbeat-interval'))
                log.debug("Forwarded heartbeat interval is %d"%
                        heartbeatInterval)

        if originatingServer is None:
            # If the originating server property has not been set,  the
            # request hasn't been forwarded, therefore we are the originating
            # server
            selfNode=Node.getSelfNode(conf)
            originatingServer = selfNode.getId()
            # we only store worker state in the server the worker connects to
            serverState.setWorkerState(WorkerStatus.WORKER_STATUS_CONNECTED,workerID,
                                       request.headers['originating-client'])
        if heartbeatInterval is None:
            heartbeatInterval = conf.getHeartbeatTime()
        log.debug("worker identified %s"%request.headers['originating-client'] )

        if len(cmds) > 0:
            # first add them to the running list so they never get lost
            runningCmdList=serverState.getRunningCmdList()
            runningCmdList.add(cmds, originatingServer, heartbeatInterval)
            # construct the tar file with the workloads.
            tff=tempfile.TemporaryFile()
            tf=tarfile.open(fileobj=tff, mode="w:gz")
            # make the commands ready
            for cmd in cmds:
                log.debug("Adding command id %s to tar file."%cmd.id)
                # write the command description to the command's directory
                task=cmd.getTask()
                #log.debug(cmd)
                project=task.getProject()
                taskDir = "task_%s"%task.getID()
                cmddir=cmd.getDir()
                if not os.path.exists(cmddir):
                    log.debug("cmddir %s did not exist. Created directory."%cmd.id)
                    os.mkdir(cmddir)
                arcdir="%s"%(cmd.id)
                log.debug("cmddir=%s"%cmddir)
                # the context manager closes command.xml even when writing
                # the XML fails, and before the directory is archived
                with open(os.path.join(cmddir, "command.xml"), "w") as outf:
                    cmd.writeWorkerXML(outf)
                tf.add(cmddir, arcname=arcdir, recursive=True)
                # set the state of the command.
            tf.close()
            del(tf)
            # rewind so the response reads the archive from the start
            tff.seek(0)
            # now send it back
            response.setFile(tff,'application/x-tar')
            #project.writeTasks()
            # the file is closed after the response is sent.
            log.info("Did direct worker-ready")
        else:
            nodes = conf.getNodes().getNodesByPriority()

            topology = Nodes()
            if request.hasParam('topology'):
                topology = json.loads(request.getParam('topology')
                                      ,object_hook = json_serializer.fromJson)

            # add this server to the topology that is passed along with the
            # forwarded request
            thisNode = Node.getSelfNode(conf)
            thisNode.nodes = conf.getNodes()
            topology.addNode(thisNode)

            hasJob =False # temporary flag that should be removed
            for node in nodes:
                # only forward to nodes that are not already in the topology
                if topology.exists(node.getId()) == False:
                    clnt=ServerMessage(node.getId())

                    clientResponse=clnt.workerReadyForwardedRequest(workerID,
                                        workerData,
                                        topology,
                                        originatingServer,
                                        heartbeatInterval,
                                        request.headers['originating-client'])

                    if clientResponse.getType() == 'application/x-tar':

                        log.log(cpc.util.log.TRACE,
                                'got work from %s'%
                                (clientResponse.headers[
                                     'originating-server-id']))
                        hasJob=True
                        # we need to rewrap the message

                        #TODO stupid intermediary step because the mmap form
                        # clientresponse is prematurely closed
                        tmp = tempfile.TemporaryFile('w+b')

                        message = clientResponse.getRawData()

                        tmp.write(message.read(len(message)))
                        tmp.seek(0)

                        #for key in clientResponse.headers:
                        #    print "%s:%s"%(key,clientResponse.headers[key])

                        response.setFile(tmp,'application/x-tar')
                        response.headers['originating-server-id']=\
                                  clientResponse.headers[
                                      'originating-server-id']
                    #OPTIMIZE leads to a lot of folding and unfolding of
                    #packages
            if not hasJob:
                response.add("No command")
            log.info("Did delegated worker-ready")
Esempio n. 10
0
    def initDefaults(self):
        """
        Register the server configuration options with their defaults.

        Calls the base class initDefaults first, then adds the
        server-specific options through self._add. Finally records the
        directory of the running executable in self.execBasedir (and as
        the 'exec_base_dir' option when it is non-empty) and appends it to
        the PYTHONPATH environment variable so that child processes
        inherit it.
        """
        conf_base.Conf.initDefaults(self)
        server_host = ''

        self._add('server_host', server_host, "Address the server listens on",
                  True)

        self._add('server_fqdn', socket.getfqdn(), "Manually specified fqdn",
                  True)

        # validation patterns are raw strings so that "\d" is not treated
        # as an (invalid) string escape sequence
        self._add(
            'server_secure_port', Conf.getDefaultServerSecurePort(),
            "Port number the server uses for communication from servers ",
            True, None, r'\d+')

        self._add(
            'client_secure_port', Conf.getDefaultClientSecurePort(),
            "Port number the server listens on for communication from clients",
            True, None, r'\d+')

        self._add('nodes', Nodes(), "List of nodes connected to this server",
                  False)

        self._add('revoked_nodes', Nodes(), "List of revoked nodes", False)
        self._add('node_connect_requests', Nodes(),
                  "List of nodes requesting to connect to this server", False)

        self._add('sent_node_connect_requests', Nodes(),
                  "List of connect requests sent", False)

        self._add('project_file',
                  "projects.xml",
                  "Projects file name (relative to conf_dir)",
                  relTo='conf_dir')
        self._add('state_save_interval',
                  240,
                  "Time in seconds between state saves",
                  True,
                  validation=r'\d+')

        self._add(
            'import_path', "",
            "Colon-separated list of directories to search for imports, in addition to cpc/lib, .copernicus/lib and .copernicus/<hostname>/lib",
            True)

        self._add('mode', 'prod', "The run mode of the server", True, None,
                  None, ['trace', 'debug', 'prod'])

        self._add('profiling', 'false',
                  "Profile the server CPU usage using yappi (ver >= 0.82)",
                  True, None, None, ['false', 'true'])

        # run options
        self._add('run_dir', None,
                  "Base directory of all files produced by running projects.",
                  True)

        # log options
        self._add('log_dir',
                  "log",
                  "Directory containing logs",
                  True,
                  relTo='conf_dir')
        self._add('server_log_file',
                  "server.log",
                  "The server log file",
                  False,
                  relTo='log_dir')
        self._add('error_log_file',
                  "error.log",
                  "The error log file",
                  False,
                  relTo='log_dir')

        # heartbeat options
        self._add('heartbeat_time',
                  120,
                  "Time in seconds between heartbeats",
                  True,
                  validation=r'\d+')
        self._add('heartbeat_file',
                  "heartbeatlist.xml",
                  "Heartbeat monitor list",
                  False,
                  relTo='conf_dir')

        # Task exec queue size. If it exceeds this size, the dataflow
        # propagation blocks.
        self._add('task_queue_size',
                  1024,
                  "Dataflow execution task queue size",
                  True,
                  validation=r'\d+')

        #static configuration
        self._add('web_root', 'web',
                  "The directory where html,js and css files are located")

        # assets
        self._add(
            'local_assets_dir',
            "local_assets",
            "Directory containing local assets such as command output files",
            True,
            relTo='conf_dir')

        self._add('server_cores',
                  -1,
                  "Number of cores to use on the server (for OpenMP tasks).",
                  userSettable=True,
                  validation=r'\d+')

        self._add(
            'num_persistent_connections',
            5, "Number of persistent connection to establish for each trusted "
            "server",
            userSettable=True)

        self._add(
            'keep_alive_interval',
            60,
            "Keep alive interval of server connections,value is in minutes",
            userSettable=True)

        self._add('reconnect_interval',
                  300,
                  "Interval between trying to reestablish failed connections ,"
                  "value is in seconds",
                  userSettable=True)

        # The adjacent literals are concatenated as-is, so each piece ends
        # with a space to keep the words of the description separated.
        self._add('server_verification', True,
                  "By default servers should always require ssl certificate from both directions "
                  "setting this to true will let the sending server to use the client port and disregard "
                  "certificate checks. This should only be used in very rare circumstances, for example when debugging "
                  "ssl incombatibilites between machines ",
                  writable=False,
                  userSettable=False)

        dn = os.path.dirname(sys.argv[0])
        self.execBasedir = ''
        if dn != "":
            self.execBasedir = os.path.abspath(dn)
            self._add('exec_base_dir', self.execBasedir,
                      'executable base directory')
        # make child processes inherit our path
        # ("in" works on Python 2 and 3; dict.has_key is Python 2 only)
        if 'PYTHONPATH' in os.environ:
            os.environ['PYTHONPATH'] += ":%s" % self.execBasedir
        else:
            os.environ['PYTHONPATH'] = self.execBasedir