Beispiel #1
0
    def probe(self):
        """Send out heartbeat, node states; do the regular maintenance work.

        Runs forever, one iteration roughly every second:

        * broadcast a 'live' message carrying this agent's id and the
          metric returned by ``self.stat.get_metric()``;
        * every 10th elapsed second: call ``start_possible_agents()`` when
          fewer than two agents are known, then drop every agent whose
          recorded timestamp is more than 60 seconds old.  If this agent
          has the highest id among the remaining ones, it calls
          ``start_agent()`` with the dropped agent's address;
        * every 3600th elapsed second: the agent with the highest id calls
          ``start_possible_agents()``;
        * print the summary line;
        * keep ``self.jmgmt`` alive only while this agent is the boss.

        Exceptions raised inside one iteration are printed and the loop
        carries on with the next iteration.
        """
        t0 = time.time()
        while True:
            try:
                t1 = time.time()
                # Elapsed whole seconds, floored at 1 so that the very first
                # iteration does not trigger the "every N seconds" branches.
                t = max(int(t1 - t0), 1)
                msg = {'op': 'live', 'agid': self.id,
                       'args': self.stat.get_metric()}
                self.broadcast(msg)
                if t % 10 == 0:
                    if len(self.agents) < 2:
                        self.start_possible_agents()
                    # Iterate over a snapshot: entries are removed from
                    # self.agents inside the loop.
                    for agid, agtp in list(self.agents.items()):
                        agip, agtm, _ = agtp
                        if t1 - agtm > 60:
                            self.agents.pop(agid, None)
                            # max() of an empty dict raises ValueError, so
                            # make sure somebody is left before comparing.
                            if self.agents and self.id == max(self.agents):
                                self.start_agent(agip)
                if (t % 3600 == 0 and self.agents
                        and self.id == max(self.agents)):
                    self.start_possible_agents()
                self.print_summary()

                # Liang: new feature test
                # Only the boss owns a JobMgmt instance: create it on the
                # first iteration as boss, refresh it afterwards, and drop
                # it as soon as another agent becomes the boss.
                if self.id == self.who_is_boss():
                    if self.jmgmt is None:
                        self.jmgmt = JobMgmt()
                    else:
                        self.jmgmt.update_stat()
                else:
                    self.jmgmt = None
            except Exception as err:
                print("Exception:Agent.probe(): %s" % (err,))
            # Sleep outside the try block so that a failing iteration is
            # throttled too instead of spinning in a tight loop.
            time.sleep(1)
Beispiel #2
0
class Agent(threading.Thread):
    """Node agent that announces itself via UDP broadcast and tracks peers.

    Constructing an Agent immediately starts a daemon thread running
    :meth:`probe`, which sends a heartbeat about once per second and does
    the periodic maintenance work.
    """

    def __init__(self):
        threading.Thread.__init__(self)
        # agid -> (ip, timestamp, third field) as unpacked by probe() and
        # get_idle_nodes(); the dict is filled in outside this section.
        self.agents = {}
        self.id = random.randint(0, 65535)
        self.ip = get_myip()
        self.stat = NodeStat()
        self.exit_event = threading.Event()
        # jobid -> JobControl, guarded by jobs_lock.
        self.jobs = {}
        self.jobs_lock = threading.Lock()
        # JobMgmt instance, only set while this agent is the boss.
        self.jmgmt = None

        t = threading.Thread(target=self.probe, args=())
        t.daemon = True
        t.start()

    def broadcast(self, msg):
        """Broadcast ``msg`` as one UDP datagram to port ``BPORT``.

        A random 'tid' in [0, 65535] is stored in ``msg`` (the caller's
        dict is modified) before it is serialised with ``dump_msg``.
        """
        msg['tid'] = random.randint(0, 65535)
        payload = dump_msg(msg)
        bsock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            bsock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
            bsock.sendto(payload, ("<broadcast>", BPORT))
        finally:
            # A new socket is created for every heartbeat; close it
            # explicitly instead of relying on garbage collection.
            bsock.close()

    def get_app_path(self):
        """Return the resolved absolute path of this source file."""
        return os.path.realpath(__file__)

    def get_config_dir(self):
        """Return the 'config' directory located next to this source file."""
        return "%s/config" % os.path.dirname(self.get_app_path())

    def get_idle_nodes_origin(self):
        """Return ``(agcpu, agip)`` pairs of all known agents, ascending.

        ``agcpu`` is the third field of each agent record (presumably a
        CPU load figure -- confirm against the code that fills
        ``self.agents``), so the first element is the idlest node.
        """
        return sorted((agcpu, agip)
                      for agip, _, agcpu in self.agents.values())

    def get_idle_nodes(self):
        """Liang: Only used in debug version, replace with original
        one in product version.

        Same as :meth:`get_idle_nodes_origin`, but agents whose address is
        listed in ``self.agent_in_use`` are left out.  ``agent_in_use`` is
        created as an empty set on first use if it does not exist yet.
        """
        if not hasattr(self, 'agent_in_use'):
            self.agent_in_use = set()
        return sorted((agcpu, agip)
                      for agip, _, agcpu in self.agents.values()
                      if agip not in self.agent_in_use)

    def get_jobcontrol(self, jobid):
        """Get job control object from self.jobs.

        A new ``JobControl(jobid)`` is created and stored when ``jobid``
        is not known yet.
        """
        # "with" releases the lock even if JobControl() raises.
        with self.jobs_lock:
            if jobid not in self.jobs:
                self.jobs[jobid] = JobControl(jobid)
            return self.jobs[jobid]

    def has_jobcontrol(self, jobid):
        """Test whether the node agent has jobcontrol object given the jobid."""
        with self.jobs_lock:
            return jobid in self.jobs

    def print_summary(self):
        """Print out the summary information on the screen.

        Nothing is printed while no agent is known.  When no idle node is
        available the idlest entry is shown as ``(none)``.
        """
        if not self.agents:
            return
        boss = self.who_is_boss()
        idle_nodes = self.get_idle_nodes()
        if idle_nodes:
            cpu, ip = idle_nodes[0]
            idlest = "(%s, %.2f)" % (ip, cpu)
        else:
            # Every known agent is filtered out; avoid indexing an empty
            # list.
            idlest = "(none)"
        print("active:%i,\tboss:(%s, %i),\tidlest:%s" %
              (len(self.agents), self.agents[boss][0], boss, idlest))

    def probe(self):
        """Send out heartbeat, node states; do the regular maintenance work.

        Runs forever, one iteration roughly every second:

        * broadcast a 'live' message carrying this agent's id and the
          metric returned by ``self.stat.get_metric()``;
        * every 10th elapsed second: call ``start_possible_agents()`` when
          fewer than two agents are known, then drop every agent whose
          recorded timestamp is more than 60 seconds old.  If this agent
          has the highest id among the remaining ones, it calls
          ``start_agent()`` with the dropped agent's address;
        * every 3600th elapsed second: the agent with the highest id calls
          ``start_possible_agents()``;
        * print the summary line;
        * keep ``self.jmgmt`` alive only while this agent is the boss.

        Exceptions raised inside one iteration are printed and the loop
        carries on with the next iteration.
        """
        t0 = time.time()
        while True:
            try:
                t1 = time.time()
                # Elapsed whole seconds, floored at 1 so that the very first
                # iteration does not trigger the "every N seconds" branches.
                t = max(int(t1 - t0), 1)
                msg = {'op': 'live', 'agid': self.id,
                       'args': self.stat.get_metric()}
                self.broadcast(msg)
                if t % 10 == 0:
                    if len(self.agents) < 2:
                        self.start_possible_agents()
                    # Iterate over a snapshot: entries are removed from
                    # self.agents inside the loop.
                    for agid, agtp in list(self.agents.items()):
                        agip, agtm, _ = agtp
                        if t1 - agtm > 60:
                            self.agents.pop(agid, None)
                            # max() of an empty dict raises ValueError, so
                            # make sure somebody is left before comparing.
                            if self.agents and self.id == max(self.agents):
                                self.start_agent(agip)
                if (t % 3600 == 0 and self.agents
                        and self.id == max(self.agents)):
                    self.start_possible_agents()
                self.print_summary()

                # Liang: new feature test
                # Only the boss owns a JobMgmt instance: create it on the
                # first iteration as boss, refresh it afterwards, and drop
                # it as soon as another agent becomes the boss.
                if self.id == self.who_is_boss():
                    if self.jmgmt is None:
                        self.jmgmt = JobMgmt()
                    else:
                        self.jmgmt.update_stat()
                else:
                    self.jmgmt = None
            except Exception as err:
                print("Exception:Agent.probe(): %s" % (err,))
            # Sleep outside the try block so that a failing iteration is
            # throttled too instead of spinning in a tight loop.
            time.sleep(1)