Example #1
0
 def check_exit(self):
     """Shut the server down if requested, otherwise pace service heartbeats.

     Three mutually exclusive outcomes per call:

     * ``self.do_exit`` is set: end the "__server__" stream, join the
       worker threads, stop the websocket and web servers, cancel async
       tasks and drop ``self.xserver``.
     * ``self.hbeat`` has exceeded 4: reset it and send one heartbeat
       message to every service whose state is at least ``STARTING``.
     * otherwise: just advance the ``self.hbeat`` counter.
     """
     if self.do_exit:
         # Teardown order matters: close the stream first so the subscriber
         # thread can finish before it is joined.
         self.xspub.end("__server__")
         self.xs2server.join()
         self.heartbeat_thread.join()
         self.ws_server.stop()
         self.web_server.stop()
         cancel_async_tasks()
         del self.xserver
         return

     if self.hbeat <= 4:
         # Not time for a heartbeat round yet.
         self.hbeat += 1
         return

     self.hbeat = 0
     manager = ServiceManager()
     # Snapshot the names first, then send, so the set of targets is fixed
     # before any message goes out.
     active = [
         svc_name
         for svc_name, info in manager.list().items()
         if info['state'] >= manager.STARTING
     ]
     for svc_name in active:
         payload = json.dumps({
             '__heartbeat__': svc_name,
             'id': 'hbeat_' + str(self.hbeat_id)
         })
         manager.send(svc_name, 0, np.zeros(1), payload)
         # Heartbeat ids wrap around so they stay bounded.
         self.hbeat_id = (self.hbeat_id + 1) % 9999
Example #2
0
    def xstream2server():
        """Relay messages from the "__server__" xstream channel to the server.

        Subscribes to the special "__server__" channel, which other
        processes use to send messages to this server, and dispatches each
        JSON message on its ``topic``:

        * ``speedodata``    -> broadcast to websocket clients
        * ``callback``      -> sent to the client named by ``callback_id``
        * ``xs_throughput`` -> per-edge throughput stats forwarded to the
          ServiceManager

        The loop ends when ``get_msg()`` returns ``None``; async tasks are
        cancelled afterwards. Handling stays best-effort: a bad message is
        logged and skipped rather than terminating the loop.
        """
        # Function-scope import keeps this block self-contained.
        import logging

        logger = logging.getLogger(__name__)
        xs = xstream.Subscribe("__server__")
        while True:
            msg_str = xs.get_msg()
            if msg_str is None:
                break

            try:
                msg = json.loads(msg_str)
                topic = msg['topic']
                if topic == 'speedodata':
                    WebSocketHandler.broadcast(topic, msg['message'])
                elif topic == 'callback' and 'callback_id' in msg:
                    WebSocketHandler.send_to_client(
                        msg['callback_id'], topic, msg['message'])
                elif topic == 'xs_throughput':
                    report = json.loads(msg['message'])
                    for name, throughput in report.items():
                        # "<service>.<edge>"; a name without a dot yields
                        # the whole name for both parts.
                        service_name = name.split('.')[0]
                        edge_name = name[name.find('.') + 1:]
                        ServiceManager().update_throughput_stats(
                            service_name, edge_name, throughput)
            except Exception:
                # Was a bare ``except: pass``: that also swallowed
                # SystemExit/KeyboardInterrupt and hid every failure.
                # Keep going, but leave a trace of what went wrong.
                logger.warning(
                    "Failed to handle __server__ message", exc_info=True)

        cancel_async_tasks()
Example #3
0
    def heartbeat(stop):
        """Track service heartbeats and restart services that go silent.

        Listens on the "__heartbeat__" xstream channel. Each message is a
        JSON object with ``service`` and ``channel`` keys. A heartbeat seen
        on a service's last edge marks the service as valid, and promotes it
        from STARTING to STARTED the first time. After every message or
        receive timeout, the cached status of all known services is checked
        and stale services are restarted.

        Args:
            stop: zero-argument callable; the loop exits (and async tasks
                are cancelled) once it returns a truthy value.
        """
        logger = logging.getLogger(__name__)
        xs = xstream.Subscribe("__heartbeat__", timeout=5000)
        service_manager = ServiceManager()
        node_status = {}

        def check_services(node_status, now):
            """Drop stale cache entries and restart services that went quiet."""
            # ``stop`` is a callable, so it must be *called*; testing the
            # function object itself is always truthy and used to disable
            # this whole check.
            if stop():
                return
            invalid_services = []
            for service, status in node_status.items():
                service_state = service_manager._services[service]['state']
                is_starting = service_state == service_manager.STARTING
                is_started = service_state == service_manager.STARTED

                # if the service has been stopped, clear it
                if service_state == service_manager.STOPPED:
                    invalid_services.append(service)
                # if there's a discrepancy in what the service_manager says
                # and what we have cached, clear it
                elif is_starting and status['is_started']:
                    invalid_services.append(service)
                # if it's started and hasn't been valid in the last n secs,
                # restart it
                elif is_started and now - status['last_valid'] > 5:
                    logger.warning("Service %s is dead, restarting", service)
                    service_manager.stop(service)
                    service_manager.start(service)
                    status['is_started'] = False

            # Delete after the loop: the dict must not change size while it
            # is being iterated.
            for service in invalid_services:
                del node_status[service]

        while not stop():
            # when enabling coverage, this line will raise an exception for some
            # reason. For now, just catching it
            try:
                msg_str = xs.get_msg()
                now = time.time()
            except Exception:
                logger.exception("Shouldn't happen")
                # Nothing was received: skip this iteration instead of
                # falling through with an undefined or stale message.
                continue

            # the get_msg timed out, i.e. no heartbeats received
            if msg_str == (None, None):
                check_services(node_status, now)
                continue
            msg = json.loads(msg_str)
            service = msg['service']
            channel = msg['channel']

            # if this is the first time we've seen this service
            if service not in node_status:
                _first_edge, last_edge = service_manager._get_graph_io(service)
                node_status[service] = {
                    # saves the last time this service was valid
                    'last_valid': 0,
                    # our check that services haven't stopped
                    'is_started': False,
                    # saves the last edge of the service
                    'last_edge': last_edge[0],
                    # save heartbeat times for each channel
                    'channels': {},
                }
            status = node_status[service]
            status['channels'][channel] = now
            service_state = service_manager._services[service]['state']
            if status['last_edge'] == channel:
                if service_state == service_manager.STARTING:
                    if not status['is_started']:
                        service_manager._services[service][
                            'state'] = service_manager.STARTED
                        status['is_started'] = True
                    else:
                        # there's a discrepancy. For example, the service may
                        # have been stopped and something else started with
                        # the same name. In this case, clear the cache
                        del node_status[service]
                        continue
                status['last_valid'] = now

            check_services(node_status, now)

        cancel_async_tasks()