def emit_info(self, channel, info, wrap_in_list):
    """
    Serialize info as JSON and emit it on a socketio channel.

    Parameters:
      channel: name of the channel to emit on
      info: a list (e.g. for leases), or a dict with or without
        an 'id' field
      wrap_in_list: when true, info is sent as the single element
        of a list

    Returns None. A failure while emitting is not raised: the message
    is dropped and logged, and self.socketio is reset to None so that
    the next call goes through self.connect() again. json.dumps() and
    self.connect() run outside of the try block, so whatever they raise
    propagates to the caller.
    """
    if self.debug:
        msg = ("len={}".format(len(info)) if isinstance(info, list)
               else "id={}".format(info.get('id', '[none]')))
        logger.info("{} emitting {} -> {}"
                    .format(channel, msg, info))

    # remember the caller's object: once wrapped, `info` is always a list
    # and can no longer be asked for its 'id' when a failure is reported
    original = info
    # wrap info as single elt in a list
    if wrap_in_list:
        info = [info]
    message = json.dumps(info)

    if self.socketio is None:
        self.connect()
    try:
        self.socketio.emit(channel, message,
                           ReconnectableSocketIO.callback)
        self.counters.setdefault(channel, 0)
        self.counters[channel] += 1
    except Exception:
        # make sure we reconnect later on
        self.socketio = None
        # building the label must not raise from inside this handler,
        # so only dicts are queried for an 'id'
        if isinstance(original, dict):
            label = "{}: {}".format(original.get('id', 'none'),
                                    one_char_summary(original))
        elif isinstance(original, (list, tuple)):
            label = "len={}".format(len(original))
        else:
            label = repr(original)
        logger.warning("Dropped message on {} - channel {} - msg {}"
                       .format(self, channel, label))
        logger.exception("Dropped because of this exception")
async def keep_connected(self):
    """
    A continuous loop that keeps the connection open

    Every self.keep_period seconds, checks self.proto; when it is
    missing or no longer open, self.proto is reset to None and a new
    SidecarAsyncClient connection to self.url is attempted. Connection
    failures are logged and retried on the next iteration.

    Never returns; it is meant to be cancelled by its owner, which is
    why only Exception (and not BaseException) is caught around the
    connection attempt.
    """
    while True:
        logger.debug(f"in keep_connected, proto={self.proto}")
        if not (self.proto and self.proto.open):
            # xxx should we close() our client ?
            self.proto = None
            # see if we need ssl
            secure = websockets.uri.parse_uri(self.url).secure
            kwds = {}
            if secure:
                import ssl
                # NOTE(review): a default-constructed SSLContext presumably
                # does not verify the server certificate - confirm that
                # this is intended before switching to
                # ssl.create_default_context()
                kwds.update(dict(ssl=ssl.SSLContext()))
            try:
                logger.info(f"(re)-connecting to {self.url} ...")
                self.proto = await SidecarAsyncClient(self.url, **kwds)
                logger.debug("connected !")
            except ConnectionRefusedError:
                logger.warning(
                    f"Could not connect to {self.url} at this time")
            except Exception:
                # a bare except here would also trap asyncio.CancelledError
                # and KeyboardInterrupt, making this task impossible to stop
                logger.exception(
                    f"Could not connect to {self.url} at this time")
        await asyncio.sleep(self.keep_period)
async def probe_forever(self, cycle, ping_timeout, ssh_timeout):
    """
    Endless probing loop: run probe(), then pause <cycle> seconds,
    and start over.

    Any Exception raised by one probe() is logged and does not
    interrupt the loop.
    """
    failure_note = "monitornodes oops 2"
    while True:
        try:
            one_pass = self.probe(ping_timeout, ssh_timeout)
            await one_pass
        except Exception:
            logger.exception(failure_note)
        pause = asyncio.sleep(cycle)
        await pause
async def probe_forever(self, cycle, ping_timeout, ssh_timeout):
    """
    Keep calling probe() for ever, sleeping <cycle> seconds
    after each run.

    An Exception escaping from probe() gets logged, and the loop
    carries on with the next run.
    """
    timeouts = (ping_timeout, ssh_timeout)
    while True:
        try:
            await self.probe(*timeouts)
        except Exception:
            logger.exception("monitor oops 2")
        await asyncio.sleep(cycle)
async def watch_back_channel(self, category, callback):
    """
    Endless loop that listens on self.proto and dispatches requests.

    Parameters:
      category: only messages whose 'category' equals this value
        are dispatched
      callback: called synchronously with the decoded message (a dict)
        when its 'category' matches and its 'action' is 'request'

    While there is no connection (self.proto is falsy), the loop backs
    off for self.keep_period seconds at a time.

    Two kinds of failures are kept apart:
      * recv() failing means the connection is unusable: self.proto is
        reset to None so that it gets re-established elsewhere
      * a message that cannot be decoded, lacks the expected keys, or
        makes the callback raise, is logged and skipped; the connection
        itself is left untouched
    """
    while True:
        if not self.proto:
            logger.debug(f"backing off for {self.keep_period}s")
            await asyncio.sleep(self.keep_period)
            continue

        # transport-level failure: drop the connection
        try:
            incoming = await self.proto.recv()
        except Exception:
            logger.exception("recv failed .. fix me")
            self.proto = None
            continue

        # message-level failure: drop that message only
        try:
            umbrella = json.loads(incoming)
            logger.info(
                f"tmp - got incoming {umbrella['category']} x {umbrella['action']}"
            )
            if (umbrella['category'] == category
                    and umbrella['action'] == 'request'):
                callback(umbrella)
        except Exception:
            logger.exception("could not process incoming message - ignored")
async def emit_infos(self, infos):
    """
    Send infos over self.proto, wrapped in a JSON payload that also
    carries self.category and the 'info' action.

    Returns True when the payload was handed over to self.proto.send(),
    in which case self.counter is incremented. Returns False whenever
    the message is dropped, i.e.
      * there is no connection (self.proto is falsy)
      * send() raises ConnectionRefusedError
      * send() raises any other Exception; in that case self.proto is
        also reset to None
    No exception raised by send() is propagated.
    """
    if not self.proto:
        logger.warning(f"dropping message {infos}")
        return False
    logger.debug(f"Sending {infos}")
    # xxx use Payload
    payload = dict(category=self.category, action='info', message=infos)
    try:
        await self.proto.send(json.dumps(payload))
        self.counter += 1
    except ConnectionRefusedError:
        logger.warning(f"Could not send {self.category} - dropped")
        # the message did not go out, so do not report a success
        return False
    except Exception:
        # xxx to review
        logger.exception("send failed")
        self.proto = None
        return False
    return True
async def run_forever(self):
    """
    Endless loop that refreshes the leases and advertises them
    on self.channel through self.reconnectable.

    Each round first waits for at most self.cycle seconds, in slices
    of self.step seconds; the wait is cut short as soon as
    self.fast_track is found set. An Exception raised while refreshing
    or emitting is logged, and the loop goes on.
    """
    lease_source = Leases(self.message_bus)
    while True:
        self.fast_track = False
        deadline = time.time() + self.cycle
        # poll for the fast_track flag until the deadline
        while True:
            if self.fast_track or time.time() >= deadline:
                break
            await asyncio.sleep(self.step)
            # give a chance to socketio events to trigger
            self.reconnectable.wait(self.wait)

        try:
            await lease_source.refresh()
            # xxx this is fragile
            current = lease_source.resources
            self.reconnectable.emit_info(
                self.channel, current, wrap_in_list=False)
            headline = "advertising {} leases".format(len(current))
            logger.info(headline)
            if self.debug:
                logger.info("Leases details: {}".format(current))
        except Exception:
            logger.exception("monitor could not get leases")
async def mainloop(self):
    """
    Endless loop that refreshes the leases and sends them
    through self.reconnectable.emit_infos().

    Each round first waits for at most self.cycle seconds, in slices
    of self.step seconds; the wait is cut short as soon as
    self.fast_track is found set. An Exception raised while refreshing
    or sending is logged, and the loop goes on.
    """
    lease_source = Leases(self.message_bus)
    if self.verbose:
        logger.info("Entering monitor on leases")
    while True:
        self.fast_track = False  # pylint: disable=w0201
        deadline = time.time() + self.cycle
        # poll for the fast_track flag until the deadline
        while True:
            if self.fast_track or time.time() >= deadline:
                break
            await asyncio.sleep(self.step)

        try:
            if self.verbose:
                logger.info("monitorleases mainloop")
            await lease_source.refresh()
            # xxx this is fragile
            advertised = lease_source.resources
            headline = "advertising {} leases".format(len(advertised))
            logger.info(headline)
            await self.reconnectable.emit_infos(advertised)
            if self.verbose:
                logger.info("Leases details: {}".format(advertised))
        except Exception:
            logger.exception("monitornodes could not get leases")
async def probe(self,          # pylint: disable=r0912, r0914, r0915
                ping_timeout, ssh_timeout):
    """
    The logic for getting one node's info and send it to sidecar

    Parameters:
      ping_timeout: how long to wait for the ping subprocess in pass3
      ssh_timeout: how long to wait for the ssh connection, and then
        again for the remote commands, in pass2
      (both are passed as-is as a timeout to asyncio.wait_for)

    Works in up to 3 passes, and reports exactly once on each path
    that completes:
      pass1: USRP and CMC status; unless the CMC status is 'on',
        report right away with ping and ssh padded as 'off'
      pass2: connect through ssh and run a batch of commands whose
        output is handed over to parse_ssh_probe_output(); report if
        control_ssh ends up 'on'
      pass3: ssh did not work out; ping the node and report
        control_ping accordingly
    """

    if self.verbose:
        logger.info(f"entering pass1, info={self.info}")
    # pass1 : check for status
    padding_dict = {
        'control_ping': 'off',
        'control_ssh': 'off',
        # don't overwrite os_release though
    }
    # get USRP status no matter what - use 'fail' if we receive None
    # to limit noise when the node is physically removed
    usrp_status = await self.node.get_usrpstatus() or 'fail'
    # replace usrpon and usrpoff with just on and off
    self.set_info({'usrp_on_off': usrp_status.replace('usrp', '')})
    # get CMC status
    status = await self.node.get_status()
    if status == "off":
        await self.set_info_and_report({'cmc_on_off': 'off'}, padding_dict)
        return
    if status != "on":
        await self.set_info_and_report({'cmc_on_off': 'fail'}, padding_dict)
        return

    if self.verbose:
        logger.info(f"entering pass2, info={self.info}")
    # pass2 : CMC status is ON - let's try to ssh it
    self.set_info({'cmc_on_off': 'on'})
    padding_dict = {
        'control_ping': 'on',
        'control_ssh': 'on',
    }
    remote_commands = [
        "cat /etc/lsb-release /etc/redhat-release /etc/gnuradio-release "
        "2> /dev/null | grep -i release",
        "echo -n GNURADIO: ; gnuradio-config-info --version "
        "2> /dev/null || echo none",
        # this trick allows to have the filename on each output line
        "grep . /etc/rhubarbe-image /dev/null",
        "echo -n UNAME: ; uname -r",
        "echo -n DOCKER: ; docker --version",
        "echo -n CONTAINER: ; docker inspect "
        "--format='{{.State.Running}} {{.Config.Image}}' container",
    ]
    # reconnect each time
    async with SshProxy(self.node) as ssh:
        if self.verbose:
            logger.info(
                f"trying to ssh-connect to {self.node.control_hostname()} "
                f"(timeout={ssh_timeout})")
        try:
            connected = await asyncio.wait_for(ssh.connect(),
                                               timeout=ssh_timeout)
        except asyncio.TimeoutError:
            connected = False
        if self.verbose:
            logger.info(
                f"{self.node.control_hostname()} ssh-connected={connected}"
            )
        if connected:
            try:
                command = ";".join(remote_commands)
                output = await asyncio.wait_for(ssh.run(command),
                                                timeout=ssh_timeout)
                # padding dict here sets control_ssh and control_ping to on
                self.parse_ssh_probe_output(output, padding_dict)
                # required as otherwise we leak openfiles
                try:
                    await ssh.close()
                except Exception:
                    logger.exception("monitornodes oops 1")
            except asyncio.TimeoutError:
                if self.verbose:
                    logger.info(
                        f"received ssh timeout with {self.node.control_hostname()}"
                    )
                self.set_info({'control_ssh': 'off'})
            except Exception:
                logger.exception("monitornodes remote_command failed")
        else:
            # covers a timeout as well as a connect() that returns a
            # falsy value; without this, a control_ssh left to 'on' by
            # a previous probe would get reported again
            self.set_info({'control_ssh': 'off'})

    if self.verbose:
        logger.info(f"{self.node.control_hostname()} ssh-based logic done "
                    f"ssh is deemed {self.info['control_ssh']}")
    # if we could ssh then we're done
    if self.info['control_ssh'] == 'on':
        await self.report_info()
        return

    if self.verbose:
        logger.info(f"entering pass3, info={self.info}")
    # pass3 : node is ON but could not ssh
    # check for ping
    # I don't know of an asyncio library to deal with icmp
    # so let's use asyncio.subprocess
    # xxx maybe a Ping class would be the way to go
    control = self.node.control_hostname()
    command = ["ping", "-c", "1", "-t", "1", control]
    try:
        pinger = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
        # failure occurs through timeout
        # NOTE(review): the exit code of ping is ignored, so a ping that
        # exits early with an error still counts as 'on' - confirm
        await asyncio.wait_for(pinger.wait(), timeout=ping_timeout)
        await self.set_info_and_report({'control_ping': 'on'})
        return
    except asyncio.TimeoutError:
        await self.set_info_and_report({'control_ping': 'off'})
        return
async def probe(self, ping_timeout, ssh_timeout):
    """
    The logic for getting one node's info and send it to sidecar

    Parameters:
      ping_timeout: how long to wait for the ping subprocess in pass3
      ssh_timeout: how long to wait for the ssh connection, and then
        again for the remote commands, in pass2
      (both are passed as-is as a timeout to asyncio.wait_for)

    Works in up to 3 passes, and reports exactly once on each path
    that completes:
      pass1: USRP and CMC status; unless the CMC status is 'on',
        report right away with ping and ssh padded as 'off'
      pass2: connect through ssh and run a batch of commands whose
        output is handed over to parse_ssh_probe_output(); report if
        control_ssh ends up 'on'
      pass3: ssh did not work out; ping the node and report
        control_ping accordingly
    """
    if self.debug:
        logger.info("entering pass1, info={}".format(self.info))
    # pass1 : check for status
    padding_dict = {
        'control_ping': 'off',
        'control_ssh': 'off',
        # don't overwrite os_release though
    }
    # get USRP status no matter what; a missing status (None) is turned
    # into 'fail' rather than crashing on .replace() below
    usrp_status = await self.node.get_usrpstatus() or 'fail'
    # replace usrpon and usrpoff with just on and off
    self.set_info({'usrp_on_off': usrp_status.replace('usrp', '')})
    # get CMC status
    status = await self.node.get_status()
    if status == "off":
        self.set_info_and_report({'cmc_on_off': 'off'}, padding_dict)
        return
    if status != "on":
        self.set_info_and_report({'cmc_on_off': 'fail'}, padding_dict)
        return

    if self.debug:
        logger.info("entering pass2, info={}".format(self.info))
    # pass2 : CMC status is ON - let's try to ssh it
    self.set_info({'cmc_on_off': 'on'})
    padding_dict = {
        'control_ping': 'on',
        'control_ssh': 'on',
    }
    self.zero_wlan_infos()
    remote_commands = [
        "cat /etc/lsb-release /etc/fedora-release /etc/gnuradio-release "
        "2> /dev/null | grep -i release",
        "echo -n GNURADIO: ; gnuradio-config-info --version "
        "2> /dev/null || echo none",
        # this trick allows to have the filename on each output line
        "grep . /etc/rhubarbe-image /dev/null",
        "echo -n 'UNAME:' ; uname -r",
    ]
    if self.report_wlan:
        remote_commands.append(
            "head /sys/class/net/wlan?/statistics/[rt]x_bytes"
        )
    # reconnect each time
    ssh = SshProxy(self.node)
    if self.debug:
        logger.info("trying to ssh-connect")
    try:
        connected = await asyncio.wait_for(ssh.connect(),
                                           timeout=ssh_timeout)
    except asyncio.TimeoutError:
        connected = False

    if self.debug:
        logger.info("ssh-connected={}".format(connected))
    if connected:
        try:
            command = ";".join(remote_commands)
            # bounded, so that a remote end that hangs cannot stall
            # the probing of this node for ever
            output = await asyncio.wait_for(ssh.run(command),
                                            timeout=ssh_timeout)
            # padding dict here sets control_ssh and control_ping to on
            self.parse_ssh_probe_output(output, padding_dict)
        except asyncio.TimeoutError:
            if self.debug:
                logger.info("received ssh timeout")
            self.set_info({'control_ssh': 'off'})
        except Exception:
            logger.exception("monitor remote_command failed")
        finally:
            # required as otherwise we leak openfiles; done in a finally
            # clause so that a failing command does not skip it
            try:
                await ssh.close()
            except Exception:
                logger.exception("monitor oops 1")
    else:
        self.set_info({'control_ssh': 'off'})

    # if we could ssh then we're done
    if self.info['control_ssh'] == 'on':
        self.report_info()
        return

    if self.debug:
        logger.info("entering pass3, info={}".format(self.info))
    # pass3 : node is ON but could not ssh
    # check for ping
    # I don't know of an asyncio library to deal with icmp
    # so let's use asyncio.subprocess
    # xxx maybe a Ping class would be the way to go
    control = self.node.control_hostname()
    command = ["ping", "-c", "1", "-t", "1", control]
    try:
        pinger = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
        # the exit code is not looked at: failure occurs through timeout
        await asyncio.wait_for(pinger.wait(), timeout=ping_timeout)
        self.set_info_and_report({'control_ping': 'on'})
        return
    except asyncio.TimeoutError:
        self.set_info_and_report({'control_ping': 'off'})
        return