async def keep_connected(self):
    """
    A continuous loop that keeps the connection open

    Every ``self.keep_period`` seconds, check that ``self.proto`` is an
    open websocket and (re)create it otherwise.  Never returns; callers
    schedule this coroutine once alongside the rest of their tasks.
    """
    while True:
        logger.debug(f"in keep_connected, proto={self.proto}")
        if self.proto and self.proto.open:
            pass
        else:
            # xxx should we close() our client ?
            self.proto = None
            # see if we need ssl
            secure = websockets.uri.parse_uri(self.url).secure
            kwds = {}
            if secure:
                import ssl
                kwds.update(dict(ssl=ssl.SSLContext()))
            try:
                logger.info(f"(re)-connecting to {self.url} ...")
                self.proto = await SidecarAsyncClient(self.url, **kwds)
                logger.debug("connected !")
            except ConnectionRefusedError:
                logger.warning(
                    f"Could not connect to {self.url} at this time")
            except Exception:
                # was a bare `except:` — that also caught
                # asyncio.CancelledError, making this task uncancellable
                logger.exception(
                    f"Could not connect to {self.url} at this time")
        await asyncio.sleep(self.keep_period)
def emit_info(self, channel, info, wrap_in_list):
    """
    Send `info` as JSON on socketio `channel`.

    info can be a list (e.g. for leases) or a dict, with or without
    an 'id' field; with wrap_in_list, info is wrapped as the single
    element of a list before serialization.  On any emit failure the
    message is dropped and self.socketio is reset to None so the next
    emit reconnects.
    """
    if self.debug:
        msg = "len={}".format(len(info)) if isinstance(info, list) \
              else "id={}".format(info.get('id', '[none]'))
        logger.info("{} emitting {} -> {}"
                    .format(channel, msg, info))
    # wrap info as single elt in a list
    if wrap_in_list:
        info = [ info ]
    message = json.dumps(info)
    if self.socketio is None:
        self.connect()
    try:
        self.socketio.emit(channel, message,
                           ReconnectableSocketIO.callback)
        self.counters.setdefault(channel, 0)
        self.counters[channel] += 1
    except Exception:
        # make sure we reconnect later on
        self.socketio = None
        # bugfix: after wrapping, `info` may be a list, and lists have
        # no .get() — the original code raised AttributeError right
        # here, inside the exception handler
        payload = info[0] if wrap_in_list else info
        head = payload.get('id', 'none') if isinstance(payload, dict) \
               else 'none'
        label = "{}: {}".format(head, one_char_summary(payload))
        # logger.warn is deprecated in favour of logger.warning
        logger.warning("Dropped message on {} - channel {} - msg {}"
                       .format(self, channel, label))
        logger.exception("Dropped because of this exception")
def __init__(self, url, category, keep_period=1):
    """
    A reconnectable sidecar client.

    keep_period is the frequency (seconds) at which the connection
    is verified for open-ness; the caller MUST run keep_connected().
    """
    self.url = url
    self.category = category
    self.keep_period = keep_period
    # no connection yet; keep_connected() will establish it
    self.proto = None
    # number of messages emitted so far
    self.counter = 0
    logger.info(f"reconnectable sidecar to {url} ")
async def log(self):
    """
    Forever, every log_period: log one condensed line made of one
    char per monitored node, plus the running emit counter on the
    main channel and its increase since the previous iteration.
    """
    last_total = 0
    while True:
        summary = "".join(one_char_summary(node.info)
                          for node in self.monitor_nodes)
        total = self.reconnectable.get_counter(self.main_channel)
        growth = "+ {}".format(total - last_total)
        summary += " {} emits ({})".format(total, growth)
        last_total = total
        logger.info(summary)
        await asyncio.sleep(self.log_period)
async def log(self):
    """
    Periodic status line: one char per node followed by the sidecar
    emit counter and its delta since the last cycle.
    """
    seen_so_far = 0
    while True:
        chars = [one_char_summary(mnode.info)
                 for mnode in self.monitor_nodes]
        count = self.reconnectable.counter
        status = "".join(chars) + f" {count} emits (+ {count-seen_so_far})"
        seen_so_far = count
        logger.info(status)
        await asyncio.sleep(self.log_period)
async def run_forever(self):
    """
    Gather in parallel: one probe loop per monitored node, the
    sidecar keep-alive loop, and the periodic status logger.
    """
    logger.info(f"Starting nodes on {len(self.monitor_nodes)} nodes - "
                f"report_wlan={self.report_wlan}")
    probes = (node.probe_forever(self.cycle,
                                 ping_timeout=self.ping_timeout,
                                 ssh_timeout=self.ssh_timeout)
              for node in self.monitor_nodes)
    return asyncio.gather(*probes,
                          self.reconnectable.keep_connected(),
                          self.log())
def connect(self):
    """
    (Re)establish the socketio connection and subscribe to the back
    channel; on failure, self.socketio is left as None so the next
    emit retries the connection.
    """
    action = "connect" if self.socketio is None else "reconnect"
    try:
        logger.info("{}ing to {}".format(action, self))
        self.socketio = SocketIO(self.hostname, self.port,
                                 LoggingNamespace)
        channel = back_channel

        def closure(*args):
            # bind `channel` so on_channel knows where this came from
            return self.on_channel(channel, *args)
        self.socketio.on(channel, closure)
    except Exception:
        # was a bare `except:` — that would even hide KeyboardInterrupt;
        # also logger.warn is deprecated in favour of logger.warning
        logger.warning("Connection lost to {}".format(self))
        self.socketio = None
async def run(self):
    """
    Run n+1 tasks in parallel: one that tracks the leases, plus one
    probe loop per monitored node.
    """
    logger.info("Starting monitor on {} nodes - report_wlan={}"
                .format(len(self.monitor_nodes), self.report_wlan))
    tasks = [self.monitor_leases.run_forever()]
    tasks.extend(
        node.probe_forever(self.cycle,
                           ping_timeout=self.ping_timeout,
                           ssh_timeout=self.ssh_timeout)
        for node in self.monitor_nodes)
    return asyncio.gather(*tasks)
async def watch_back_channel(self, category, callback):
    """
    Forever read messages from the websocket and invoke
    `callback(umbrella)` for each 'request' action whose category
    matches `category`.  A failed recv() drops the connection so that
    keep_connected() re-establishes it.
    """
    while True:
        if not self.proto:
            # not connected yet — wait for keep_connected() to do its job
            logger.debug(f"backing off for {self.keep_period}s")
            await asyncio.sleep(self.keep_period)
            continue
        try:
            umbrella = json.loads(await self.proto.recv())
            logger.info(
                f"tmp - got incoming {umbrella['category']} x {umbrella['action']}"
            )
            relevant = (umbrella['category'] == category
                        and umbrella['action'] == 'request')
            if relevant:
                callback(umbrella)
        except Exception:                    # ### to review
            logger.exception("recv failed .. fix me")
            self.proto = None
async def run_forever(self):
    """
    Main loop: sleep in small steps until the cycle elapses or a
    back-channel event fast-tracks us, then refresh the leases and
    advertise them on the sidecar channel.
    """
    leases = Leases(self.message_bus)
    while True:
        self.fast_track = False
        deadline = time.time() + self.cycle
        # check for back_channel every 15 ms
        while time.time() < deadline and not self.fast_track:
            await asyncio.sleep(self.step)
            # give a chance to socketio events to trigger
            self.reconnectable.wait(self.wait)
        try:
            await leases.refresh()
            # xxx this is fragile
            omf_leases = leases.resources
            self.reconnectable.emit_info(
                self.channel, omf_leases, wrap_in_list=False)
            logger.info("advertising {} leases".format(len(omf_leases)))
            if self.debug:
                logger.info("Leases details: {}".format(omf_leases))
        except Exception:
            logger.exception("monitor could not get leases")
async def probe(self):
    """
    One monitoring pass for this phone.

    Make sure the ssh gateway is connected, then read android's
    'airplane_mode_on' global setting through adb; store the outcome
    in self.info['airplane_mode'] as 'on' / 'off' / 'fail' and emit
    the updated info.
    """
    # connect or reconnect if needed
    if not self.gateway.is_connected():
        try:
            if self.verbose:
                logger.info(f"{self}: connecting to gateway "
                            f"{self.gateway}")
            await self.gateway.connect_lazy()
            logger.info(f"Connected -> {self.gateway}")
        except Exception as exc:
            logger.error("Could not connect -> {} (exc={})".format(
                self.gateway, exc))
            # report the failure right away
            self.info['airplane_mode'] = 'fail'
            await self.emit()
    # still not connected after the attempt above: give up this pass
    if not self.gateway.is_connected():
        logger.error("Not connected to gateway - aborting")
        return
    try:
        if self.verbose:
            logger.info(f"{self}: retrieving airplane settings to gateway "
                        f"{self.gateway}")
        # capture the remote command's output instead of displaying it
        self.gateway.formatter.start_capture()
        retcod = await self.gateway.run(
            "{} shell \"settings get global airplane_mode_on\"".format(
                self.adb_bin))
        result = self.gateway.formatter.get_capture().strip()
        # the setting reads '1' when airplane mode is enabled
        airplane_mode = 'fail' if retcod != 0 \
            else 'on' if result == '1' else 'off'
        if self.verbose:
            logger.info("probed phone {} : retcod={} result={} "
                        "-> airplane_mode = {}".format(
                            self.adb_id, retcod, result, airplane_mode))
        self.info['airplane_mode'] = airplane_mode
    except Exception as exc:
        logger.error("Could not probe {} -> (e={})".format(
            self.adb_id, exc))
        self.info['airplane_mode'] = 'fail'
        # force ssh reconnect
        self.gateway.conn = None
    await self.emit()
async def mainloop(self):
    """
    Loop forever: sleep in small steps until either the cycle elapses
    or a back-channel request fast-tracks us, then refresh and
    advertise the leases through the reconnectable sidecar.
    """
    leases = Leases(self.message_bus)
    if self.verbose:
        logger.info("Entering monitor on leases")
    while True:
        self.fast_track = False                # pylint: disable=w0201
        deadline = time.time() + self.cycle
        # check for back_channel every 50 ms
        while time.time() < deadline and not self.fast_track:
            await asyncio.sleep(self.step)
        try:
            if self.verbose:
                logger.info("monitorleases mainloop")
            await leases.refresh()
            # xxx this is fragile
            resources = leases.resources
            logger.info(f"advertising {len(resources)} leases")
            await self.reconnectable.emit_infos(resources)
            if self.verbose:
                logger.info(f"Leases details: {resources}")
        except Exception:
            logger.exception("monitornodes could not get leases")
async def probe(
        self,                           # pylint: disable=r0912, r0914, r0915
        ping_timeout, ssh_timeout):
    """
    The logic for getting one node's info and send it to sidecar

    Three passes:
      pass1 - read USRP and CMC status; stop early if the node is
              not powered on,
      pass2 - ssh into the node to gather OS / gnuradio / docker
              details (with a timeout on connect and on the command),
      pass3 - if ssh was not available, fall back to a single ping
              to at least set control_ping.
    """
    if self.verbose:
        logger.info(f"entering pass1, info={self.info}")
    # pass1 : check for status
    padding_dict = {
        'control_ping': 'off',
        'control_ssh': 'off',
        # don't overwrite os_release though
    }
    # get USRP status no matter what - use "" if we receive None
    # to limit noise when the node is physically removed
    usrp_status = await self.node.get_usrpstatus() or 'fail'
    # replace usrpon and usrpoff with just on and off
    self.set_info({'usrp_on_off': usrp_status.replace('usrp', '')})
    # get CMC status
    status = await self.node.get_status()
    if status == "off":
        await self.set_info_and_report({'cmc_on_off': 'off'}, padding_dict)
        return
    elif status != "on":
        await self.set_info_and_report({'cmc_on_off': 'fail'}, padding_dict)
        return
    if self.verbose:
        logger.info(f"entering pass2, info={self.info}")
    # pass2 : CMC status is ON - let's try to ssh it
    self.set_info({'cmc_on_off': 'on'})
    padding_dict = {
        'control_ping': 'on',
        'control_ssh': 'on',
    }
    remote_commands = [
        "cat /etc/lsb-release /etc/redhat-release /etc/gnuradio-release "
        "2> /dev/null | grep -i release",
        "echo -n GNURADIO: ; gnuradio-config-info --version "
        "2> /dev/null || echo none",
        # this trick allows to have the filename on each output line
        "grep . /etc/rhubarbe-image /dev/null",
        "echo -n UNAME: ; uname -r",
        "echo -n DOCKER: ; docker --version",
        "echo -n CONTAINER: ; docker inspect --format='{{.State.Running}} {{.Config.Image}}' container",
    ]
    # reconnect each time
    async with SshProxy(self.node) as ssh:
        if self.verbose:
            logger.info(
                f"trying to ssh-connect to {self.node.control_hostname()} "
                f"(timeout={ssh_timeout})")
        try:
            connected = await asyncio.wait_for(ssh.connect(),
                                               timeout=ssh_timeout)
        except asyncio.TimeoutError:
            connected = False
            self.set_info({'control_ssh': 'off'})
        if self.verbose:
            logger.info(
                f"{self.node.control_hostname()} ssh-connected={connected}"
            )
        if connected:
            try:
                command = ";".join(remote_commands)
                output = await asyncio.wait_for(ssh.run(command),
                                                timeout=ssh_timeout)
                # padding dict here sets control_ssh and control_ping to on
                self.parse_ssh_probe_output(output, padding_dict)
                # required as otherwise we leak openfiles
                try:
                    await ssh.close()
                except Exception:
                    logger.exception("monitornodes oops 1")
            except asyncio.TimeoutError:
                if self.verbose:
                    logger.info(
                        f"received ssh timeout with {self.node.control_hostname()}"
                    )
                self.set_info({'control_ssh': 'off'})
            except Exception:
                logger.exception("monitornodes remote_command failed")
    if self.verbose:
        logger.info(f"{self.node.control_hostname()} ssh-based logic done "
                    f"ssh is deemed {self.info['control_ssh']}")
    # if we could ssh then we're done
    if self.info['control_ssh'] == 'on':
        await self.report_info()
        return

    if self.verbose:
        logger.info(f"entering pass3, info={self.info}")
    # pass3 : node is ON but could not ssh
    # check for ping
    # I don't know of an asyncio library to deal with icmp
    # so let's use asyncio.subprocess
    # xxx maybe a Ping class would be the way to go
    control = self.node.control_hostname()
    command = ["ping", "-c", "1", "-t", "1", control]
    try:
        subprocess = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL)
        # failure occurs through timeout
        await asyncio.wait_for(subprocess.wait(), timeout=ping_timeout)
        await self.set_info_and_report({'control_ping': 'on'})
        return
    except asyncio.TimeoutError:
        await self.set_info_and_report({'control_ping': 'off'})
        return
def parse_ssh_probe_output(self,     # pylint: disable=r0912, r0914, r0915
                           stdout, padding_dict):
    """
    Scan the ssh probe's stdout line by line: extract OS release,
    gnuradio version, kernel uname, image radical, and wlan rx/tx
    byte counters; the counters are turned into bit rates against the
    previous measurement stored in self.history (keyed by
    (wlan_no, rxtx)), then everything is pushed through set_info().
    """
    os_release = "other"
    gnuradio_release = "none"
    uname = ""
    rxtx_dict = {}
    rxtx_key = None
    image_radical = ""
    for line in stdout.split("\n"):
        match = self.ubuntu_matcher.match(line)
        if match:
            version = match.group('ubuntu_version')
            os_release = f"ubuntu-{version}"
            continue
        match = self.fedora_matcher.match(line)
        if match:
            version = match.group('fedora_version')
            os_release = f"fedora-{version}"
            continue
        match = self.centos_matcher.match(line)
        if match:
            version = match.group('centos_version')
            os_release = f"centos-{version}"
            continue
        match = self.gnuradio_matcher.match(line)
        if match:
            gnuradio_release = match.group('gnuradio_version')
            continue
        match = self.uname_matcher.match(line)
        if match:
            uname = match.group('uname')
            continue
        match = self.rhubarbe_image_matcher.match(line)
        if match:
            image_radical = match.group('image_radical')
            continue
        match = self.rxtx_matcher.match(line)
        if match:
            # use a tuple as the hash
            rxtx_key = (match.group('wlan_no'), match.group('rxtx'))
            continue
        match = self.number_matcher.match(line)
        if match and rxtx_key:
            rxtx_dict[rxtx_key] = int(line)
            continue
        rxtx_key = None
    # now that we have the counters we need to translate this into rate
    # for that purpose we use local clock;
    # small imprecision should not impact overall result
    now = time.time()
    wlan_info_dict = {}
    # `byte_count` rather than `bytes`: don't shadow the builtin
    for rxtx_key, byte_count in rxtx_dict.items():
        wlan_no, rxtx = rxtx_key
        # rather dirty hack for images that use wlan2 and wlan3
        # expose in wlan0 or wlan1 depending on parity of actual device
        try:
            wlan_no = int(wlan_no)
            wlan_no = wlan_no % 2
        except Exception:
            pass
        if self.verbose:
            logger.info(f"node={self.node} collected {byte_count} "
                        f"for device wlan{wlan_no} in {rxtx}")
        # do we have something on this measurement ?
        if rxtx_key in self.history:
            previous_bytes, previous_time = self.history[rxtx_key]
            info_key = f"wlan_{wlan_no}_{rxtx}_rate"
            new_rate = (8.*(byte_count - previous_bytes)
                        / (now - previous_time))
            wlan_info_dict[info_key] = new_rate
            if self.verbose:
                # bugfix: used to interpolate the builtin `id` function
                # (f"node={id} ...") instead of the node itself
                logger.info(f"node={self.node} computed {new_rate} bps "
                            f"for key {info_key} "
                            f"- bytes = {byte_count}, pr = {previous_bytes}, "
                            f"now = {now}, pr = {previous_time}")
        # store this measurement for next run
        self.history[rxtx_key] = (byte_count, now)
    # xxx would make sense to clean up history for measurements that
    # we were not able to collect at this cycle
    self.set_info({'os_release': os_release,
                   'gnuradio_release': gnuradio_release,
                   'uname': uname,
                   'image_radical': image_radical},
                  padding_dict, wlan_info_dict)
def parse_ssh_probe_output(self, stdout, padding_dict):
    """
    Parse the ssh probe command's stdout: OS release, gnuradio
    version, kernel uname, image radical and wlan rx/tx byte
    counters; counters are converted into bit rates against the
    previous measurement kept in self.history, and results are
    pushed through set_info().
    """
    os_release = "other"
    gnuradio_release = "none"
    uname = ""
    rxtx_dict = {}
    rxtx_key = None
    image_radical = ""
    for line in stdout.split("\n"):
        match = self.ubuntu_matcher.match(line)
        if match:
            version = match.group('ubuntu_version')
            os_release = "ubuntu-{version}".format(**locals())
            continue
        match = self.fedora_matcher.match(line)
        if match:
            version = match.group('fedora_version')
            os_release = "fedora-{version}".format(**locals())
            continue
        match = self.gnuradio_matcher.match(line)
        if match:
            gnuradio_release = match.group('gnuradio_version')
            continue
        match = self.uname_matcher.match(line)
        if match:
            uname = match.group('uname')
            continue
        match = self.rhubarbe_image_matcher.match(line)
        if match:
            image_radical = match.group('image_radical')
            continue
        match = self.rxtx_matcher.match(line)
        if match:
            # use a tuple as the hash
            rxtx_key = (match.group('wlan_no'), match.group('rxtx'))
            continue
        match = self.number_matcher.match(line)
        if match and rxtx_key:
            rxtx_dict[rxtx_key] = int(line)
            continue
        rxtx_key = None
    # now that we have the counters we need to translate this into rate
    # for that purpose we use local clock;
    # small imprecision should not impact overall result
    now = time.time()
    wlan_info_dict = {}
    # `byte_count` rather than `bytes`: don't shadow the builtin
    for rxtx_key, byte_count in rxtx_dict.items():
        wlan_no, rxtx = rxtx_key
        # rather dirty hack for images that use wlan2 and wlan3
        # expose in wlan0 or wlan1 depending on parity of actual device
        try:
            wlan_no = int(wlan_no)
            wlan_no = wlan_no % 2
        except (TypeError, ValueError):
            # was a bare `except:` — only number-conversion errors
            # are expected here
            pass
        if self.debug:
            logger.info("node={self.node} collected {byte_count} for device wlan{wlan_no} in {rxtx}"
                        .format(**locals()))
        # do we have something on this measurement ?
        if rxtx_key in self.history:
            previous_bytes, previous_time = self.history[rxtx_key]
            info_key = "wlan_{wlan_no}_{rxtx}_rate".format(**locals())
            new_rate = (8.*(byte_count - previous_bytes)
                        / (now - previous_time))
            wlan_info_dict[info_key] = new_rate
            if self.debug:
                # bugfix: first format argument used to be the builtin
                # `id` function instead of the node
                logger.info("node={} computed {} bps for key {} "
                            "- bytes = {}, pr = {}, now = {}, pr = {}"
                            .format(self.node, new_rate, info_key,
                                    byte_count, previous_bytes,
                                    now, previous_time))
        # store this measurement for next run
        self.history[rxtx_key] = (byte_count, now)
    # xxx would make sense to clean up history for measurements that
    # we were not able to collect at this cycle
    self.set_info(
        {
            'os_release' : os_release,
            'gnuradio_release' : gnuradio_release,
            'uname' : uname,
            'image_radical' : image_radical,
        },
        padding_dict, wlan_info_dict)
async def probe(self, ping_timeout, ssh_timeout):
    """
    The logic for getting one node's info and send it to sidecar

    pass1 reads USRP and CMC status and stops early when the node is
    not powered on; pass2 tries to ssh in to collect OS / radio
    details; pass3 falls back to a single ping when ssh failed, to
    at least set control_ping.
    """
    if self.debug:
        logger.info("entering pass1, info={}".format(self.info))
    # pass1 : check for status
    padding_dict = {
        'control_ping' : 'off',
        'control_ssh' : 'off',
        # don't overwrite os_release though
    }
    # get USRP status no matter what
    usrp_status = await self.node.get_usrpstatus()
    # replace usrpon and usrpoff with just on and off
    self.set_info({'usrp_on_off' : usrp_status.replace('usrp', '')})
    # get CMC status
    status = await self.node.get_status()
    if status == "off":
        self.set_info_and_report({'cmc_on_off' : 'off'}, padding_dict)
        return
    elif status != "on":
        self.set_info_and_report({'cmc_on_off' : 'fail'}, padding_dict)
        return
    if self.debug:
        logger.info("entering pass2, info={}".format(self.info))
    # pass2 : CMC status is ON - let's try to ssh it
    self.set_info({'cmc_on_off' : 'on'})
    padding_dict = {
        'control_ping' : 'on',
        'control_ssh' : 'on',
    }
    self.zero_wlan_infos()
    remote_commands = [
        "cat /etc/lsb-release /etc/fedora-release /etc/gnuradio-release 2> /dev/null | grep -i release",
        "echo -n GNURADIO: ; gnuradio-config-info --version 2> /dev/null || echo none",
        # this trick allows to have the filename on each output line
        "grep . /etc/rhubarbe-image /dev/null",
        "echo -n 'UNAME:' ; uname -r",
    ]
    if self.report_wlan:
        remote_commands.append(
            "head /sys/class/net/wlan?/statistics/[rt]x_bytes"
        )
    # reconnect each time
    ssh = SshProxy(self.node)
    if self.debug:
        logger.info("trying to ssh-connect")
    try:
        connected = await asyncio.wait_for(ssh.connect(),
                                           timeout=ssh_timeout)
    except asyncio.TimeoutError as e:
        connected = False
    if self.debug:
        logger.info("ssh-connected={}".format(connected))
    if connected:
        try:
            command = ";".join(remote_commands)
            output = await ssh.run(command)
            # padding dict here sets control_ssh and control_ping to on
            self.parse_ssh_probe_output(output, padding_dict)
            # required as otherwise we leak openfiles
            try:
                await ssh.close()
            except Exception as e:
                logger.exception("monitor oops 1")
                pass
        except Exception as e:
            logger.exception("monitor remote_command failed")
    else:
        self.set_info({'control_ssh': 'off'})
    # if we could ssh then we're done
    if self.info['control_ssh'] == 'on':
        self.report_info()
        return

    if self.debug:
        logger.info("entering pass3, info={}".format(self.info))
    # pass3 : node is ON but could not ssh
    # check for ping
    # I don't know of an asyncio library to deal with icmp
    # so let's use asyncio.subprocess
    # xxx maybe a Ping class would be the way to go
    control = self.node.control_hostname()
    command = [ "ping", "-c", "1", "-t", "1", control ]
    try:
        subprocess = await asyncio.create_subprocess_exec(
            *command,
            stdout = asyncio.subprocess.DEVNULL,
            stderr = asyncio.subprocess.DEVNULL)
        # failure surfaces as a TimeoutError below
        retcod = await asyncio.wait_for(subprocess.wait(),
                                        timeout=ping_timeout)
        self.set_info_and_report({'control_ping' : 'on'})
        return
    except asyncio.TimeoutError as e:
        self.set_info_and_report({'control_ping' : 'off'})
        return
def on_back_channel(self, *args):
    """
    Invoked whenever anything shows up on the back channel; switch
    to fast track so the main loop reacts without waiting for the
    full cycle.
    """
    self.fast_track = True
    logger.info("MonitorLeases.on_back_channel, args={}".format(args))
def on_back_channel(self, umbrella):
    """
    Any incoming back-channel message fast-tracks the main loop.
    """
    self.fast_track = True                  # pylint: disable=w0201
    logger.info(f"MonitorLeases.on_back_channel, umbrella={umbrella}")
def callback(*args, **kwds):
    """
    Trace callback handed to socketio emit; just logs what comes back.
    """
    logger.info(f'on socketIO response args={args} kwds={kwds}')