def watchdog(self):
    """Watchdog thread.

    Its function is to monitor if the VPP process is alive.
    Otherwise it will start VPP and restore configuration from DB.

    The loop runs for as long as `self.router_started` is truthy and polls
    once per second. Any exception raised during an iteration is logged and
    swallowed, so that the watchdog thread keeps running.

    :returns: None.
    """
    while self.router_started:
        time.sleep(1)  # 1 sec
        try:  # Ensure watchdog thread doesn't exit on exception
            # This 'if' prevents debug print by restore_vpp_if_needed() every second
            if not fwutils.vpp_does_run():
                fwglobals.log.debug("watchdog: initiate restore")

                # Reset connection to vpp to force connection renewal
                self.vpp_api.disconnect()
                # Rerun VPP and apply configuration
                restored = self.restore_vpp_if_needed()

                if not restored:
                    # If some magic happened and vpp is alive without restore,
                    # connect back to VPP
                    if fwutils.vpp_does_run():
                        # vpp_pid is a function: it must be called, otherwise
                        # the function object itself is printed instead of the pid.
                        fwglobals.log.debug(
                            "watchdog: vpp is alive with no restore!!! (pid=%s)"
                            % str(fwutils.vpp_pid()))
                        self.vpp_api.connect()
                    fwglobals.log.debug("watchdog: no need to restore")
                else:
                    fwglobals.log.debug("watchdog: restore finished")
        except Exception as e:
            fwglobals.log.error("watchdog: exception: %s" % str(e))
def restore_vpp_if_needed(self):
    """Restore VPP.

    If vpp doesn't run because of crash or device reboot, and it was started
    by management, start vpp and restore its configuration. This is done by
    simulating a 'start-router' request.
    The router failure state is always restored first, to support multiple
    instances of Fwagent.

    :returns: `False` if no restore was performed, `True` otherwise.
    """
    self._restore_router_failure()

    is_running = fwutils.vpp_does_run()
    start_requested = self.db_requests.exists('start-router')

    # Restore is needed only when management asked for the router to be
    # started and vpp is not running right now.
    if start_requested and not is_running:
        fwglobals.log.info("===restore vpp: started===")
        try:
            fwglobals.g.handle_request('start-router', None)
        except Exception as err:
            fwglobals.log.excep("restore_vpp_if_needed: %s" % str(err))
            self._set_router_failure("failed to restore vpp configuration")
        fwglobals.log.info("====restore vpp: finished===")
        return True

    # Nothing to restore: just synchronize our state with the actual vpp state.
    fwglobals.log.debug(
        "restore_vpp_if_needed: no need to restore(vpp_runs=%s, vpp_should_be_started=%s)"
        % (str(is_running), str(start_requested)))
    self.router_started = is_running
    if self.router_started:
        fwglobals.log.debug(
            "restore_vpp_if_needed: vpp_pid=%s" % str(fwutils.vpp_pid()))
        self._start_threads()
    return False
def get_stats():
    """Return a new statistics dictionary.

    Takes all updates accumulated in `updates_list`, empties that list, and
    stamps the most recent update with the current router status.

    :returns: Dictionary with the list of updates under 'message' and 'ok' set to 1.
    """
    # Take a snapshot of the accumulated updates and empty the shared list.
    pending = updates_list[:]
    del updates_list[:]

    # Figure out the most up-to-date status of the router.
    if loadsimulator.g.enabled():
        status, state, reason = True, 'running', ''
    else:
        status = bool(fwutils.vpp_does_run())
        state, reason = fwutils.get_router_state()

    if pending:
        # Refresh the last update with the current status of the router.
        pending[-1].update({
            'running': status,
            'state': state,
            'stateReason': reason,
        })
    else:
        # No updates were collected: report a dummy one that carries
        # only the current status of the router.
        dummy_update = {
            'ok': stats['ok'],
            'running': status,
            'state': state,
            'stateReason': reason,
            'stats': {},
            'tunnel_stats': {},
            'period': 0,
            'utc': time.time()
        }
        pending.append(dummy_update)

    return {'message': pending, 'ok': 1}
def __init__(self):
    """Constructor method.

    Marks the object as disconnected and, if VPP is running, tries to
    connect to it. A failed connection attempt is logged.
    """
    self.connected = False

    # There is nothing to connect to if VPP is not running.
    if not fwutils.vpp_does_run():
        return

    if not self.connect():
        fwglobals.log.excep(
            "VPP_API.__init__: failed to connect to VPP")
        return

    self.connected = True
def _on_open(self, ws):
    """Websocket connection open handler.

    Flushes replies that are pending from a previous connection and starts
    the thread that watches the connection and updates statistics.

    :param ws: Websocket handler.

    :returns: None.
    """
    if loadsimulator.g.enabled():
        loadsimulator.g.simulate_event.set()

    # Send pending message replies to the MGMT upon connection reopen.
    # These are replies to messages that might have caused the connection
    # to the MGMT to disconnect, and thus have to be sent on the new connection.
    pending = self.pending_msg_replies
    if pending:
        fwglobals.log.info("_on_open: sending %d pending replies to orchestrator" % len(pending))
        for reply in pending:
            payload = json.dumps(reply)
            fwglobals.log.debug("_on_open: sending reply: " + payload)
            ws.send(payload)
        del pending[:]

    def _monitor(*args):
        """Body of the statistics thread: one iteration per second."""
        elapsed = 0
        while self.isConnRunning:
            if elapsed % 30 == 0:
                # Every 30 seconds ensure that connection to management is alive.
                # Management should send 'get-device-stats' request every 10 sec.
                # Note the WebSocket Ping-Pong (see ping_interval=25, ping_timeout=20)
                # does not help in case of Proxy in the middle, as was observed in field
                if not self.requestReceived:
                    fwglobals.log.debug("connect: no request was received in 30 seconds, drop connection")
                    ws.close()
                    break
                self.requestReceived = False

                # Every 30 seconds update statistics
                if not loadsimulator.g.enabled():
                    fwstats.update_stats()
                elif loadsimulator.g.started:
                    loadsimulator.g.update_stats()
                else:
                    break

            # Sleep 1 second and make another iteration
            time.sleep(1)
            elapsed += 1

    self.isConnRunning = True
    self.requestReceived = True
    self.thread_statistics = threading.Thread(target=_monitor, name='Statistics Thread')
    self.thread_statistics.start()

    if not fwutils.vpp_does_run():
        fwglobals.log.info("connect: router is not running, start it in orchestrator")
def register(self):
    """Registers device with the flexiManage.

    To do that the Fwagent establishes secure HTTP connection to the manager
    and sends a request with various data regarding the device.
    When user approves device on manager, the Fwagent establishes secure
    WebSocket connection to the manager and starts to listen for
    flexiManage requests.

    On an HTTP error whose body is a JSON object with an 'error' field,
    the lower-cased error text is stored in `self.register_error`.

    :returns: `True` if registration succeeded or the device token file
              already exists, `False` otherwise.
    """
    fwglobals.log.info("registering with flexiWAN orchestrator...")

    self.register_error = ''

    if not loadsimulator.g.enabled():
        if os.path.exists(fwglobals.g.DEVICE_TOKEN_FILE):
            fwglobals.log.info("register: already registered, to refresh run 'fwagent reset' and retry")
            return True

    try:
        with open(fwglobals.g.cfg.TOKEN_FILE, 'r') as f:
            self.token = f.readline()
    except Exception as e:
        # 'Exception' rather than a bare 'except', so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        err = "register: failed to load token from %s: %s (%s)" % \
            (fwglobals.g.cfg.TOKEN_FILE, format(e), format(type(e)))
        fwglobals.log.error(err)
        return False

    if fwutils.vpp_does_run():
        fwglobals.log.error("register: router is running, it by 'fwagent stop' and retry by 'fwagent start'")
        return False

    if loadsimulator.g.enabled():
        machine_id = loadsimulator.g.get_generated_machine_id(loadsimulator.g.simulate_id)
    else:
        machine_id = fwutils.get_machine_id()
    if machine_id is None:
        fwglobals.log.error("register: get_machine_id failed, make sure you're running in sudo privileges")
        return False

    machine_name = socket.gethostname()
    all_ip_list = socket.gethostbyname_ex(machine_name)[2]
    interfaces = fwglobals.g.handle_request('interfaces')
    default_route = fwutils.get_default_route()
    # get up to 4 IPs
    ip_list = ', '.join(all_ip_list[:4])

    url = fwglobals.g.cfg.MANAGEMENT_URL + "/api/connect/register"
    data = uparse.urlencode({
        'token': self.token.rstrip(),
        'fwagent_version': self.version,
        'machine_id': machine_id,
        'machine_name': machine_name,
        'ip_list': ip_list,
        'default_route': default_route[0],
        'default_dev': default_route[1],
        'interfaces': json.dumps(interfaces['message']),
    }).encode()
    req = ureq.Request(url, data)

    ctx = ssl.create_default_context()
    if fwglobals.g.cfg.BYPASS_CERT:
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    else:
        ctx.verify_mode = ssl.CERT_REQUIRED

    try:
        resp = ureq.urlopen(req, context=ctx)
        try:
            data = resp.read().decode()
        finally:
            # Release the underlying connection even if read/decode fails.
            resp.close()

        if loadsimulator.g.enabled():
            loadsimulator.g.simulate_device_tokens.append(data)
        else:
            with open(fwglobals.g.DEVICE_TOKEN_FILE, 'w') as fout:
                fout.write(data)
        fwglobals.log.info("Registation successful with parameters:")
        fwglobals.log.info("  Hostname:  " + machine_name)
        fwglobals.log.info("  IP List:   " + ip_list)
        fwglobals.log.info("  Device ID: " + machine_id)
        fwglobals.log.info("Run connect after approving device in the orchestrator")

    except uerr.URLError as e:
        if hasattr(e, 'code'):
            server_response = e.read().decode()
            # The server may answer with a non-standard status code that is
            # missing from the responses table; don't let the lookup raise
            # a KeyError out of this error handler.
            status_text = hsvr.BaseHTTPRequestHandler.responses.get(
                e.code, ('Unknown status',))[0]
            fwglobals.log.error('register: got %s - %s' % (str(e.code), status_text))
            fwglobals.log.error('register: Server response: %s' % server_response)
            try:
                register_response = json.loads(server_response)
                if 'error' in register_response:
                    self.register_error = register_response['error'].lower()
            except (ValueError, TypeError, AttributeError):
                # Best effort: the response body is not guaranteed to be a
                # JSON object with a textual 'error' field.
                pass
        elif hasattr(e, 'reason'):
            fwglobals.log.error('register: failed to connect to %s: %s' % (fwglobals.g.cfg.MANAGEMENT_URL, e.reason))
        return False
    except Exception as e:
        fwglobals.log.error('register: failed to send request to server %s: %s' % \
            (fwglobals.g.cfg.MANAGEMENT_URL, format(e)))
        return False
    return True
def update_stats():
    """Update statistics dictionary using values retrieved from VPP interfaces.

    Fetches the current interface counters, computes the per-interface
    deltas against the previously stored counters, and appends a summary of
    the result to `updates_list`. The list is kept bounded by
    UPDATE_LIST_MAX_SIZE: when it is full, the oldest update is dropped.

    :returns: None.
    """
    global stats
    global vpp_pid

    # If vpp is not running or has crashed (at least one of its process
    # IDs has changed), reset the statistics and update the vpp pids list
    current_vpp_pid = fwutils.vpp_pid()
    if not current_vpp_pid or current_vpp_pid != vpp_pid:
        reset_stats()
        vpp_pid = current_vpp_pid

    new_stats = fwutils.get_vpp_if_count()
    if new_stats['ok'] == 1:
        prev_stats = dict(stats)  # copy of prev stats
        stats['time'] = time.time()
        stats['last'] = new_stats['message']
        stats['ok'] = 1
        # Update info if previous stats valid
        if prev_stats['ok'] == 1:
            if_bytes = {}
            for intf, counts in stats['last'].items():
                prev_stats_if = prev_stats['last'].get(intf)
                # Interfaces that have no previous sample are skipped,
                # as there is nothing to compute the delta against.
                if prev_stats_if is not None:
                    if_bytes[intf] = {
                        key: 1.0 * (counts[key] - prev_stats_if[key])
                        for key in ('rx_bytes', 'rx_pkts', 'tx_bytes', 'tx_pkts')
                    }

            stats['bytes'] = if_bytes
            stats['tunnel_stats'] = tunnel_stats_get()
            stats['period'] = stats['time'] - prev_stats['time']
            stats['running'] = bool(fwutils.vpp_does_run())
    else:
        stats['ok'] = 0

    # Add the update to the list of updates. If the list is full,
    # remove the oldest update before pushing the new one.
    # Sizes are compared by value: identity comparison ('is') of integers
    # only works by accident for small values and lets the list grow
    # without bound for larger limits.
    if len(updates_list) >= UPDATE_LIST_MAX_SIZE:
        updates_list.pop(0)

    updates_list.append({
        'ok': stats['ok'],
        'running': stats['running'],
        'stats': stats['bytes'],
        'period': stats['period'],
        'tunnel_stats': stats['tunnel_stats'],
        'utc': time.time()
    })