class Reactor(object):
    """Event loop driving the configured peers, the listener and the API processes.

    The loop is built around ``select.poll()``: peers which have no work to do
    register their socket with the poller and are woken up when data arrives.
    """

    class Exit(object):
        # exit codes returned by Reactor.run()
        normal = 0
        validate = 0
        listening = 1
        configuration = 1
        privileges = 1
        log = 1
        pid = 1
        socket = 1
        io_error = 1
        process = 1
        select = 1
        unknown = 1

    # [hex(ord(c)) for c in os.popen('clear').read()]
    clear = concat_bytes_i(
        character(int(c, 16))
        for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a'])

    def __init__(self, configurations):
        """Read the environment settings and build the reactor's collaborators.

        ``configurations`` is handed unchanged to ``Configuration``.
        """
        self._ips = environment.settings().tcp.bind
        self._port = environment.settings().tcp.port
        self._stopping = environment.settings().tcp.once
        self.exit_code = self.Exit.unknown

        self.max_loop_time = environment.settings().reactor.speed
        self._sleep_time = self.max_loop_time / 100
        # {second: number of loops} for the current second only
        self._busyspin = {}
        # {peer key: {second: remaining budget}}
        self._ratelimit = {}
        self.early_drop = environment.settings().daemon.drop

        self.processes = None

        self.configuration = Configuration(configurations)
        self.logger = Logger()
        self.asynchronous = ASYNC()
        self.signal = Signal()
        self.daemon = Daemon(self)
        self.listener = Listener(self)
        self.api = API(self)

        self._peers = {}

        self._reload_processes = False
        self._saved_pid = False
        self._poller = select.poll()

    def _termination(self, reason, exit_code):
        """Record ``exit_code``, request a shutdown and log ``reason`` as critical."""
        self.exit_code = exit_code
        self.signal.received = Signal.SHUTDOWN
        self.logger.critical(reason, 'reactor')

    def _prevent_spin(self):
        """Sleep when called more than ``max_loop_time`` times within one second.

        Returns True when it slept, False otherwise.
        """
        second = int(time.time())
        if second not in self._busyspin:
            # only the counter of the current second is kept
            self._busyspin = {second: 0}
        self._busyspin[second] += 1
        if self._busyspin[second] > self.max_loop_time:
            time.sleep(self._sleep_time)
            return True
        return False

    def _rate_limited(self, peer, rate):
        """Tell if ``peer`` used up its budget of ``rate`` runs for this second.

        A ``rate`` of zero or less disables the limit.
        """
        if rate <= 0:
            return False
        second = int(time.time())
        ratelimit = self._ratelimit.get(peer, {})
        if second not in ratelimit:
            # new second: start a fresh budget, this call consumes one unit
            self._ratelimit[peer] = {second: rate - 1}
            return False
        if self._ratelimit[peer][second] > 0:
            self._ratelimit[peer][second] -= 1
            return False
        return True

    def _wait_for_io(self, sleeptime):
        """Yield the file descriptors the poller reports as readable.

        ``sleeptime`` is passed to ``poll()`` as its timeout. Descriptors
        reporting only hang-up / error / invalid conditions are not yielded;
        they trigger the busy-spin protection once the poll result has been
        consumed.
        """
        spin_prevention = False
        try:
            for fd, event in self._poller.poll(sleeptime):
                if event & select.POLLIN or event & select.POLLPRI:
                    yield fd
                    continue
                elif event & select.POLLHUP or event & select.POLLERR or event & select.POLLNVAL:
                    spin_prevention = True
                    continue
            if spin_prevention:
                self._prevent_spin()
        except KeyboardInterrupt:
            self._termination('^C received', self.Exit.normal)
            return
        except Exception:
            self._prevent_spin()
            return

    # peer related functions

    def _find_peer(self, peer_name):
        """Return the peer registered as ``peer_name``; log and return None if missing."""
        peer = self._peers.get(peer_name, None)
        if not peer:
            self.logger.critical('could not find referenced peer', 'reactor')
            return None
        return peer

    def active_peers(self):
        """Return the keys of the peers which are not passive or which have a ``proto``."""
        peers = set()
        for key, peer in self._peers.items():
            if not peer.neighbor.passive or peer.proto:
                peers.add(key)
        return peers

    def established_peers(self):
        """Return the keys of the peers whose FSM is in the ESTABLISHED state."""
        peers = set()
        for key, peer in self._peers.items():
            if peer.fsm == FSM.ESTABLISHED:
                peers.add(key)
        return peers

    def peers(self):
        """Return the list of all the registered peer keys."""
        return list(self._peers)

    def handle_connection(self, peer_name, connection):
        """Hand ``connection`` to the named peer; return None if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return
        peer.handle_connection(connection)

    def neighbor(self, peer_name):
        """Return the neighbor of the named peer, or None if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return
        return peer.neighbor

    def neighbor_name(self, peer_name):
        """Return the neighbor name of the named peer, or "" if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return ""
        return peer.neighbor.name()

    def neighbor_ip(self, peer_name):
        """Return the peer address as a string, or "" if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return ""
        return str(peer.neighbor.peer_address)

    def neighbor_cli_data(self, peer_name):
        """Return the peer's ``cli_data()``, or "" if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return ""
        return peer.cli_data()

    def neighbor_rib(self, peer_name, rib_name, advertised=False):
        """Return the cached changes of one of the peer's RIBs as a list.

        ``rib_name`` selects the outgoing RIB when it is 'out', the incoming
        one otherwise. With ``advertised`` set, the changes are restricted to
        the negotiated families (none when the peer has no ``proto``).
        Returns [] if the peer is unknown.
        """
        peer = self._find_peer(peer_name)
        if peer is None:
            return []
        families = None
        if advertised:
            families = peer.proto.negotiated.families if peer.proto else []
        rib = peer.neighbor.rib.outgoing if rib_name == 'out' else peer.neighbor.rib.incoming
        return list(rib.cached_changes(families))

    # historical, misspelled, name kept so that existing callers keep working
    neighor_rib = neighbor_rib

    def neighbor_rib_resend(self, peer_name):
        """Ask the named peer's outgoing RIB to resend; no-op if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return
        peer.neighbor.rib.outgoing.resend(None, peer.neighbor.route_refresh)

    def neighbor_rib_out_withdraw(self, peer_name):
        """Ask the named peer's outgoing RIB to withdraw; no-op if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return
        peer.neighbor.rib.outgoing.withdraw(None, peer.neighbor.route_refresh)

    def neighbor_rib_in_clear(self, peer_name):
        """Clear the named peer's incoming RIB; no-op if the peer is unknown."""
        peer = self._find_peer(peer_name)
        if peer is None:
            return
        peer.neighbor.rib.incoming.clear()

    # ...

    def _completed(self, peers):
        """Return True when none of ``peers`` has a pending outgoing RIB."""
        for peer in peers:
            if self._peers[peer].neighbor.rib.outgoing.pending():
                return False
        return True

    def run(self, validate, root):
        """Start the daemon and run the main loop until shutdown.

        With ``validate`` set, the parsed neighbors are logged and the method
        returns without starting the loop. ``root`` is not used by this method.
        Returns one of the ``Exit`` codes.
        """
        self.daemon.daemonise()

        # Make sure we create processes once we have closed file descriptor
        # unfortunately, this must be done before reading the configuration file
        # so we can not do it with dropped privileges
        self.processes = Processes()

        # we have to read the configuration possibly with root privileges
        # as we need the MD5 information when we bind, and root is needed
        # to bind to a port < 1024

        # this is undesirable as :
        # - handling user generated data as root should be avoided
        # - we may not be able to reload the configuration once the privileges are dropped

        # but I can not see any way to avoid it
        for ip in self._ips:
            if not self.listener.listen_on(ip, None, self._port, None, False, None):
                return self.Exit.listening

        if not self.load():
            return self.Exit.configuration

        if validate:  # only validate configuration
            self.logger.warning('', 'configuration')
            self.logger.warning('parsed Neighbors, un-templated', 'configuration')
            self.logger.warning('------------------------------', 'configuration')
            self.logger.warning('', 'configuration')
            for key in self._peers:
                self.logger.warning(str(self._peers[key].neighbor), 'configuration')
                self.logger.warning('', 'configuration')
            return self.Exit.validate

        for neighbor in self.configuration.neighbors.values():
            if neighbor.listen:
                if not self.listener.listen_on(
                        neighbor.md5_ip, neighbor.peer_address, neighbor.listen,
                        neighbor.md5_password, neighbor.md5_base64, neighbor.ttl_in):
                    return self.Exit.listening

        if not self.early_drop:
            self.processes.start(self.configuration.processes)

        if not self.daemon.drop_privileges():
            self.logger.critical(
                'could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user,
                'reactor')
            self.logger.critical(
                'set the environmemnt value exabgp.daemon.user to change the unprivileged user',
                'reactor')
            return self.Exit.privileges

        if self.early_drop:
            self.processes.start(self.configuration.processes)

        # This is required to make sure we can write in the log location as we now have dropped root privileges
        if not self.logger.restart():
            self.logger.critical('could not setup the logger, aborting', 'reactor')
            return self.Exit.log

        if not self.daemon.savepid():
            return self.Exit.pid

        # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
        reload_completed = False

        wait = environment.settings().tcp.delay
        if wait:
            # sleep until the next multiple of `wait` minutes on the clock
            sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
            self.logger.debug(
                'waiting for %d seconds before connecting' % sleeptime, 'reactor')
            time.sleep(float(sleeptime))

        # {socket fd: peer key} for the peers waiting for network data
        workers = {}
        peers = set()
        api_fds = []
        # poll() takes its timeout in milliseconds
        ms_sleep = int(self._sleep_time * 1000)
        # events every registered descriptor is watched for
        poll_events = (select.POLLIN | select.POLLPRI | select.POLLHUP
                       | select.POLLNVAL | select.POLLERR)

        while True:
            try:
                if self.signal.received:
                    for key in self._peers:
                        if self._peers[key].neighbor.api['signal']:
                            self._peers[key].reactor.processes.signal(
                                self._peers[key].neighbor, self.signal.number)

                    signaled = self.signal.received
                    self.signal.rearm()

                    if signaled == Signal.SHUTDOWN:
                        # NOTE(review): this also replaces an error code set by
                        # _termination() with Exit.normal - confirm it is intended
                        self.exit_code = self.Exit.normal
                        self.shutdown()
                        break

                    if signaled == Signal.RESTART:
                        self.restart()
                        continue

                    if not reload_completed:
                        continue

                    if signaled == Signal.FULL_RELOAD:
                        self._reload_processes = True

                    if signaled in (Signal.RELOAD, Signal.FULL_RELOAD):
                        self.load()
                        self.processes.start(self.configuration.processes, self._reload_processes)
                        self._reload_processes = False
                        continue

                if self.listener.incoming():
                    # check all incoming connection
                    self.asynchronous.schedule(
                        str(uuid.uuid1()), 'checking for new connection(s)',
                        self.listener.new_connections())

                peers = self.active_peers()
                if self._completed(peers):
                    reload_completed = True

                sleep = ms_sleep

                # do not attempt to listen on closed sockets even if the peer is still here
                for io in list(workers.keys()):
                    if io == -1:
                        self._poller.unregister(io)
                        del workers[io]

                # give a turn to all the peers
                for key in list(peers):
                    peer = self._peers[key]

                    # limit the number of message handling per second
                    if self._rate_limited(key, peer.neighbor.rate_limit):
                        peers.discard(key)
                        continue

                    # handle the peer
                    action = peer.run()

                    # .run() returns an ACTION enum:
                    # * immediate if it wants to be called again
                    # * later if it should be called again but has no work atm
                    # * close if it is finished and is closing down, or restarting
                    if action == ACTION.CLOSE:
                        # we are losing this peer, no point to schedule more process work
                        if key in self._peers:
                            del self._peers[key]
                        peers.discard(key)
                    elif action == ACTION.LATER:
                        io = peer.socket()
                        if io != -1:
                            self._poller.register(io, poll_events)
                            workers[io] = key
                        # no need to come back to it before a full cycle
                        peers.discard(key)
                    elif action == ACTION.NOW:
                        sleep = 0

                    if not peers:
                        break

                # read at least one message per process if there is some and parse it
                for service, command in self.processes.received():
                    self.api.text(self, service, command)
                    sleep = 0

                self.asynchronous.run()

                # keep the poller in sync with the API processes' descriptors
                if api_fds != self.processes.fds:
                    for fd in api_fds:
                        if fd == -1:
                            continue
                        if fd not in self.processes.fds:
                            self._poller.unregister(fd)
                    for fd in self.processes.fds:
                        if fd == -1:
                            continue
                        if fd not in api_fds:
                            self._poller.register(fd, poll_events)
                    api_fds = self.processes.fds

                for io in self._wait_for_io(sleep):
                    if io not in api_fds:
                        peers.add(workers[io])

                if self._stopping and not self._peers.keys():
                    self._termination('exiting on peer termination', self.Exit.normal)

            except KeyboardInterrupt:
                self._termination('^C received', self.Exit.normal)
            except SystemExit:
                self._termination('exiting', self.Exit.normal)
            # socket.error is a subclass of IOError (so catch it first)
            except socket.error:
                self._termination('socket error received', self.Exit.socket)
            except IOError:
                self._termination(
                    'I/O Error received, most likely ^C during IO', self.Exit.io_error)
            except ProcessError:
                self._termination(
                    'Problem when sending message(s) to helper program, stopping',
                    self.Exit.process)
            except select.error:
                self._termination('problem using select, stopping', self.Exit.select)

        return self.exit_code

    def register_peer(self, name, peer):
        """Register ``peer`` under ``name``, replacing any previous entry."""
        self._peers[name] = peer

    def teardown_peer(self, name, code):
        """Call ``teardown(code)`` on the named peer; raises KeyError if unknown."""
        self._peers[name].teardown(code)

    def shutdown(self):
        """Terminate all the current BGP connections"""
        self.logger.critical('performing shutdown', 'reactor')
        if self.listener:
            self.listener.stop()
            self.listener = None
        for key in self._peers.keys():
            self._peers[key].shutdown()
        self.asynchronous.clear()
        self.processes.terminate()
        self.daemon.removepid()
        self._stopping = True

    def load(self):
        """Reload the configuration and send to the peer the route which changed"""
        self.logger.notice('performing reload of exabgp %s' % version, 'configuration')

        reloaded = self.configuration.reload()

        if not reloaded:
            #
            # Careful the string below is used by the QA code to check for success or failure
            self.logger.error(
                'not reloaded, no change found in the configuration', 'configuration')
            # Careful the string above is used by the QA code to check for success or failure
            #
            self.logger.error(str(self.configuration.error), 'configuration')
            return False

        for key, peer in self._peers.items():
            if key not in self.configuration.neighbors:
                self.logger.debug('removing peer: %s' % peer.neighbor.name(), 'reactor')
                peer.remove()

        for key, neighbor in self.configuration.neighbors.items():
            # new peer
            if key not in self._peers:
                self.logger.debug('new peer: %s' % neighbor.name(), 'reactor')
                peer = Peer(neighbor, self)
                self._peers[key] = peer
            # modified peer
            elif self._peers[key].neighbor != neighbor:
                self.logger.debug(
                    'peer definition change, establishing a new connection for %s' % str(key),
                    'reactor')
                self._peers[key].reestablish(neighbor)
            # same peer but perhaps not the routes
            else:
                # finding what route changed and sending the delta is not obvious
                self.logger.debug(
                    'peer definition identical, updating peer routes if required for %s' % str(key),
                    'reactor')
                self._peers[key].reconfigure(neighbor)
            for ip in self._ips:
                if ip.afi == neighbor.peer_address.afi:
                    self.listener.listen_on(
                        ip, neighbor.peer_address, self._port,
                        neighbor.md5_password, neighbor.md5_base64, None)
        self.logger.notice('loaded new configuration successfully', 'reactor')

        return True

    def restart(self):
        """Kill the BGP session and restart it"""
        self.logger.notice('performing restart of exabgp %s' % version, 'reactor')

        # XXX: FIXME: Could return False, in case there is interference with old config...
        self.configuration.reload()

        for key in self._peers.keys():
            peer = self._peers[key]
            if key not in self.configuration.neighbors.keys():
                self.logger.debug('removing peer %s' % peer.neighbor.name(), 'reactor')
                peer.remove()
            else:
                peer.reestablish()
        self.processes.start(self.configuration.processes, True)
class Reactor (object):
    """Event loop driving the configured peers, the listener and the API processes.

    The loop is built around ``select.select()``: peers which have no work to
    do hand over their sockets and are woken up when data arrives.
    """

    class Exit (object):
        # exit codes returned by Reactor.run()
        normal = 0
        validate = 0
        listening = 1
        configuration = 1
        privileges = 1
        log = 1
        pid = 1
        socket = 1
        io_error = 1
        process = 1
        select = 1
        unknown = 1

    # [hex(ord(c)) for c in os.popen('clear').read()]
    clear = concat_bytes_i(character(int(c,16)) for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a'])

    def __init__ (self, configurations):
        """Read the environment settings and build the reactor's collaborators.

        ``configurations`` is handed unchanged to ``Configuration``.
        """
        self._ips = environment.settings().tcp.bind
        self._port = environment.settings().tcp.port
        self._stopping = environment.settings().tcp.once
        self.exit_code = self.Exit.unknown

        self.max_loop_time = environment.settings().reactor.speed
        self._sleep_time = self.max_loop_time / 100
        # {second: number of loops} for the current second only
        self._busyspin = {}
        self.early_drop = environment.settings().daemon.drop

        self.processes = None

        self.configuration = Configuration(configurations)
        self.logger = Logger()
        self.asynchronous = ASYNC()
        self.signal = Signal()
        self.daemon = Daemon(self)
        self.listener = Listener(self)
        self.api = API(self)

        self.peers = {}

        self._reload_processes = False
        self._saved_pid = False

    def _termination (self,reason, exit_code):
        """Record ``exit_code``, request a shutdown and log ``reason`` as critical."""
        self.exit_code = exit_code
        self.signal.received = Signal.SHUTDOWN
        self.logger.critical(reason,'reactor')

    def _prevent_spin(self):
        """Sleep when called more than ``max_loop_time`` times within one second.

        Returns True when it slept, False otherwise.
        """
        second = int(time.time())
        if second not in self._busyspin:
            # only the counter of the current second is kept
            self._busyspin = {second: 0}
        self._busyspin[second] += 1
        if self._busyspin[second] > self.max_loop_time:
            time.sleep(self._sleep_time)
            return True
        return False

    def _api_ready (self,sockets,sleeptime):
        """Wait up to ``sleeptime`` for input and return the readable ``sockets``.

        The descriptors of the API processes are part of the select() call, so
        that their activity interrupts the wait, but they are removed from the
        returned list. Errors considered harmless return [] after going
        through the busy-spin protection.
        """
        fds = self.processes.fds()
        ios = fds + sockets

        try:
            read,_,_ = select.select(ios,[],[],sleeptime)
            for fd in fds:
                if fd in read:
                    read.remove(fd)
            return read
        except select.error as exc:
            err_no,_ = exc.args  # pylint: disable=W0633
            if err_no not in error.block:
                raise exc
            self._prevent_spin()
            return []
        except socket.error:
            # python 3 does not raise on closed FD, but python2 does
            # we have lost a peer and it is causing the select
            # to complain, the code will self-heal, ignore the issue
            # (EBADF from python2 must be ignored when checking error.fatal)
            # otherwise sending notification causes TCP to drop and cause
            # this code to kill ExaBGP
            self._prevent_spin()
            return []
        except ValueError:
            # The peer closing the TCP connection lead to a negative file descriptor
            self._prevent_spin()
            return []
        except KeyboardInterrupt:
            self._termination('^C received',self.Exit.normal)
            return []

    def _active_peers (self):
        """Return the keys of the peers which are not passive or which have a ``proto``."""
        peers = set()
        for key,peer in self.peers.items():
            if not peer.neighbor.passive or peer.proto:
                peers.add(key)
        return peers

    def _completed (self,peers):
        """Return True when none of ``peers`` has a pending outgoing RIB."""
        for peer in peers:
            if self.peers[peer].neighbor.rib.outgoing.pending():
                return False
        return True

    def run (self, validate, root):
        """Start the daemon and run the main loop until shutdown.

        With ``validate`` set, the parsed neighbors are logged and the method
        returns without starting the loop. ``root`` is not used by this method.
        Returns one of the ``Exit`` codes.
        """
        self.daemon.daemonise()

        # Make sure we create processes once we have closed file descriptor
        # unfortunately, this must be done before reading the configuration file
        # so we can not do it with dropped privileges
        self.processes = Processes()

        # we have to read the configuration possibly with root privileges
        # as we need the MD5 information when we bind, and root is needed
        # to bind to a port < 1024

        # this is undesirable as :
        # - handling user generated data as root should be avoided
        # - we may not be able to reload the configuration once the privileges are dropped

        # but I can not see any way to avoid it
        for ip in self._ips:
            if not self.listener.listen_on(ip, None, self._port, None, False, None):
                return self.Exit.listening

        if not self.load():
            return self.Exit.configuration

        if validate:  # only validate configuration
            self.logger.warning('','configuration')
            self.logger.warning('parsed Neighbors, un-templated','configuration')
            self.logger.warning('------------------------------','configuration')
            self.logger.warning('','configuration')
            for key in self.peers:
                self.logger.warning(str(self.peers[key].neighbor),'configuration')
                self.logger.warning('','configuration')
            return self.Exit.validate

        for neighbor in self.configuration.neighbors.values():
            if neighbor.listen:
                if not self.listener.listen_on(neighbor.md5_ip, neighbor.peer_address, neighbor.listen, neighbor.md5_password, neighbor.md5_base64, neighbor.ttl_in):
                    return self.Exit.listening

        if not self.early_drop:
            self.processes.start(self.configuration.processes)

        if not self.daemon.drop_privileges():
            self.logger.critical('could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user,'reactor')
            self.logger.critical('set the environmemnt value exabgp.daemon.user to change the unprivileged user','reactor')
            return self.Exit.privileges

        if self.early_drop:
            self.processes.start(self.configuration.processes)

        # This is required to make sure we can write in the log location as we now have dropped root privileges
        if not self.logger.restart():
            self.logger.critical('could not setup the logger, aborting','reactor')
            return self.Exit.log

        if not self.daemon.savepid():
            return self.Exit.pid

        # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
        reload_completed = False

        wait = environment.settings().tcp.delay
        if wait:
            # sleep until the next multiple of `wait` minutes on the clock
            sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
            self.logger.debug('waiting for %d seconds before connecting' % sleeptime,'reactor')
            time.sleep(float(sleeptime))

        # {socket: peer key} for the peers waiting for network data
        workers = {}
        peers = set()

        while True:
            try:
                if self.signal.received:
                    for key in self.peers:
                        if self.peers[key].neighbor.api['signal']:
                            self.peers[key].reactor.processes.signal(self.peers[key].neighbor,self.signal.number)

                    signaled = self.signal.received
                    self.signal.rearm()

                    if signaled == Signal.SHUTDOWN:
                        self.shutdown()
                        break

                    if signaled == Signal.RESTART:
                        self.restart()
                        continue

                    if not reload_completed:
                        continue

                    if signaled == Signal.FULL_RELOAD:
                        self._reload_processes = True

                    if signaled in (Signal.RELOAD, Signal.FULL_RELOAD):
                        self.load()
                        self.processes.start(self.configuration.processes,self._reload_processes)
                        self._reload_processes = False
                        continue

                if self.listener.incoming():
                    # check all incoming connection
                    self.asynchronous.schedule(str(uuid.uuid1()),'checking for new connection(s)',self.listener.new_connections())

                peers = self._active_peers()
                if self._completed(peers):
                    reload_completed = True

                sleep = self._sleep_time

                # do not attempt to listen on closed sockets even if the peer is still here
                for io in list(workers.keys()):
                    if io.fileno() == -1:
                        del workers[io]

                # give a turn to all the peers
                for key in list(peers):
                    peer = self.peers[key]
                    action = peer.run()

                    # .run() returns an ACTION enum:
                    # * immediate if it wants to be called again
                    # * later if it should be called again but has no work atm
                    # * close if it is finished and is closing down, or restarting
                    if action == ACTION.CLOSE:
                        # we are losing this peer, no point to schedule more process work
                        if key in self.peers:
                            del self.peers[key]
                        peers.discard(key)
                    elif action == ACTION.LATER:
                        for io in peer.sockets():
                            workers[io] = key
                        # no need to come back to it before a full cycle
                        peers.discard(key)
                    elif action == ACTION.NOW:
                        sleep = 0

                    if not peers:
                        break

                # read at least one message per process if there is some and parse it
                for service,command in self.processes.received():
                    self.api.text(self,service,command)
                    sleep = 0

                self.asynchronous.run()

                # peers whose socket became readable get a turn on the next cycle
                for io in self._api_ready(list(workers),sleep):
                    peers.add(workers[io])
                    del workers[io]

                if self._stopping and not self.peers.keys():
                    self._termination('exiting on peer termination',self.Exit.normal)

            except KeyboardInterrupt:
                self._termination('^C received',self.Exit.normal)
            except SystemExit:
                self._termination('exiting', self.Exit.normal)
            # socket.error is a subclass of IOError (so catch it first)
            except socket.error:
                self._termination('socket error received',self.Exit.socket)
            except IOError:
                self._termination('I/O Error received, most likely ^C during IO',self.Exit.io_error)
            except ProcessError:
                self._termination('Problem when sending message(s) to helper program, stopping',self.Exit.process)
            except select.error:
                self._termination('problem using select, stopping',self.Exit.select)

        return self.exit_code

    def shutdown (self):
        """Terminate all the current BGP connections"""
        self.logger.critical('performing shutdown','reactor')
        if self.listener:
            self.listener.stop()
            self.listener = None
        for key in self.peers.keys():
            self.peers[key].shutdown()
        self.asynchronous.clear()
        self.processes.terminate()
        self.daemon.removepid()
        self._stopping = True

    def load (self):
        """Reload the configuration and send to the peer the route which changed"""
        self.logger.notice('performing reload of exabgp %s' % version,'configuration')

        reloaded = self.configuration.reload()

        if not reloaded:
            #
            # Careful the string below is used by the QA code to check for success or failure
            self.logger.error('problem with the configuration file, no change done','configuration')
            # Careful the string above is used by the QA code to check for success or failure
            #
            self.logger.error(str(self.configuration.error),'configuration')
            return False

        for key, peer in self.peers.items():
            if key not in self.configuration.neighbors:
                self.logger.debug('removing peer: %s' % peer.neighbor.name(),'reactor')
                peer.remove()

        for key, neighbor in self.configuration.neighbors.items():
            # new peer
            if key not in self.peers:
                self.logger.debug('new peer: %s' % neighbor.name(),'reactor')
                peer = Peer(neighbor,self)
                self.peers[key] = peer
            # modified peer
            elif self.peers[key].neighbor != neighbor:
                self.logger.debug('peer definition change, establishing a new connection for %s' % str(key),'reactor')
                self.peers[key].reestablish(neighbor)
            # same peer but perhaps not the routes
            else:
                # finding what route changed and sending the delta is not obvious
                self.logger.debug('peer definition identical, updating peer routes if required for %s' % str(key),'reactor')
                self.peers[key].reconfigure(neighbor)
            for ip in self._ips:
                if ip.afi == neighbor.peer_address.afi:
                    self.listener.listen_on(ip, neighbor.peer_address, self._port, neighbor.md5_password, neighbor.md5_base64, None)
        self.logger.notice('loaded new configuration successfully','reactor')

        return True

    def restart (self):
        """Kill the BGP session and restart it"""
        self.logger.notice('performing restart of exabgp %s' % version,'reactor')
        self.configuration.reload()

        for key in self.peers.keys():
            peer = self.peers[key]
            if key not in self.configuration.neighbors.keys():
                # the neighbor is gone from the configuration, so its name can
                # only come from the peer itself (looking it up in
                # configuration.neighbors here would raise KeyError)
                self.logger.debug('removing Peer %s' % peer.neighbor.name(),'reactor')
                peer.remove()
            else:
                peer.reestablish()
        self.processes.start(self.configuration.processes,True)
class Reactor(object):
    """Event loop driving the configured peers, the listener and the API processes.

    The loop is built around ``select.select()``: peers which have no work to
    do hand over their sockets and are woken up when data arrives.
    """

    class Exit(object):
        # exit codes returned by Reactor.run()
        normal = 0
        validate = 0
        listening = 1
        configuration = 1
        privileges = 1
        log = 1
        pid = 1
        socket = 1
        io_error = 1
        process = 1
        select = 1
        unknown = 1

    # [hex(ord(c)) for c in os.popen('clear').read()]
    clear = concat_bytes_i(
        character(int(c, 16))
        for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a'])

    def __init__(self, configurations):
        """Read the environment settings and build the reactor's collaborators.

        ``configurations`` is handed unchanged to ``Configuration``.
        """
        self._ips = environment.settings().tcp.bind
        self._port = environment.settings().tcp.port
        self._stopping = environment.settings().tcp.once
        self.exit_code = self.Exit.unknown

        self.max_loop_time = environment.settings().reactor.speed
        self._sleep_time = self.max_loop_time / 100
        # {second: number of loops} for the current second only
        self._busyspin = {}
        self.early_drop = environment.settings().daemon.drop

        self.processes = None

        self.configuration = Configuration(configurations)
        self.logger = Logger()
        self.asynchronous = ASYNC()
        self.signal = Signal()
        self.daemon = Daemon(self)
        self.listener = Listener(self)
        self.api = API(self)

        self.peers = {}

        self._reload_processes = False
        self._saved_pid = False

    def _termination(self, reason, exit_code):
        """Record ``exit_code``, request a shutdown and log ``reason`` as critical."""
        self.exit_code = exit_code
        self.signal.received = Signal.SHUTDOWN
        self.logger.critical(reason, 'reactor')

    def _prevent_spin(self):
        """Sleep when called more than ``max_loop_time`` times within one second.

        Returns True when it slept, False otherwise.
        """
        second = int(time.time())
        if second not in self._busyspin:
            # only the counter of the current second is kept
            self._busyspin = {second: 0}
        self._busyspin[second] += 1
        if self._busyspin[second] > self.max_loop_time:
            time.sleep(self._sleep_time)
            return True
        return False

    def _api_ready(self, sockets, sleeptime):
        """Wait up to ``sleeptime`` for input and return the readable ``sockets``.

        The descriptors of the API processes are part of the select() call, so
        that their activity interrupts the wait, but they are removed from the
        returned list. Errors considered harmless return [] after going
        through the busy-spin protection.
        """
        fds = self.processes.fds()
        ios = fds + sockets

        try:
            read, _, _ = select.select(ios, [], [], sleeptime)
            for fd in fds:
                if fd in read:
                    read.remove(fd)
            return read
        except select.error as exc:
            err_no, _ = exc.args  # pylint: disable=W0633
            if err_no not in error.block:
                raise exc
            self._prevent_spin()
            return []
        except socket.error:
            # python 3 does not raise on closed FD, but python2 does
            # we have lost a peer and it is causing the select
            # to complain, the code will self-heal, ignore the issue
            # (EBADF from python2 must be ignored when checking error.fatal)
            # otherwise sending notification causes TCP to drop and cause
            # this code to kill ExaBGP
            self._prevent_spin()
            return []
        except ValueError:
            # The peer closing the TCP connection lead to a negative file descriptor
            self._prevent_spin()
            return []
        except KeyboardInterrupt:
            self._termination('^C received', self.Exit.normal)
            return []

    def _active_peers(self):
        """Return the keys of the peers which are not passive or which have a ``proto``."""
        peers = set()
        for key, peer in self.peers.items():
            if not peer.neighbor.passive or peer.proto:
                peers.add(key)
        return peers

    def _completed(self, peers):
        """Return True when none of ``peers`` has a pending outgoing RIB."""
        for peer in peers:
            if self.peers[peer].neighbor.rib.outgoing.pending():
                return False
        return True

    def run(self, validate, root):
        """Start the daemon and run the main loop until shutdown.

        With ``validate`` set, the parsed neighbors are logged and the method
        returns without starting the loop. ``root`` is not used by this method.
        Returns one of the ``Exit`` codes.
        """
        self.daemon.daemonise()

        # Make sure we create processes once we have closed file descriptor
        # unfortunately, this must be done before reading the configuration file
        # so we can not do it with dropped privileges
        self.processes = Processes()

        # we have to read the configuration possibly with root privileges
        # as we need the MD5 information when we bind, and root is needed
        # to bind to a port < 1024

        # this is undesirable as :
        # - handling user generated data as root should be avoided
        # - we may not be able to reload the configuration once the privileges are dropped

        # but I can not see any way to avoid it
        for ip in self._ips:
            if not self.listener.listen_on(ip, None, self._port, None, False, None):
                return self.Exit.listening

        if not self.load():
            return self.Exit.configuration

        if validate:  # only validate configuration
            self.logger.warning('', 'configuration')
            self.logger.warning('parsed Neighbors, un-templated', 'configuration')
            self.logger.warning('------------------------------', 'configuration')
            self.logger.warning('', 'configuration')
            for key in self.peers:
                self.logger.warning(str(self.peers[key].neighbor), 'configuration')
                self.logger.warning('', 'configuration')
            return self.Exit.validate

        for neighbor in self.configuration.neighbors.values():
            if neighbor.listen:
                if not self.listener.listen_on(
                        neighbor.md5_ip, neighbor.peer_address, neighbor.listen,
                        neighbor.md5_password, neighbor.md5_base64, neighbor.ttl_in):
                    return self.Exit.listening

        if not self.early_drop:
            self.processes.start(self.configuration.processes)

        if not self.daemon.drop_privileges():
            self.logger.critical(
                'could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user,
                'reactor')
            self.logger.critical(
                'set the environmemnt value exabgp.daemon.user to change the unprivileged user',
                'reactor')
            return self.Exit.privileges

        if self.early_drop:
            self.processes.start(self.configuration.processes)

        # This is required to make sure we can write in the log location as we now have dropped root privileges
        if not self.logger.restart():
            self.logger.critical('could not setup the logger, aborting', 'reactor')
            return self.Exit.log

        if not self.daemon.savepid():
            return self.Exit.pid

        # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
        reload_completed = False

        wait = environment.settings().tcp.delay
        if wait:
            # sleep until the next multiple of `wait` minutes on the clock
            sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
            self.logger.debug(
                'waiting for %d seconds before connecting' % sleeptime, 'reactor')
            time.sleep(float(sleeptime))

        # {socket: peer key} for the peers waiting for network data
        workers = {}
        peers = set()

        while True:
            try:
                if self.signal.received:
                    for key in self.peers:
                        if self.peers[key].neighbor.api['signal']:
                            self.peers[key].reactor.processes.signal(
                                self.peers[key].neighbor, self.signal.number)

                    signaled = self.signal.received
                    self.signal.rearm()

                    if signaled == Signal.SHUTDOWN:
                        self.shutdown()
                        break

                    if signaled == Signal.RESTART:
                        self.restart()
                        continue

                    if not reload_completed:
                        continue

                    if signaled == Signal.FULL_RELOAD:
                        self._reload_processes = True

                    if signaled in (Signal.RELOAD, Signal.FULL_RELOAD):
                        self.load()
                        self.processes.start(self.configuration.processes, self._reload_processes)
                        self._reload_processes = False
                        continue

                if self.listener.incoming():
                    # check all incoming connection
                    self.asynchronous.schedule(
                        str(uuid.uuid1()), 'checking for new connection(s)',
                        self.listener.new_connections())

                peers = self._active_peers()
                if self._completed(peers):
                    reload_completed = True

                sleep = self._sleep_time

                # do not attempt to listen on closed sockets even if the peer is still here
                for io in list(workers.keys()):
                    if io.fileno() == -1:
                        del workers[io]

                # give a turn to all the peers
                for key in list(peers):
                    peer = self.peers[key]
                    action = peer.run()

                    # .run() returns an ACTION enum:
                    # * immediate if it wants to be called again
                    # * later if it should be called again but has no work atm
                    # * close if it is finished and is closing down, or restarting
                    if action == ACTION.CLOSE:
                        # we are losing this peer, no point to schedule more process work
                        if key in self.peers:
                            del self.peers[key]
                        peers.discard(key)
                    elif action == ACTION.LATER:
                        for io in peer.sockets():
                            workers[io] = key
                        # no need to come back to it before a full cycle
                        peers.discard(key)
                    elif action == ACTION.NOW:
                        sleep = 0

                    if not peers:
                        break

                # read at least one message per process if there is some and parse it
                for service, command in self.processes.received():
                    self.api.text(self, service, command)
                    sleep = 0

                self.asynchronous.run()

                # peers whose socket became readable get a turn on the next cycle
                for io in self._api_ready(list(workers), sleep):
                    peers.add(workers[io])
                    del workers[io]

                if self._stopping and not self.peers.keys():
                    self._termination('exiting on peer termination', self.Exit.normal)

            except KeyboardInterrupt:
                self._termination('^C received', self.Exit.normal)
            except SystemExit:
                self._termination('exiting', self.Exit.normal)
            # socket.error is a subclass of IOError (so catch it first)
            except socket.error:
                self._termination('socket error received', self.Exit.socket)
            except IOError:
                self._termination(
                    'I/O Error received, most likely ^C during IO', self.Exit.io_error)
            except ProcessError:
                self._termination(
                    'Problem when sending message(s) to helper program, stopping',
                    self.Exit.process)
            except select.error:
                self._termination('problem using select, stopping', self.Exit.select)

        return self.exit_code

    def shutdown(self):
        """Terminate all the current BGP connections"""
        self.logger.critical('performing shutdown', 'reactor')
        if self.listener:
            self.listener.stop()
            self.listener = None
        for key in self.peers.keys():
            self.peers[key].shutdown()
        self.asynchronous.clear()
        self.processes.terminate()
        self.daemon.removepid()
        self._stopping = True

    def load(self):
        """Reload the configuration and send to the peer the route which changed"""
        self.logger.notice('performing reload of exabgp %s' % version, 'configuration')

        reloaded = self.configuration.reload()

        if not reloaded:
            #
            # Careful the string below is used by the QA code to check for success or failure
            self.logger.error(
                'problem with the configuration file, no change done', 'configuration')
            # Careful the string above is used by the QA code to check for success or failure
            #
            self.logger.error(str(self.configuration.error), 'configuration')
            return False

        for key, peer in self.peers.items():
            if key not in self.configuration.neighbors:
                self.logger.debug('removing peer: %s' % peer.neighbor.name(), 'reactor')
                peer.remove()

        for key, neighbor in self.configuration.neighbors.items():
            # new peer
            if key not in self.peers:
                self.logger.debug('new peer: %s' % neighbor.name(), 'reactor')
                peer = Peer(neighbor, self)
                self.peers[key] = peer
            # modified peer
            elif self.peers[key].neighbor != neighbor:
                self.logger.debug(
                    'peer definition change, establishing a new connection for %s' % str(key),
                    'reactor')
                self.peers[key].reestablish(neighbor)
            # same peer but perhaps not the routes
            else:
                # finding what route changed and sending the delta is not obvious
                self.logger.debug(
                    'peer definition identical, updating peer routes if required for %s' % str(key),
                    'reactor')
                self.peers[key].reconfigure(neighbor)
            for ip in self._ips:
                if ip.afi == neighbor.peer_address.afi:
                    self.listener.listen_on(
                        ip, neighbor.peer_address, self._port,
                        neighbor.md5_password, neighbor.md5_base64, None)
        self.logger.notice('loaded new configuration successfully', 'reactor')

        return True

    def restart(self):
        """Kill the BGP session and restart it"""
        self.logger.notice('performing restart of exabgp %s' % version, 'reactor')
        self.configuration.reload()

        for key in self.peers.keys():
            peer = self.peers[key]
            if key not in self.configuration.neighbors.keys():
                # the neighbor is gone from the configuration, so its name can
                # only come from the peer itself (looking it up in
                # configuration.neighbors here would raise KeyError)
                self.logger.debug('removing Peer %s' % peer.neighbor.name(), 'reactor')
                peer.remove()
            else:
                peer.reestablish()
        self.processes.start(self.configuration.processes, True)
class Reactor(object):
    """Event loop driving the BGP peers, the listener and the helper processes.

    Signals only set flags (shutdown / reload / restart); the flags are acted
    upon at the start of each iteration of the main loop in run().
    """

    # [hex(ord(c)) for c in os.popen('clear').read()]
    clear = concat_bytes_i(
        character(int(c, 16))
        for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a'])

    def __init__(self, configurations):
        """Read the settings, build the collaborators and install the signal handlers."""
        self.ip = environment.settings().tcp.bind
        self.port = environment.settings().tcp.port
        self.respawn = environment.settings().api.respawn

        self.max_loop_time = environment.settings().reactor.speed
        self.early_drop = environment.settings().daemon.drop

        self.logger = Logger()
        self.daemon = Daemon(self)
        self.processes = None
        self.listener = None
        self.configuration = Configuration(configurations)
        self.api = API(self)

        self.peers = {}
        self.route_update = False

        self._stopping = environment.settings().tcp.once
        self._shutdown = False
        self._reload = False
        self._reload_processes = False
        self._restart = False
        self._saved_pid = False
        # API callbacks waiting to run, as (generator, name) tuples
        self._pending = deque()
        # the API callback (generator) currently being run, if any
        self._running = None

        signal.signal(signal.SIGTERM, self.sigterm)
        signal.signal(signal.SIGHUP, self.sighup)
        signal.signal(signal.SIGALRM, self.sigalrm)
        signal.signal(signal.SIGUSR1, self.sigusr1)
        signal.signal(signal.SIGUSR2, self.sigusr2)

    def sigterm(self, signum, frame):
        """SIGTERM handler: request a shutdown."""
        self.logger.reactor('SIG TERM received - shutdown')
        self._shutdown = True

    def sighup(self, signum, frame):
        """SIGHUP handler: request a shutdown."""
        self.logger.reactor('SIG HUP received - shutdown')
        self._shutdown = True

    def sigalrm(self, signum, frame):
        """SIGALRM handler: request a restart."""
        self.logger.reactor('SIG ALRM received - restart')
        self._restart = True

    def sigusr1(self, signum, frame):
        """SIGUSR1 handler: request a configuration reload."""
        self.logger.reactor('SIG USR1 received - reload configuration')
        self._reload = True

    def sigusr2(self, signum, frame):
        """SIGUSR2 handler: request a reload of the configuration and the processes."""
        self.logger.reactor(
            'SIG USR2 received - reload configuration and processes')
        self._reload = True
        self._reload_processes = True

    def ready(self, sockets, ios, sleeptime=0):
        """Wait up to `sleeptime` seconds and return the readable descriptors.

        When `ios` is empty the call only sleeps and returns an empty list.
        select/socket errors which are not fatal yield an empty list.
        """
        # never sleep a negative number of second (if the rounding is negative somewhere)
        # never sleep more than one second (should the clock time change during two time.time calls)
        sleeptime = min(max(0.0, sleeptime), 1.0)
        if not ios:
            time.sleep(sleeptime)
            return []
        try:
            read, _, _ = select.select(sockets + ios, [], [], sleeptime)
            return read
        except select.error as exc:
            errno, message = exc.args  # pylint: disable=W0633
            if errno not in error.block:
                raise exc
            return []
        except socket.error as exc:
            if exc.errno in error.fatal:
                raise exc
            return []

    def run(self):
        """Daemonise, load the configuration, bind the listener and run the main loop.

        Returns False when the initial configuration can not be loaded and
        None on the other early exits (privileges, logger, pid file).
        Exits the process with status 1 when the listener can not bind.
        """
        self.daemon.daemonise()

        # Make sure we create processes once we have closed file descriptor
        # unfortunately, this must be done before reading the configuration file
        # so we can not do it with dropped privileges
        self.processes = Processes(self)

        # we have to read the configuration possibly with root privileges
        # as we need the MD5 information when we bind, and root is needed
        # to bind to a port < 1024

        # this is undesirable as :
        # - handling user generated data as root should be avoided
        # - we may not be able to reload the configuration once the privileges are dropped

        # but I can not see any way to avoid it

        if not self.load():
            return False

        try:
            self.listener = Listener()

            if self.ip:
                self.listener.listen(IP.create(self.ip), IP.create('0.0.0.0'),
                                     self.port, None, False, None)
                self.logger.reactor('Listening for BGP session(s) on %s:%d' % (self.ip, self.port))

            for neighbor in self.configuration.neighbors.values():
                if neighbor.listen:
                    self.listener.listen(neighbor.md5_ip, neighbor.peer_address,
                                         neighbor.listen, neighbor.md5_password,
                                         neighbor.md5_base64, neighbor.ttl_in)
                    self.logger.reactor(
                        'Listening for BGP session(s) on %s:%d%s' %
                        (neighbor.md5_ip, neighbor.listen,
                         ' with MD5' if neighbor.md5_password else ''))
        except NetworkError as exc:
            self.listener = None
            if os.geteuid() != 0 and self.port <= 1024:
                self.logger.reactor(
                    'Can not bind to %s:%d, you may need to run ExaBGP as root' %
                    (self.ip, self.port), 'critical')
            else:
                self.logger.reactor(
                    'Can not bind to %s:%d (%s)' % (self.ip, self.port, str(exc)),
                    'critical')
            self.logger.reactor(
                'unset exabgp.tcp.bind if you do not want listen for incoming connections',
                'critical')
            self.logger.reactor(
                'and check that no other daemon is already binding to port %d' % self.port,
                'critical')
            sys.exit(1)

        if not self.early_drop:
            self.processes.start()

        if not self.daemon.drop_privileges():
            self.logger.reactor(
                'Could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user,
                'critical')
            self.logger.reactor(
                'Set the environmemnt value exabgp.daemon.user to change the unprivileged user',
                'critical')
            return

        if self.early_drop:
            self.processes.start()

        # This is required to make sure we can write in the log location as we now have dropped root privileges
        if not self.logger.restart():
            self.logger.reactor('Could not setup the logger, aborting', 'critical')
            return

        if not self.daemon.savepid():
            return

        # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
        reload_completed = True

        wait = environment.settings().tcp.delay
        if wait:
            # sleep until the next multiple of `wait` minutes
            sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
            self.logger.reactor('waiting for %d seconds before connecting' % sleeptime)
            time.sleep(float(sleeptime))

        # socket -> peer key, for the peers which asked to be called back later
        workers = {}
        # keys of the peers which must be given a turn
        peers = set()
        scheduled = False

        while True:
            try:
                finished = False
                start = time.time()
                end = start + self.max_loop_time

                if self._shutdown:
                    self._shutdown = False
                    self.shutdown()
                    break

                if self._reload and reload_completed:
                    self._reload = False
                    self.load()
                    self.processes.start(self._reload_processes)
                    self._reload_processes = False
                elif self._restart:
                    self._restart = False
                    self.restart()

                # We got some API routes to announce
                if self.route_update:
                    self.route_update = False
                    self.route_send()

                for peer in self.peers.keys():
                    peers.add(peer)

                while start < time.time() < end and not finished:
                    if self.peers:
                        for key in list(peers):
                            peer = self.peers[key]
                            action = peer.run()

                            # .run() returns an ACTION enum:
                            # * immediate if it wants to be called again
                            # * later if it should be called again but has no work atm
                            # * close if it is finished and is closing down, or restarting
                            if action == ACTION.CLOSE:
                                self.unschedule(key)
                                peers.discard(key)
                            # we are loosing this peer, not point to schedule more process work
                            elif action == ACTION.LATER:
                                for io in peer.sockets():
                                    workers[io] = key
                                # no need to come back to it before a a full cycle
                                peers.discard(key)

                    if not peers:
                        reload_completed = True

                    if self.listener:
                        for connection in self.listener.connected():
                            # found
                            # * False, not peer found for this TCP connection
                            # * True, peer found
                            # * None, conflict found for this TCP connections
                            found = False
                            for key in self.peers:
                                peer = self.peers[key]
                                neighbor = peer.neighbor
                                # XXX: FIXME: Inet can only be compared to Inet
                                if connection.local == str(neighbor.peer_address) and (
                                        neighbor.auto_discovery or
                                        connection.peer == str(neighbor.local_address)):
                                    if peer.incoming(connection):
                                        found = True
                                        break
                                    found = None
                                    break

                            if found:
                                self.logger.reactor(
                                    'accepted connection from %s - %s' %
                                    (connection.local, connection.peer))
                            elif found is False:
                                self.logger.reactor(
                                    'no session configured for %s - %s' %
                                    (connection.local, connection.peer))
                                connection.notification(
                                    6, 3, 'no session configured for the peer')
                                connection.close()
                            elif found is None:
                                self.logger.reactor(
                                    'connection refused (already connected to the peer) %s - %s' %
                                    (connection.local, connection.peer))
                                connection.notification(
                                    6, 5, 'could not accept the connection')
                                connection.close()

                    scheduled = self.schedule()
                    finished = not peers and not scheduled

                # RFC state that we MUST not send more than one KEEPALIVE / sec
                # And doing less could cause the session to drop

                if finished:
                    for io in self.ready(list(peers), self.processes.fds(), end - time.time()):
                        if io in workers:
                            peers.add(workers[io])
                            del workers[io]

                if self._stopping and not self.peers.keys():
                    break

            except KeyboardInterrupt:
                # retry until the flag is set and the message logged without
                # being interrupted by another ^C
                while True:
                    try:
                        self._shutdown = True
                        self.logger.reactor('^C received')
                        break
                    except KeyboardInterrupt:
                        pass
            # socket.error is a subclass of IOError (so catch it first)
            except socket.error:
                try:
                    self._shutdown = True
                    self.logger.reactor('socket error received', 'warning')
                    # NOTE(review): there is no inner retry loop here, so this
                    # `break` leaves the main loop directly and shutdown() is
                    # not called - confirm this is intended
                    break
                except KeyboardInterrupt:
                    pass
            except IOError:
                while True:
                    try:
                        self._shutdown = True
                        self.logger.reactor(
                            'I/O Error received, most likely ^C during IO', 'warning')
                        break
                    except KeyboardInterrupt:
                        pass
            except SystemExit:
                try:
                    self._shutdown = True
                    self.logger.reactor('exiting')
                    # NOTE(review): as for socket.error above, this `break`
                    # leaves the main loop without calling shutdown()
                    break
                except KeyboardInterrupt:
                    pass
            except ProcessError:
                try:
                    self._shutdown = True
                    self.logger.reactor(
                        'Problem when sending message(s) to helper program, stopping',
                        'error')
                except KeyboardInterrupt:
                    pass
            except select.error:
                try:
                    self._shutdown = True
                    self.logger.reactor('problem using select, stopping', 'error')
                except KeyboardInterrupt:
                    pass

        # from exabgp.leak import objgraph
        # print objgraph.show_most_common_types(limit=20)
        # import random
        # obj = objgraph.by_type('Route')[random.randint(0,2000)]
        # objgraph.show_backrefs([obj], max_depth=10)

    def shutdown(self):
        """terminate all the current BGP connections"""
        self.logger.reactor('performing shutdown')
        if self.listener:
            self.listener.stop()
            self.listener = None
        for key in self.peers.keys():
            self.peers[key].stop()
        self.processes.terminate()
        self.daemon.removepid()
        self._stopping = True

    def load(self):
        """reload the configuration and send to the peer the route which changed

        Returns False (after logging the error) when the configuration could
        not be reloaded, True otherwise.
        """
        self.logger.reactor('performing reload of exabgp %s' % version)

        reloaded = self.configuration.reload()

        if not reloaded:
            #
            # Careful: the string below is used by the QA code to check for success or failure
            self.logger.configuration(
                'problem with the configuration file, no change done', 'error')
            # Careful: the string above is used by the QA code to check for success or failure
            #
            self.logger.configuration(str(self.configuration.error), 'error')
            return False

        # stop the peers which disappeared from the configuration
        for key, peer in self.peers.items():
            if key not in self.configuration.neighbors:
                self.logger.reactor('removing peer: %s' % peer.neighbor.name())
                peer.stop()

        for key, neighbor in self.configuration.neighbors.items():
            # new peer
            if key not in self.peers:
                self.logger.reactor('new peer: %s' % neighbor.name())
                peer = Peer(neighbor, self)
                self.peers[key] = peer
            # modified peer
            elif self.peers[key].neighbor != neighbor:
                self.logger.reactor(
                    'peer definition change, establishing a new connection for %s' % str(key))
                self.peers[key].reestablish(neighbor)
            # same peer but perhaps not the routes
            else:
                # finding what route changed and sending the delta is not obvious
                self.logger.reactor(
                    'peer definition identical, updating peer routes if required for %s' % str(key))
                self.peers[key].reconfigure(neighbor)

        self.logger.configuration('loaded new configuration successfully', 'info')
        return True

    def schedule(self):
        """Feed the API with the helper processes' commands and run one pending callback.

        Returns False when there is no callback to run, True when a callback
        was given a turn, and None when StopIteration or ^C was caught.
        """
        try:
            # read at least on message per process if there is some and parse it
            for service, command in self.processes.received():
                self.api.text(self, service, command)

            # if we have nothing to do, return or save the work
            if not self._running:
                if not self._pending:
                    return False
                self._running, name = self._pending.popleft()
                self.logger.reactor('callback | installing %s' % name)

            if self._running:
                # run it
                try:
                    self.logger.reactor('callback | running')
                    six.next(self._running)  # run
                    # should raise StopIteration in most case
                    # and prevent us to have to run twice to run one command
                    six.next(self._running)  # run
                except StopIteration:
                    self._running = None
                    self.logger.reactor('callback | removing')
                return True

        except StopIteration:
            pass
        except KeyboardInterrupt:
            self._shutdown = True
            self.logger.reactor('^C received', 'error')

    def route_send(self):
        """the process ran and we need to figure what routes to changes"""
        self.logger.reactor('performing dynamic route update')
        for key in self.configuration.neighbors.keys():
            self.peers[key].send_new()
        self.logger.reactor('updated peers dynamic routes successfully')

    def restart(self):
        """kill the BGP session and restart it

        The configuration is reloaded first; peers missing from the new
        configuration are stopped, all the others are re-established, and
        the helper processes are terminated and started again.
        """
        self.logger.reactor('performing restart of exabgp %s' % version)
        self.configuration.reload()

        for key in self.peers.keys():
            if key not in self.configuration.neighbors:
                # The key is, by definition, absent from configuration.neighbors:
                # the name must come from the peer's own neighbor (looking it up
                # in the configuration raised KeyError).
                peer = self.peers[key]
                self.logger.reactor('removing Peer %s' % peer.neighbor.name())
                peer.stop()
            else:
                self.peers[key].reestablish()

        self.processes.terminate()
        self.processes.start()

    def unschedule(self, peer):
        """Forget the peer registered under the key `peer`, if any."""
        if peer in self.peers:
            del self.peers[peer]

    def answer(self, service, string):
        """Write `string` to the helper process `service` and log it."""
        self.processes.write(service, string)
        self.logger.reactor('responding to %s : %s' %
                            (service, string.replace('\n', '\\n')))

    def api_shutdown(self):
        """Request a shutdown and drop any queued or running API callback."""
        self._shutdown = True
        self._pending = deque()
        self._running = None

    def api_reload(self):
        """Request a reload and drop any queued or running API callback."""
        self._reload = True
        self._pending = deque()
        self._running = None

    def api_restart(self):
        """Request a restart and drop any queued or running API callback."""
        self._restart = True
        self._pending = deque()
        self._running = None

    @staticmethod
    def match_neighbor(description, name):
        """Return True when every string of `description` appears in `name`.

        A string matches when it is found at the start of `name` or after a
        whitespace, and is followed by the end of `name`, a whitespace or a
        comma. An empty description matches any name.
        """
        for string in description:
            if re.search(r'(^|[\s])%s($|[\s,])' % re.escape(string), name) is None:
                return False
        return True

    def match_neighbors(self, descriptions):
        """return the sublist of peers matching the description passed, or all the peer keys if no description is given"""
        if not descriptions:
            return self.peers.keys()

        returned = []
        for key in self.peers:
            for description in descriptions:
                if Reactor.match_neighbor(description, key):
                    if key not in returned:
                        returned.append(key)
        return returned

    def nexthops(self, peers):
        """Map each peer key of `peers` to the local address of its neighbor."""
        return dict(
            (peer, self.peers[peer].neighbor.local_address) for peer in peers)

    def plan(self, callback, name):
        """Queue the API callback `callback`, labelled `name`, for schedule()."""
        self._pending.append((callback, name))
class Reactor(object):
    """Event loop driving the BGP peers, the listeners and the helper processes.

    Signals only set flags (shutdown / reload / restart) and record which
    peers must have the signal forwarded to their processes; the flags are
    acted upon at the start of each iteration of the main loop in run().
    """

    # [hex(ord(c)) for c in os.popen('clear').read()]
    clear = concat_bytes_i(
        character(int(c, 16))
        for c in ['0x1b', '0x5b', '0x48', '0x1b', '0x5b', '0x32', '0x4a'])

    def __init__(self, configurations):
        """Read the settings, build the collaborators and install the signal handlers."""
        self.ips = environment.settings().tcp.bind
        self.port = environment.settings().tcp.port
        self.ack = environment.settings().api.ack

        self.max_loop_time = environment.settings().reactor.speed
        self.early_drop = environment.settings().daemon.drop

        self.logger = Logger()
        self.daemon = Daemon(self)
        self.processes = None
        self.listener = None
        self.configuration = Configuration(configurations)
        self.api = API(self)

        self.peers = {}
        self.route_update = False

        self._stopping = environment.settings().tcp.once
        self._shutdown = False
        self._reload = False
        self._reload_processes = False
        self._restart = False
        self._saved_pid = False
        # the API callback (generator) currently being run, if any
        self._running = None
        # API callbacks waiting to run, as (generator, name) tuples
        self._pending = deque()
        # generators (refusals / notifications) still to be advanced
        self._async = deque()
        # peer key -> signal number to forward to the peer's processes
        self._signal = {}

        signal.signal(signal.SIGTERM, self.sigterm)
        signal.signal(signal.SIGHUP, self.sighup)
        signal.signal(signal.SIGALRM, self.sigalrm)
        signal.signal(signal.SIGUSR1, self.sigusr1)
        signal.signal(signal.SIGUSR2, self.sigusr2)

    def _termination(self, reason):
        """Request a shutdown and log `reason`, retrying if interrupted by ^C."""
        while True:
            try:
                self._shutdown = True
                self.logger.reactor(reason, 'warning')
                break
            except KeyboardInterrupt:
                pass

    def _queue_signal(self, signum):
        """Record `signum` for every peer whose neighbor has api['signal'] set."""
        for key in self.peers:
            if self.peers[key].neighbor.api['signal']:
                self._signal[key] = signum

    def sigterm(self, signum, frame):
        """SIGTERM handler: request a shutdown."""
        self.logger.reactor('SIG TERM received - shutdown')
        self._shutdown = True
        self._queue_signal(signum)

    def sighup(self, signum, frame):
        """SIGHUP handler: request a shutdown."""
        self.logger.reactor('SIG HUP received - shutdown')
        self._shutdown = True
        self._queue_signal(signum)

    def sigalrm(self, signum, frame):
        """SIGALRM handler: request a restart."""
        self.logger.reactor('SIG ALRM received - restart')
        self._restart = True
        self._queue_signal(signum)

    def sigusr1(self, signum, frame):
        """SIGUSR1 handler: request a configuration reload."""
        self.logger.reactor('SIG USR1 received - reload configuration')
        self._reload = True
        self._queue_signal(signum)

    def sigusr2(self, signum, frame):
        """SIGUSR2 handler: request a reload of the configuration and the processes."""
        self.logger.reactor(
            'SIG USR2 received - reload configuration and processes')
        self._reload = True
        self._reload_processes = True
        self._queue_signal(signum)

    def _api_ready(self, sockets):
        """Wait for activity and return the readable descriptors of `sockets`.

        The helper processes' descriptors are included in the select() call
        so that API activity wakes the loop, but they are removed from the
        returned list. Non fatal select/socket errors and ^C yield [].
        """
        sleeptime = self.max_loop_time / 20
        fds = self.processes.fds()
        ios = fds + sockets
        try:
            read, _, _ = select.select(ios, [], [], sleeptime)
            for fd in fds:
                if fd in read:
                    read.remove(fd)
            return read
        except select.error as exc:
            errno, message = exc.args  # pylint: disable=W0633
            if errno not in error.block:
                raise exc
            return []
        except socket.error as exc:
            if exc.errno in error.fatal:
                raise exc
            return []
        except KeyboardInterrupt:
            self._termination('^C received')
            return []

    def _setup_listener(self, local_addr, remote_addr, port, md5_password,
                        md5_base64, ttl_in):
        """Listen on local_addr:port, creating the Listener on first use.

        A false `remote_addr` is replaced by the wildcard address of the
        family of `local_addr`. Returns True on success and False (after
        logging why) when the address can not be bound.
        """
        try:
            if not self.listener:
                self.listener = Listener()
            if not remote_addr:
                remote_addr = IP.create(
                    '0.0.0.0') if local_addr.ipv4() else IP.create('::')
            self.listener.listen(local_addr, remote_addr, port, md5_password,
                                 md5_base64, ttl_in)
            self.logger.reactor(
                'Listening for BGP session(s) on %s:%d%s' %
                (local_addr, port, ' with MD5' if md5_password else ''))
            return True
        except NetworkError as exc:
            if os.geteuid() != 0 and port <= 1024:
                self.logger.reactor(
                    'Can not bind to %s:%d, you may need to run ExaBGP as root' %
                    (local_addr, port), 'critical')
            else:
                self.logger.reactor(
                    'Can not bind to %s:%d (%s)' % (local_addr, port, str(exc)),
                    'critical')
            self.logger.reactor(
                'unset exabgp.tcp.bind if you do not want listen for incoming connections',
                'critical')
            self.logger.reactor(
                'and check that no other daemon is already binding to port %d' % port,
                'critical')
            return False

    def _handle_listener(self):
        """Hand each incoming connection to the matching peer.

        A connection matching a "range" neighbor (range_size > 1) gets a
        dedicated peer generated from a copy of that neighbor; connections
        which can not be attributed are answered with a notification.
        """
        if not self.listener:
            return

        # NOTE(review): this list is shared by all the connections handled in
        # one call, and every branch of the for/else below returns, so at most
        # one unmatched connection is processed per call - confirm intended
        ranged_neighbor = []

        for connection in self.listener.connected():
            for key in self.peers:
                peer = self.peers[key]
                neighbor = peer.neighbor

                connection_local = IP.create(connection.local).address()
                neighbor_peer_start = neighbor.peer_address.address()
                neighbor_peer_next = neighbor_peer_start + neighbor.range_size

                if not neighbor_peer_start <= connection_local < neighbor_peer_next:
                    continue

                connection_peer = IP.create(connection.peer).address()
                neighbor_local = neighbor.local_address.address()

                if connection_peer != neighbor_local:
                    if not neighbor.auto_discovery:
                        continue

                # we found a range matching for this connection
                # but the peer may already have connected, so
                # we need to iterate all individual peers before
                # handling "range" peers
                if neighbor.range_size > 1:
                    ranged_neighbor.append(peer.neighbor)
                    continue

                denied = peer.handle_connection(connection)
                if denied:
                    self.logger.reactor(
                        'refused connection from %s due to the state machine' %
                        connection.name())
                    self._async.append(denied)
                    break
                self.logger.reactor('accepted connection from %s' % connection.name())
                break
            else:
                # we did not break (and nothing was found/done or we have group match)
                matched = len(ranged_neighbor)
                if matched > 1:
                    self.logger.reactor(
                        'could not accept connection from %s (more than one neighbor match)' %
                        connection.name())
                    self._async.append(
                        connection.notification(
                            6, 5,
                            b'could not accept the connection (more than one neighbor match)'
                        ))
                    return
                if not matched:
                    self.logger.reactor('no session configured for %s' % connection.name())
                    self._async.append(
                        connection.notification(
                            6, 3, b'no session configured for the peer'))
                    return

                # exactly one range matched: generate a peer for this connection
                new_neighbor = copy.copy(ranged_neighbor[0])
                new_neighbor.range_size = 1
                new_neighbor.generated = True
                new_neighbor.local_address = IP.create(connection.peer)
                new_neighbor.peer_address = IP.create(connection.local)

                new_peer = Peer(new_neighbor, self)
                denied = new_peer.handle_connection(connection)
                if denied:
                    self.logger.reactor(
                        'refused connection from %s due to the state machine' %
                        connection.name())
                    self._async.append(denied)
                    return

                self.peers[new_neighbor.name()] = new_peer
                return

    def run(self, validate):
        """Daemonise, bind, load the configuration and run the main loop.

        When `validate` is true the parsed neighbors are logged and True is
        returned without entering the loop. Returns False when a listener
        can not be set up or the configuration can not be loaded, and None
        on the other early exits (privileges, logger, pid file).
        """
        self.daemon.daemonise()

        # Make sure we create processes once we have closed file descriptor
        # unfortunately, this must be done before reading the configuration file
        # so we can not do it with dropped privileges
        self.processes = Processes(self)

        # we have to read the configuration possibly with root privileges
        # as we need the MD5 information when we bind, and root is needed
        # to bind to a port < 1024

        # this is undesirable as :
        # - handling user generated data as root should be avoided
        # - we may not be able to reload the configuration once the privileges are dropped

        # but I can not see any way to avoid it
        for ip in self.ips:
            if not self._setup_listener(ip, None, self.port, None, False, None):
                return False

        if not self.load():
            return False

        if validate:  # only validate configuration
            self.logger.configuration('')
            self.logger.configuration('Parsed Neighbors, un-templated')
            self.logger.configuration('------------------------------')
            self.logger.configuration('')
            for key in self.peers:
                self.logger.configuration(str(self.peers[key].neighbor))
                self.logger.configuration('')
            return True

        for neighbor in self.configuration.neighbors.values():
            if neighbor.listen:
                if not self._setup_listener(
                        neighbor.md5_ip, neighbor.peer_address, neighbor.listen,
                        neighbor.md5_password, neighbor.md5_base64,
                        neighbor.ttl_in):
                    return False

        if not self.early_drop:
            self.processes.start()

        if not self.daemon.drop_privileges():
            self.logger.reactor(
                'Could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user,
                'critical')
            self.logger.reactor(
                'Set the environmemnt value exabgp.daemon.user to change the unprivileged user',
                'critical')
            return

        if self.early_drop:
            self.processes.start()

        # This is required to make sure we can write in the log location as we now have dropped root privileges
        if not self.logger.restart():
            self.logger.reactor('Could not setup the logger, aborting', 'critical')
            return

        if not self.daemon.savepid():
            return

        # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
        reload_completed = True

        wait = environment.settings().tcp.delay
        if wait:
            # sleep until the next multiple of `wait` minutes
            sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
            self.logger.reactor('waiting for %d seconds before connecting' % sleeptime)
            time.sleep(float(sleeptime))

        # socket -> peer key, for the peers which asked to be called back later
        workers = {}
        # keys of the peers which must be given a turn
        peers = set()
        busy = False

        while True:
            try:
                start = time.time()
                end = start + self.max_loop_time

                if self._shutdown:
                    self._shutdown = False
                    self.shutdown()
                    break

                if self._reload and reload_completed:
                    self._reload = False
                    self.load()
                    self.processes.start(self._reload_processes)
                    self._reload_processes = False
                elif self._restart:
                    self._restart = False
                    self.restart()

                # We got some API routes to announce
                if self.route_update:
                    self.route_update = False
                    self.route_send()

                for key, peer in self.peers.items():
                    if not peer.neighbor.passive or peer.proto:
                        peers.add(key)
                    if key in self._signal:
                        self.peers[key].reactor.processes.signal(
                            self.peers[key].neighbor, self._signal[key])
                self._signal = {}

                # check all incoming connection
                self._handle_listener()

                # give a turn to all the peers
                while start < time.time() < end:
                    for key in list(peers):
                        peer = self.peers[key]
                        action = peer.run()

                        # .run() returns an ACTION enum:
                        # * immediate if it wants to be called again
                        # * later if it should be called again but has no work atm
                        # * close if it is finished and is closing down, or restarting
                        if action == ACTION.CLOSE:
                            self._unschedule(key)
                            peers.discard(key)
                        # we are loosing this peer, not point to schedule more process work
                        elif action == ACTION.LATER:
                            for io in peer.sockets():
                                workers[io] = key
                            # no need to come back to it before a a full cycle
                            peers.discard(key)

                    # handle API calls
                    busy = self._scheduled_api()
                    # handle new connections
                    busy |= self._scheduled_listener()

                    if not peers and not busy:
                        break

                if not peers:
                    reload_completed = True

                for io in self._api_ready(list(workers)):
                    peers.add(workers[io])
                    del workers[io]

                if self._stopping and not self.peers.keys():
                    break

            except KeyboardInterrupt:
                self._termination('^C received')
            # socket.error is a subclass of IOError (so catch it first)
            except socket.error:
                self._termination('socket error received')
            except IOError:
                self._termination(
                    'I/O Error received, most likely ^C during IO')
            except SystemExit:
                self._termination('exiting')
            except ProcessError:
                self._termination(
                    'Problem when sending message(s) to helper program, stopping'
                )
            except select.error:
                self._termination('problem using select, stopping')

    def shutdown(self):
        """terminate all the current BGP connections"""
        self.logger.reactor('performing shutdown')
        if self.listener:
            self.listener.stop()
            self.listener = None
        for key in self.peers.keys():
            self.peers[key].stop()
        self.processes.terminate()
        self.daemon.removepid()
        self._stopping = True

    def load(self):
        """reload the configuration and send to the peer the route which changed

        Returns False (after logging the error) when the configuration could
        not be reloaded, True otherwise.
        """
        self.logger.reactor('performing reload of exabgp %s' % version)

        reloaded = self.configuration.reload()

        if not reloaded:
            #
            # Careful: the string below is used by the QA code to check for success or failure
            self.logger.configuration(
                'problem with the configuration file, no change done', 'error')
            # Careful: the string above is used by the QA code to check for success or failure
            #
            self.logger.configuration(str(self.configuration.error), 'error')
            return False

        # stop the peers which disappeared from the configuration
        for key, peer in self.peers.items():
            if key not in self.configuration.neighbors:
                self.logger.reactor('removing peer: %s' % peer.neighbor.name())
                peer.stop()

        for key, neighbor in self.configuration.neighbors.items():
            # new peer
            if key not in self.peers:
                self.logger.reactor('new peer: %s' % neighbor.name())
                peer = Peer(neighbor, self)
                self.peers[key] = peer
            # modified peer
            elif self.peers[key].neighbor != neighbor:
                self.logger.reactor(
                    'peer definition change, establishing a new connection for %s' % str(key))
                self.peers[key].reestablish(neighbor)
            # same peer but perhaps not the routes
            else:
                # finding what route changed and sending the delta is not obvious
                self.logger.reactor(
                    'peer definition identical, updating peer routes if required for %s' % str(key))
                self.peers[key].reconfigure(neighbor)

            # listen for this neighbor on every bound IP of the same address family
            for ip in self.ips:
                if ip.afi == neighbor.peer_address.afi:
                    self._setup_listener(ip, neighbor.peer_address, self.port,
                                         neighbor.md5_password,
                                         neighbor.md5_base64, None)

        self.logger.configuration('loaded new configuration successfully', 'info')
        return True

    def _scheduled_listener(self, flipflop=None):
        """Advance every queued listener generator and keep the unfinished ones.

        Each generator is advanced twice; those raising StopIteration are
        dropped. Returns True while some generators remain, False otherwise
        (and on ^C, after requesting a shutdown).

        `flipflop` optionally supplies the container receiving the survivors.
        A fresh deque is used by default: the previous mutable default ended
        up being `self._async` itself after the first call, so later calls
        appended to the very list they were iterating.
        """
        try:
            survivors = deque() if flipflop is None else flipflop
            for generator in self._async:
                try:
                    six.next(generator)
                    six.next(generator)
                    survivors.append(generator)
                except StopIteration:
                    pass
            self._async = survivors
            return len(self._async) > 0
        except KeyboardInterrupt:
            self._termination('^C received')
            return False

    def _scheduled_api(self):
        """Feed the API with the helper processes' commands and run one pending callback.

        Returns True when a callback was given a turn, False otherwise
        (including on ^C, after requesting a shutdown).
        """
        try:
            # read at least on message per process if there is some and parse it
            for service, command in self.processes.received():
                self.api.text(self, service, command)

            # if we have nothing to do, return or save the work
            if not self._running:
                if not self._pending:
                    return False
                self._running, name = self._pending.popleft()
                self.logger.reactor('callback | installing %s' % name)

            if self._running:
                # run it
                try:
                    self.logger.reactor('callback | running')
                    six.next(self._running)  # run
                    # should raise StopIteration in most case
                    # and prevent us to have to run twice to run one command
                    six.next(self._running)  # run
                except StopIteration:
                    self._running = None
                    self.logger.reactor('callback | removing')
                return True
            return False

        except KeyboardInterrupt:
            self._termination('^C received')
            return False

    def route_send(self):
        """the process ran and we need to figure what routes to changes"""
        self.logger.reactor('performing dynamic route update')
        for key in self.configuration.neighbors.keys():
            self.peers[key].send_new()
        self.logger.reactor('updated peers dynamic routes successfully')

    def restart(self):
        """kill the BGP session and restart it

        The configuration is reloaded first; peers missing from the new
        configuration are stopped, all the others are re-established, and
        the helper processes are terminated and started again.
        """
        self.logger.reactor('performing restart of exabgp %s' % version)
        self.configuration.reload()

        for key in self.peers.keys():
            if key not in self.configuration.neighbors:
                # The key is, by definition, absent from configuration.neighbors:
                # the name must come from the peer's own neighbor (looking it up
                # in the configuration raised KeyError).
                peer = self.peers[key]
                self.logger.reactor('removing Peer %s' % peer.neighbor.name())
                peer.stop()
            else:
                self.peers[key].reestablish()

        self.processes.terminate()
        self.processes.start()

    def _unschedule(self, peer):
        """Forget the peer registered under the key `peer`, if any."""
        if peer in self.peers:
            del self.peers[peer]

    def answer(self, service, string):
        """Answer the helper process `service`, only when API acks are enabled."""
        if self.ack:
            self.always_answer(service, string)

    def always_answer(self, service, string):
        """Write `string` to the helper process `service` and log it."""
        self.processes.write(service, string)
        self.logger.reactor('responding to %s : %s' %
                            (service, string.replace('\n', '\\n')))

    def api_shutdown(self):
        """Request a shutdown and drop any queued or running API callback."""
        self._shutdown = True
        self._pending = deque()
        self._running = None

    def api_reload(self):
        """Request a reload and drop any queued or running API callback."""
        self._reload = True
        self._pending = deque()
        self._running = None

    def api_restart(self):
        """Request a restart and drop any queued or running API callback."""
        self._restart = True
        self._pending = deque()
        self._running = None

    @staticmethod
    def match_neighbor(description, name):
        """Return True when every string of `description` appears in `name`.

        A string matches when it is found at the start of `name` or after a
        whitespace, and is followed by the end of `name`, a whitespace or a
        comma. An empty description matches any name.
        """
        for string in description:
            if re.search(r'(^|[\s])%s($|[\s,])' % re.escape(string), name) is None:
                return False
        return True

    def match_neighbors(self, descriptions):
        """return the sublist of peers matching the description passed, or all the peer keys if no description is given"""
        if not descriptions:
            return self.peers.keys()

        returned = []
        for key in self.peers:
            for description in descriptions:
                if Reactor.match_neighbor(description, key):
                    if key not in returned:
                        returned.append(key)
        return returned

    def nexthops(self, peers):
        """Map each peer key of `peers` to the local address of its neighbor."""
        return dict(
            (peer, self.peers[peer].neighbor.local_address) for peer in peers)

    def plan(self, callback, name):
        """Queue the API callback `callback`, labelled `name`, for _scheduled_api()."""
        self._pending.append((callback, name))
class Reactor(object):
    """Main loop of the daemon.

    The reactor holds the configuration, the peers (one per configured
    neighbor), the optional listener for incoming connections and the helper
    processes, and drives all of them from run().
    """

    # Terminal "clear screen" sequence, as obtained with
    # [hex(ord(c)) for c in os.popen('clear').read()]
    # i.e. the bytes 0x1b 0x5b 0x48 0x1b 0x5b 0x32 0x4a
    clear = b'\x1b[H\x1b[2J'

    def __init__(self, configurations):
        """Read the settings, build the collaborators and install the signal handlers.

        configurations: passed as-is to Configuration().
        """
        self.ip = environment.settings().tcp.bind
        self.port = environment.settings().tcp.port
        self.respawn = environment.settings().api.respawn

        self.max_loop_time = environment.settings().reactor.speed
        self.early_drop = environment.settings().daemon.drop

        self.logger = Logger()
        self.daemon = Daemon(self)
        self.processes = None
        self.listener = None
        self.configuration = Configuration(configurations)
        self.api = API(self)

        self.peers = {}
        self.route_update = False

        self._stopping = environment.settings().tcp.once
        # flags raised by the signal handlers / the API and consumed by run()
        self._shutdown = False
        self._reload = False
        self._reload_processes = False
        self._restart = False
        self._saved_pid = False
        # callbacks planned with plan() and the one currently being run
        self._pending = deque()
        self._running = None

        signal.signal(signal.SIGTERM, self.sigterm)
        signal.signal(signal.SIGHUP, self.sighup)
        signal.signal(signal.SIGALRM, self.sigalrm)
        signal.signal(signal.SIGUSR1, self.sigusr1)
        signal.signal(signal.SIGUSR2, self.sigusr2)

    def sigterm(self, signum, frame):
        """SIGTERM handler: request a shutdown."""
        self.logger.reactor('SIG TERM received - shutdown')
        self._shutdown = True

    def sighup(self, signum, frame):
        """SIGHUP handler: request a shutdown."""
        self.logger.reactor('SIG HUP received - shutdown')
        self._shutdown = True

    def sigalrm(self, signum, frame):
        """SIGALRM handler: request a restart."""
        self.logger.reactor('SIG ALRM received - restart')
        self._restart = True

    def sigusr1(self, signum, frame):
        """SIGUSR1 handler: request a reload of the configuration."""
        self.logger.reactor('SIG USR1 received - reload configuration')
        self._reload = True

    def sigusr2(self, signum, frame):
        """SIGUSR2 handler: request a reload of the configuration and of the processes."""
        self.logger.reactor('SIG USR2 received - reload configuration and processes')
        self._reload = True
        self._reload_processes = True

    def ready(self, sockets, ios, sleeptime=0):
        """Wait until some of `sockets` + `ios` are readable, or the delay expires.

        sockets, ios: lists of objects select.select() accepts.
        sleeptime: maximum wait in seconds, clamped to the range [0, 1].

        Returns the list of readable objects. When `ios` is empty the method
        only sleeps and returns []. Errors listed in error.block (select) or
        not listed in error.fatal (socket) are swallowed and [] is returned;
        the other ones are re-raised.
        """
        # never sleep a negative number of second (if the rounding is negative somewhere)
        # never sleep more than one second (should the clock time change during two time.time calls)
        sleeptime = min(max(0.0, sleeptime), 1.0)
        if not ios:
            time.sleep(sleeptime)
            return []
        try:
            read, _, _ = select.select(sockets + ios, [], [], sleeptime)
            return read
        except select.error as exc:
            # the error number is the first argument of the exception
            if exc.args[0] not in error.block:
                raise
            return []
        except socket.error as exc:
            if exc.errno in error.fatal:
                raise
            return []

    def _listen(self):
        """Create the listener and bind it; exit the program when binding fails."""
        try:
            self.listener = Listener()

            if self.ip:
                self.listener.listen(IP.create(self.ip), IP.create('0.0.0.0'), self.port, None, None)
                self.logger.reactor('Listening for BGP session(s) on %s:%d' % (self.ip, self.port))

            for neighbor in self.configuration.neighbors.values():
                if neighbor.listen:
                    self.listener.listen(neighbor.md5_ip, neighbor.peer_address, neighbor.listen, neighbor.md5_password, neighbor.ttl_in)
                    self.logger.reactor('Listening for BGP session(s) on %s:%d%s' % (neighbor.md5_ip, neighbor.listen, ' with MD5' if neighbor.md5_password else ''))
        except NetworkError as exc:
            self.listener = None
            if os.geteuid() != 0 and self.port <= 1024:
                self.logger.reactor('Can not bind to %s:%d, you may need to run ExaBGP as root' % (self.ip, self.port), 'critical')
            else:
                self.logger.reactor('Can not bind to %s:%d (%s)' % (self.ip, self.port, str(exc)), 'critical')
            self.logger.reactor('unset exabgp.tcp.bind if you do not want listen for incoming connections', 'critical')
            self.logger.reactor('and check that no other daemon is already binding to port %d' % self.port, 'critical')
            sys.exit(1)

    def _accept_connections(self):
        """Hand every new incoming connection to the peer configured for it."""
        for connection in self.listener.connected():
            # found
            # * False, no peer found for this TCP connection
            # * True, peer found (and it took the connection)
            # * None, conflict found for this TCP connection
            found = False
            for peer in self.peers.values():
                neighbor = peer.neighbor
                # XXX: FIXME: Inet can only be compared to Inet
                if connection.local == str(neighbor.peer_address) and connection.peer == str(neighbor.local_address):
                    found = True if peer.incoming(connection) else None
                    break

            if found:
                self.logger.reactor('accepted connection from %s - %s' % (connection.local, connection.peer))
            elif found is False:
                self.logger.reactor('no session configured for %s - %s' % (connection.local, connection.peer))
                connection.notification(6, 3, 'no session configured for the peer')
                connection.close()
            else:
                self.logger.reactor('connection refused (already connected to the peer) %s - %s' % (connection.local, connection.peer))
                connection.notification(6, 5, 'could not accept the connection')
                connection.close()

    def run(self):
        """Daemonise, load the configuration, bind, drop privileges, then loop.

        Returns False when the initial configuration can not be loaded and
        None in every other case. The program exits (sys.exit) when the
        listener can not bind.
        """
        self.daemon.daemonise()

        # Make sure we create processes once we have closed file descriptor
        # unfortunately, this must be done before reading the configuration file
        # so we can not do it with dropped privileges
        self.processes = Processes(self)

        # we have to read the configuration possibly with root privileges
        # as we need the MD5 information when we bind, and root is needed
        # to bind to a port < 1024

        # this is undesirable as :
        # - handling user generated data as root should be avoided
        # - we may not be able to reload the configuration once the privileges are dropped

        # but I can not see any way to avoid it

        if not self.load():
            return False

        self._listen()

        if not self.early_drop:
            self.processes.start()

        if not self.daemon.drop_privileges():
            self.logger.reactor('Could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user, 'critical')
            self.logger.reactor('Set the environmemnt value exabgp.daemon.user to change the unprivileged user', 'critical')
            return

        if self.early_drop:
            self.processes.start()

        # This is required to make sure we can write in the log location as we now have dropped root privileges
        if not self.logger.restart():
            self.logger.reactor('Could not setup the logger, aborting', 'critical')
            return

        if not self.daemon.savepid():
            return

        # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
        reload_completed = True

        wait = environment.settings().tcp.delay
        if wait:
            # sleep until the next multiple of `wait` minutes on the clock
            sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
            self.logger.reactor('waiting for %d seconds before connecting' % sleeptime)
            time.sleep(float(sleeptime))

        workers = {}      # socket -> key of the peer owning it
        peers = set()     # keys of the peers which still have to be run
        scheduled = False

        while True:
            try:
                finished = False
                start = time.time()
                end = start + self.max_loop_time

                if self._shutdown:
                    self._shutdown = False
                    self.shutdown()
                    break

                if self._reload and reload_completed:
                    self._reload = False
                    self.load()
                    self.processes.start(self._reload_processes)
                    self._reload_processes = False
                elif self._restart:
                    self._restart = False
                    self.restart()

                # We got some API routes to announce
                if self.route_update:
                    self.route_update = False
                    self.route_send()

                peers.update(self.peers)

                while start < time.time() < end and not finished:
                    if self.peers:
                        for key in list(peers):
                            peer = self.peers[key]
                            action = peer.run()

                            # .run() returns an ACTION enum:
                            # * immediate if it wants to be called again
                            # * later if it should be called again but has no work atm
                            # * close if it is finished and is closing down, or restarting
                            if action == ACTION.CLOSE:
                                # we are losing this peer, no point to schedule more process work
                                self.unschedule(peer)
                                peers.discard(key)
                            elif action == ACTION.LATER:
                                for io in peer.sockets():
                                    workers[io] = key
                                # no need to come back to it before a full cycle
                                peers.discard(key)

                    if not peers:
                        reload_completed = True

                    if self.listener:
                        self._accept_connections()

                    scheduled = self.schedule()
                    finished = not peers and not scheduled

                    # RFC state that we MUST not send more than one KEEPALIVE / sec
                    # And doing less could cause the session to drop

                    if finished:
                        # NOTE(review): `peers` is always empty here (see `finished`),
                        # so only the process fds are waited on and `workers` is never
                        # matched - list(workers) may have been intended; confirm
                        # before changing as stale sockets could then reach select().
                        for io in self.ready(list(peers), self.processes.fds(), end - time.time()):
                            if io in workers:
                                peers.add(workers[io])
                                del workers[io]

                if self._stopping and not self.peers:
                    break

            except KeyboardInterrupt:
                # retry until the flag is set and logged without a new ^C
                while True:
                    try:
                        self._shutdown = True
                        self.logger.reactor('^C received')
                        break
                    except KeyboardInterrupt:
                        pass
            # socket.error is a subclass of IOError (so catch it first)
            except socket.error:
                # NOTE(review): this `break` leaves the main loop directly, so
                # shutdown() is not called on this path - confirm it is intended.
                try:
                    self._shutdown = True
                    self.logger.reactor('socket error received', 'warning')
                    break
                except KeyboardInterrupt:
                    pass
            except IOError:
                while True:
                    try:
                        self._shutdown = True
                        self.logger.reactor('I/O Error received, most likely ^C during IO', 'warning')
                        break
                    except KeyboardInterrupt:
                        pass
            except SystemExit:
                # NOTE(review): as for socket.error, the main loop is left
                # without calling shutdown() - confirm it is intended.
                try:
                    self._shutdown = True
                    self.logger.reactor('exiting')
                    break
                except KeyboardInterrupt:
                    pass
            except ProcessError:
                try:
                    self._shutdown = True
                    self.logger.reactor('Problem when sending message(s) to helper program, stopping', 'error')
                except KeyboardInterrupt:
                    pass
            except select.error:
                try:
                    self._shutdown = True
                    self.logger.reactor('problem using select, stopping', 'error')
                except KeyboardInterrupt:
                    pass

    def shutdown(self):
        """Stop the listener and every peer, terminate the processes, remove the pid."""
        self.logger.reactor('performing shutdown')
        if self.listener:
            self.listener.stop()
            self.listener = None
        # iterate over a snapshot of the peers
        for peer in list(self.peers.values()):
            peer.stop()
        self.processes.terminate()
        self.daemon.removepid()
        self._stopping = True

    def load(self):
        """Reload the configuration and bring the peers in line with it.

        Peers absent from the new configuration are stopped, new neighbors get
        a new Peer, changed neighbors are re-established and the unchanged
        ones reconfigured.

        Returns True on success, False when the configuration can not be
        reloaded (in which case no peer is touched).
        """
        self.logger.reactor('performing reload of exabgp %s' % version)

        reloaded = self.configuration.reload()

        if not reloaded:
            #
            # Careful the string below is used by the QA code to check for success or failure
            self.logger.configuration('problem with the configuration file, no change done', 'error')
            # Careful the string above is used by the QA code to check for success or failure
            #
            self.logger.configuration(str(self.configuration.error), 'error')
            return False

        for key, peer in self.peers.items():
            if key not in self.configuration.neighbors:
                self.logger.reactor('removing peer: %s' % peer.neighbor.name())
                peer.stop()

        for key, neighbor in self.configuration.neighbors.items():
            # new peer
            if key not in self.peers:
                self.logger.reactor('new peer: %s' % neighbor.name())
                peer = Peer(neighbor, self)
                self.peers[key] = peer
            # modified peer
            elif self.peers[key].neighbor != neighbor:
                self.logger.reactor('peer definition change, establishing a new connection for %s' % str(key))
                self.peers[key].reestablish(neighbor)
            # same peer but perhaps not the routes
            else:
                # finding what route changed and sending the delta is not obvious
                self.logger.reactor('peer definition identical, updating peer routes if required for %s' % str(key))
                self.peers[key].reconfigure(neighbor)
        self.logger.configuration('loaded new configuration successfully', 'info')

        return True

    def schedule(self):
        """Process the helper programs' commands and run the current callback.

        Returns True when a callback was run during this call, False when
        there was nothing to run or when the processing was interrupted.
        """
        try:
            # read at least one message per process if there is some and parse it
            for service, command in self.processes.received():
                self.api.text(self, service, command)

            # if we have nothing to do, return or save the work
            if not self._running:
                if not self._pending:
                    return False
                self._running, name = self._pending.popleft()
                self.logger.reactor('callback | installing %s' % name)

            if self._running:
                # run it
                try:
                    self.logger.reactor('callback | running')
                    six.next(self._running)  # run
                    # should raise StopIteration in most case
                    # and prevent us to have to run twice to run one command
                    six.next(self._running)  # run
                except StopIteration:
                    self._running = None
                    self.logger.reactor('callback | removing')
                return True

        except StopIteration:
            pass
        except KeyboardInterrupt:
            self._shutdown = True
            self.logger.reactor('^C received', 'error')
        # explicit, so that every path returns a boolean
        return False

    def route_send(self):
        """Call send_new() on the peer of every configured neighbor."""
        self.logger.reactor('performing dynamic route update')
        for key in self.configuration.neighbors.keys():
            self.peers[key].send_new()
        self.logger.reactor('updated peers dynamic routes successfully')

    def restart(self):
        """Reload the configuration, then stop or re-establish every peer.

        Peers whose key is no longer in the configuration are stopped, the
        others are re-established; the helper processes are then terminated
        and started again.
        """
        self.logger.reactor('performing restart of exabgp %s' % version)
        self.configuration.reload()

        # iterate over a snapshot of the peers
        for key, peer in list(self.peers.items()):
            if key not in self.configuration.neighbors:
                # The neighbor is by definition absent from the configuration:
                # its name must come from the peer (a lookup in
                # self.configuration.neighbors would raise KeyError).
                self.logger.reactor('removing Peer %s' % peer.neighbor.name())
                peer.stop()
            else:
                peer.reestablish()

        self.processes.terminate()
        self.processes.start()

    def unschedule(self, peer):
        """Forget `peer`; it is looked up under the name of its neighbor."""
        self.peers.pop(peer.neighbor.name(), None)

    def answer(self, service, string):
        """Write `string` to the helper process `service` and log it."""
        self.processes.write(service, string)
        self.logger.reactor('responding to %s : %s' % (service, string.replace('\n', '\\n')))

    def api_shutdown(self):
        """Request a shutdown and drop every planned or running callback."""
        self._shutdown = True
        self._pending = deque()
        self._running = None

    def api_reload(self):
        """Request a reload and drop every planned or running callback."""
        self._reload = True
        self._pending = deque()
        self._running = None

    def api_restart(self):
        """Request a restart and drop every planned or running callback."""
        self._restart = True
        self._pending = deque()
        self._running = None

    @staticmethod
    def match_neighbor(description, name):
        """Tell if every string of `description` appears in `name`.

        A string only matches when it is preceded by the start of `name` or a
        whitespace, and followed by the end of `name`, a whitespace or a
        comma. An empty description matches everything.
        """
        for string in description:
            if re.search(r'(^|[\s])%s($|[\s,])' % re.escape(string), name) is None:
                return False
        return True

    def match_neighbors(self, descriptions):
        """Return the keys of the peers matching any of the descriptions.

        When no description is given, the keys of all the peers are returned.
        """
        if not descriptions:
            return self.peers.keys()

        returned = []
        for key in self.peers:
            if any(Reactor.match_neighbor(description, key) for description in descriptions):
                returned.append(key)
        return returned

    def nexthops(self, peers):
        """Map each given peer key to the local address of its neighbor."""
        return dict((peer, self.peers[peer].neighbor.local_address) for peer in peers)

    def plan(self, callback, name):
        """Queue `callback` (labelled `name`) to be run by schedule()."""
        self._pending.append((callback, name))