class WaitInformMRebootState(EnodebAcsState):
    """
    State entered after a reboot request has been sent to the eNodeB.

    The state waits for an Inform that carries the 'M Reboot' event code.
    If REBOOT_TIMEOUT seconds elapse first, the state machine is moved to
    the timeout transition instead.
    """

    # Seconds to wait for the eNodeB to come back after a reboot. The time
    # measured on a BaiCells indoor eNodeB is ~110secs, so healthy padding
    # is added on top of that.
    REBOOT_TIMEOUT = 300

    # Event code that the post-reboot Inform is expected to carry
    INFORM_EVENT_CODE = 'M Reboot'

    def __init__(
        self,
        acs: EnodebAcsStateMachine,
        when_done: str,
        when_timeout: str,
    ):
        super().__init__()
        self.acs = acs
        # Names of the states to move to on success / on timeout
        self.done_transition = when_done
        self.timeout_transition = when_timeout
        # Both are only populated while the state is active (see enter())
        self.timer_handle = None
        self.timeout_timer = None

    def enter(self):
        """ Start the reboot timer and schedule the timeout check """
        self.timeout_timer = StateMachineTimer(self.REBOOT_TIMEOUT)

        def on_reboot_deadline() -> None:
            # Nothing to do unless the reboot timer has actually run out
            if not self.timeout_timer.is_done():
                return
            self.acs.transition(self.timeout_transition)
            raise Tr069Error(
                'Did not receive Inform response after rebooting',
            )

        loop = self.acs.event_loop
        self.timer_handle = loop.call_later(
            self.REBOOT_TIMEOUT,
            on_reboot_deadline,
        )

    def exit(self):
        """ Cancel the scheduled timeout check and drop the timer """
        self.timer_handle.cancel()
        self.timeout_timer = None

    def read_msg(self, message: Any) -> AcsReadMsgResult:
        """
        Handle an incoming message.

        Anything that is not an Inform is reported as unhandled. An Inform
        without the expected event code raises Tr069Error; otherwise the
        Inform is processed and the done transition is requested.
        """
        if isinstance(message, models.Inform):
            has_reboot_event = does_inform_have_event(
                message, self.INFORM_EVENT_CODE,
            )
            if has_reboot_event:
                process_inform_message(
                    message,
                    self.acs.device_name,
                    self.acs.data_model,
                    self.acs.device_cfg,
                )
                return AcsReadMsgResult(True, self.done_transition)
            raise Tr069Error(
                'Did not receive M Reboot event code in Inform',
            )
        return AcsReadMsgResult(False, None)

    @classmethod
    def state_description(cls) -> str:
        """ Human-readable description of this state """
        return 'Waiting for M Reboot code from Inform'
class BaicellsRemWaitState(EnodebAcsState):
    """
    Hold-off state used once an Inform message has already been received.

    Works around a Baicells eNodeB issue: the device doesn't support being
    enabled during its initial REM, so after a reboot the configuration is
    held off for some time to let REM run. While waiting, the state just
    keeps answering Informs and ending the TR-069 session.
    """

    # Hold-off duration handed to StateMachineTimer
    CONFIG_DELAY_AFTER_BOOT = 600

    def __init__(self, acs: EnodebAcsStateMachine, when_done: str):
        super().__init__()
        self.acs = acs
        self.done_transition = when_done
        # Created in enter(), cleared in exit()
        self.rem_timer = None

    def enter(self):
        """ Start the REM hold-off timer and log the delay """
        delay = self.CONFIG_DELAY_AFTER_BOOT
        self.rem_timer = StateMachineTimer(delay)
        logger.info(
            'Holding off of eNB configuration for %s seconds. '
            'Will resume after eNB REM process has finished. ',
            delay,
        )

    def exit(self):
        """ Drop the hold-off timer """
        self.rem_timer = None

    def read_msg(self, message: Any) -> AcsReadMsgResult:
        """
        Process Inform messages without requesting a transition; any other
        message type is reported as unhandled.
        """
        if isinstance(message, models.Inform):
            process_inform_message(
                message, self.acs.data_model, self.acs.device_cfg,
            )
            return AcsReadMsgResult(True, None)
        return AcsReadMsgResult(False, None)

    def get_msg(self, message: Any) -> AcsMsgAndTransition:
        """
        Always reply with a DummyInput; request the done transition only
        once the hold-off timer has finished.
        """
        next_state = self.done_transition if self.rem_timer.is_done() else None
        return AcsMsgAndTransition(models.DummyInput(), next_state)

    def state_description(self) -> str:
        """ Describe the state, including the seconds left on the timer """
        template = (
            'Waiting for eNB REM to run for %d more seconds before '
            'resuming with configuration.'
        )
        return template % self.rem_timer.seconds_remaining()
class BaicellsRemWaitState(EnodebAcsState):
    """
    Hold-off state used once an Inform message has already been received.

    Works around a Baicells eNodeB issue: the device doesn't support being
    enabled during its initial REM, so after a reboot the configuration is
    held off for some time to let REM run.
    """

    # Hold-off duration, used for both the timer and the call_later delay
    CONFIG_DELAY_AFTER_BOOT = 600

    def __init__(self, acs: EnodebAcsStateMachine, when_done: str):
        super().__init__()
        self.acs = acs
        self.done_transition = when_done
        # Both are only populated while the state is active (see enter())
        self.timer_handle = None
        self.rem_timer = None

    def enter(self):
        """ Start the hold-off timer and schedule the done transition """
        delay = self.CONFIG_DELAY_AFTER_BOOT
        self.rem_timer = StateMachineTimer(delay)

        def on_delay_elapsed() -> None:
            # Only move on if the hold-off timer has really finished
            if not self.rem_timer.is_done():
                return
            self.acs.transition(self.done_transition)

        loop = self.acs.event_loop
        self.timer_handle = loop.call_later(delay, on_delay_elapsed)

    def exit(self):
        """ Cancel the scheduled callback and drop the timer """
        self.timer_handle.cancel()
        self.rem_timer = None

    def get_msg(self) -> AcsMsgAndTransition:
        """ Reply with a DummyInput and stay in this state """
        reply = models.DummyInput()
        return AcsMsgAndTransition(reply, None)

    def read_msg(self, message: Any) -> AcsReadMsgResult:
        """ Accept every message as handled, without any transition """
        return AcsReadMsgResult(True, None)

    @classmethod
    def state_description(cls) -> str:
        """ Human-readable description of this state """
        return 'Waiting for eNB REM to run'
class WaitRebootDelayState(EnodebAcsState):
    """
    Short pause taken after the Inform that reports a successful eNodeB
    reboot, to prevent unspecified race conditions that may occur w.r.t.
    the reboot.
    """

    # Length of the pause, used for both the timer and the call_later delay
    SHORT_CONFIG_DELAY = 10

    def __init__(self, acs: EnodebAcsStateMachine, when_done: str):
        super().__init__()
        self.acs = acs
        self.done_transition = when_done
        # Both are only populated while the state is active (see enter())
        self.timer_handle = None
        self.config_timer = None

    def enter(self):
        """ Start the delay timer and schedule the done transition """
        delay = self.SHORT_CONFIG_DELAY
        self.config_timer = StateMachineTimer(delay)

        def on_delay_elapsed() -> None:
            # Only move on if the delay timer has really finished
            if not self.config_timer.is_done():
                return
            self.acs.transition(self.done_transition)

        loop = self.acs.event_loop
        self.timer_handle = loop.call_later(delay, on_delay_elapsed)

    def exit(self):
        """ Cancel the scheduled callback and drop the timer """
        self.timer_handle.cancel()
        self.config_timer = None

    def read_msg(self, message: Any) -> AcsReadMsgResult:
        """ Accept every message as handled, without any transition """
        return AcsReadMsgResult(True, None)

    def get_msg(self, message: Any) -> AcsMsgAndTransition:
        """ Reply with a DummyInput and stay in this state """
        reply = models.DummyInput()
        return AcsMsgAndTransition(reply, None)

    def state_description(self) -> str:
        """ Human-readable description of this state """
        return 'Waiting after eNB reboot to prevent race conditions'
class WaitInformMRebootState(EnodebAcsState):
    """
    After sending a reboot request, we expect an Inform request with a
    specific 'inform event code'.

    Once that Inform has been read, get_msg() answers with an
    InformResponse and requests the done transition. If REBOOT_TIMEOUT
    seconds elapse first, the state machine is moved to the timeout
    transition instead.
    """

    # Time to wait for eNodeB reboot. The measured time
    # (on BaiCells indoor eNodeB)
    # is ~110secs, so add healthy padding on top of this.
    REBOOT_TIMEOUT = 300  # In seconds

    # We expect that the Inform we receive tells us the eNB has rebooted
    INFORM_EVENT_CODE = 'M Reboot'

    def __init__(
        self,
        acs: EnodebAcsStateMachine,
        when_done: str,
        when_timeout: str,
    ):
        super().__init__()
        self.acs = acs
        self.done_transition = when_done
        self.timeout_transition = when_timeout
        self.timeout_timer = None
        self.timer_handle = None
        # True once an Inform with the expected event code has been read
        self.received_inform = False

    def enter(self):
        """ Reset per-visit state and schedule the reboot timeout check """
        # The flag must not survive from an earlier visit to this state;
        # otherwise a Fault read before the new Inform would make get_msg()
        # reply with a stale InformResponse and leave the state too early.
        self.received_inform = False
        self.timeout_timer = StateMachineTimer(self.REBOOT_TIMEOUT)

        def check_timer() -> None:
            if self.timeout_timer.is_done():
                self.acs.transition(self.timeout_transition)
                raise Tr069Error(
                    'Did not receive Inform response after '
                    'rebooting',
                )

        self.timer_handle = self.acs.event_loop.call_later(
            self.REBOOT_TIMEOUT,
            check_timer,
        )

    def exit(self):
        """ Cancel the scheduled timeout check and drop the timer """
        self.timer_handle.cancel()
        self.timeout_timer = None

    def read_msg(self, message: Any) -> AcsReadMsgResult:
        """
        Handle an incoming message.

        Faults are accepted and ignored. Messages that are neither Inform
        nor Fault are reported as unhandled. An Inform is processed and
        remembered so that get_msg() can answer it.

        Raises:
            Tr069Error: the Inform does not carry INFORM_EVENT_CODE
        """
        if isinstance(message, models.Inform):
            # Walk every event (no early exit) so each one gets logged
            is_correct_event = False
            for event in message.Event.EventStruct:
                logging.debug('Inform event: %s', event.EventCode)
                if event.EventCode == self.INFORM_EVENT_CODE:
                    is_correct_event = True
            if not is_correct_event:
                raise Tr069Error(
                    'Did not receive M Reboot event code in '
                    'Inform',
                )
        elif isinstance(message, models.Fault):
            # eNodeB may send faults for no apparent reason before rebooting
            return AcsReadMsgResult(True, None)
        else:
            return AcsReadMsgResult(False, None)

        self.received_inform = True
        process_inform_message(
            message,
            self.acs.device_name,
            self.acs.data_model,
            self.acs.device_cfg,
        )
        return AcsReadMsgResult(True, None)

    def get_msg(self) -> AcsMsgAndTransition:
        """
        Reply with InformResponse once the expected Inform has been read;
        until then reply with a DummyInput and stay in this state.
        """
        if self.received_inform:
            response = models.InformResponse()
            # Set maxEnvelopes to 1, as per TR-069 spec
            response.MaxEnvelopes = 1
            return AcsMsgAndTransition(response, self.done_transition)
        return AcsMsgAndTransition(models.DummyInput(), None)

    @classmethod
    def state_description(cls) -> str:
        """ Human-readable description of this state """
        return 'Waiting for M Reboot code from Inform'
class BasicEnodebAcsStateMachine(EnodebAcsStateMachine):
    """
    Shared implementation for the per-device EnodebAcsStateMachine classes.

    The device-specific classes work mostly the same way and differ mainly
    in the data model, the desired configuration, and the state transition
    map. Subclasses supply those pieces through the abstract members
    declared at the end of this class.
    """

    # eNodeB connection timeout is used to determine whether or not eNodeB is
    # connected to enodebd based on time of last Inform message. By default,
    # periodic inform interval is 30secs, so timeout should be larger than
    # this.
    # Also set timer longer than reboot time, so that an eNodeB reboot does not
    # trigger a connection-timeout alarm.
    ENB_CONNECTION_TIMEOUT = 600  # In seconds

    # If eNodeB is disconnected from MME for an unknown reason for this time,
    # then reboot it. Set to a long time to ensure this doesn't interfere with
    # other enodebd configuration processes - it is just a measure of last
    # resort for an unlikely error case
    MME_DISCONNECT_ENODEB_REBOOT_TIMER = 15 * 60

    # Check the MME connection status every 15 seconds
    MME_CHECK_TIMER = 15

    def __init__(
        self,
        service: MagmaService,
    ) -> None:
        super().__init__()
        self.state = None
        self.timeout_handler = None
        self.mme_timeout_handler = None
        self.mme_timer = None
        self._start_state_machine(service)

    def get_state(self) -> str:
        """ Describe the current state, or 'N/A' when there is none """
        current = self.state
        if current is not None:
            return current.state_description()
        logger.warning('ACS State machine is not in any state.')
        return 'N/A'

    def handle_tr069_message(
        self,
        message: Tr069ComplexModel,
    ) -> Tr069ComplexModel:
        """
        Accept the tr069 message from the eNB and produce a reply.

        States may transition after reading a message but BEFORE producing
        a reply. Most steps in the provisioning process are represented as
        beginning with enodebd sending a request to the eNB, and waiting for
        the reply from the eNB.

        Any exception raised while reading or replying is logged, the
        machine is moved to the unexpected-fault state, and the reply is
        produced from there.
        """
        # TransferComplete messages come at random times, and we ignore them
        if isinstance(message, models.TransferComplete):
            return models.TransferCompleteResponse()

        try:
            self._read_tr069_msg(message)
            reply = self._get_tr069_msg(message)
        except Exception:  # pylint: disable=broad-except
            logger.error('Failed to handle tr069 message')
            logger.error(traceback.format_exc())
            self._dump_debug_info()
            self.transition(self.unexpected_fault_state_name)
            reply = self._get_tr069_msg(message)
        return reply

    def transition(self, next_state: str) -> Any:
        """ Exit the current state and enter the one named next_state """
        logger.debug('State transition to <%s>', next_state)
        self.state.exit()
        target = self.state_map[next_state]
        self.state = target
        target.enter()

    def stop_state_machine(self) -> None:
        """ Clean up anything the state machine is tracking or doing """
        self.state.exit()

        # Cancel whichever scheduled callbacks are still pending
        for handle_attr in ('timeout_handler', 'mme_timeout_handler'):
            handle = getattr(self, handle_attr)
            if handle is not None:
                handle.cancel()
                setattr(self, handle_attr, None)

        self._service = None
        self._desired_cfg = None
        self._device_cfg = None
        self._data_model = None

        self.mme_timer = None

    def _start_state_machine(
        self,
        service: MagmaService,
    ):
        """
        Build a fresh data model, device config and state map, enter the
        disconnected state, and start both periodic timers.
        """
        self.service = service
        self.data_model = self.data_model_class()
        # The current known device config has few known parameters
        # The desired configuration depends on what the current configuration
        # is. This we don't know fully, yet.
        self.device_cfg = EnodebConfiguration(self.data_model)

        self._init_state_map()
        initial_state = self.state_map[self.disconnected_state_name]
        self.state = initial_state
        initial_state.enter()
        self._reset_timeout()
        self._periodic_check_mme_connection()

    def _reset_state_machine(
        self,
        service: MagmaService,
    ):
        """ Stop the state machine and start it again from scratch """
        self.stop_state_machine()
        self._start_state_machine(service)

    def _read_tr069_msg(self, message: Any) -> None:
        """ Process incoming message and maybe transition state """
        self._reset_timeout()
        handled, next_state = self.state.read_msg(message)
        if not handled:
            # Move to a state that can deal with the message, then have
            # that state read it
            self._transition_for_unexpected_msg(message)
            _handled, next_state = self.state.read_msg(message)
        if next_state is None:
            return
        self.transition(next_state)

    def _get_tr069_msg(self, message: Any) -> Any:
        """ Get a new message to send, and maybe transition state """
        result = self.state.get_msg(message)
        if result.next_state:
            self.transition(result.next_state)
        return result.msg

    def _transition_for_unexpected_msg(self, message: Any) -> None:
        """
        eNB devices may send an Inform message in the middle of a
        provisioning session. To deal with this, transition to a state
        that expects an Inform message, but also track the status of the
        eNB as not having been disconnected.

        An unexpected Inform restarts the state machine, an unexpected
        Fault moves it to the unexpected-fault state, and anything else
        raises ConfigurationError.
        """
        if isinstance(message, models.Inform):
            logger.debug(
                'ACS in (%s) state. Received an Inform message',
                self.state.state_description(),
            )
            self._reset_state_machine(self.service)
            return

        if isinstance(message, models.Fault):
            logger.debug(
                'ACS in (%s) state. Received a Fault <%s>',
                self.state.state_description(),
                message.FaultString,
            )
            self.transition(self.unexpected_fault_state_name)
            return

        raise ConfigurationError('Cannot handle unexpected TR069 msg')

    def _reset_timeout(self) -> None:
        """ (Re)arm the eNodeB connection timeout """
        pending = self.timeout_handler
        if pending is not None:
            pending.cancel()

        def timed_out():
            self.transition(self.disconnected_state_name)

        self.timeout_handler = self.event_loop.call_later(
            self.ENB_CONNECTION_TIMEOUT,
            timed_out,
        )

    def _periodic_check_mme_connection(self) -> None:
        """ Check the MME connection now and schedule the next check """
        self._check_mme_connection()
        self.mme_timeout_handler = self.event_loop.call_later(
            self.MME_CHECK_TIMER,
            self._periodic_check_mme_connection,
        )

    def _check_mme_connection(self) -> None:
        """
        Check if eNodeB should be connected to MME but isn't, and maybe
        reboot.

        If the eNB doesn't report connection to MME within a timeout
        period, get it to reboot in the hope that it will fix things.

        Usually, enodebd polls the eNodeB for whether it is connected to
        MME. This method checks the last polled MME connection status, and
        if eNodeB should be connected to MME but it isn't.
        """
        cfg = self.device_cfg
        mme_connected = bool(
            cfg.has_parameter(ParameterName.MME_STATUS)
            and cfg.get_parameter(ParameterName.MME_STATUS),
        )

        # True if we would expect MME to be connected, but it isn't
        mme_unexpectedly_missing = (
            self.is_enodeb_connected()
            and self.is_enodeb_configured()
            and self.mconfig.allow_enodeb_transmit
            and not mme_connected
        )

        if not mme_unexpectedly_missing:
            if self.mme_timer is not None:
                logger.info('eNodeB has established MME connection.')
            self.mme_timer = None
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
            return

        logger.warning(
            'eNodeB is connected to AGw, is configured, '
            'and has AdminState enabled for transmit. '
            'MME connection to eNB is missing.',
        )

        if self.mme_timer is None:
            # First time the MME connection is seen missing: arm the timer
            logger.warning(
                'eNodeB will be rebooted if MME connection '
                'is not established in: %s seconds.',
                self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
            )
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(1)
            self.mme_timer = StateMachineTimer(
                self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
            )
            return

        if not self.mme_timer.is_done():
            # eNB is not connected to MME, but we're still waiting to see
            # if it will connect within the timeout period.
            # Take no action for now.
            return

        logger.warning(
            'eNodeB has not established MME connection '
            'within %s seconds - rebooting!',
            self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
        )
        metrics.STAT_ENODEB_REBOOTS.labels(cause='MME disconnect').inc()
        metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
        self.mme_timer = None
        self.reboot_asap()

    def _dump_debug_info(self) -> None:
        """ Log the device and desired configurations at error level """
        if self.device_cfg is None:
            logger.error('Device configuration: None')
        else:
            logger.error(
                'Device configuration: %s',
                self.device_cfg.get_debug_info(),
            )

        if self.desired_cfg is None:
            logger.error('Desired configuration: None')
        else:
            logger.error(
                'Desired configuration: %s',
                self.desired_cfg.get_debug_info(),
            )

    @abstractmethod
    def _init_state_map(self) -> None:
        """ Set up the state map; called while starting the machine """

    @property
    @abstractmethod
    def state_map(self) -> Dict[str, EnodebAcsState]:
        """ Mapping from state name to state object """

    @property
    @abstractmethod
    def disconnected_state_name(self) -> str:
        """ Name of the initial state, also entered on connection timeout """

    @property
    @abstractmethod
    def unexpected_fault_state_name(self) -> str:
        """ State to handle unexpected Fault messages """
def test_is_done(self):
    """ A zero-length timer reports done; a 600-second timer does not """
    expired = StateMachineTimer(0)
    self.assertTrue(expired.is_done(), msg='Timer should be done')

    pending = StateMachineTimer(600)
    self.assertFalse(pending.is_done(), msg='Timer should not be done')
class BaicellsQRTBQueuedEventsWaitState(EnodebAcsState):
    """
    Hold-off state used once an Inform message has already been received.

    Works around a Baicells eNodeB issue: after the eNodeB is rebooted,
    configuration is held off for some time. While waiting, the state just
    keeps answering Informs and ending the TR-069 session.
    """

    # Hold-off duration handed to StateMachineTimer
    CONFIG_DELAY_AFTER_BOOT = 60

    def __init__(self, acs: EnodebAcsStateMachine, when_done: str):
        super().__init__()
        self.acs = acs
        self.done_transition = when_done
        # Created in enter(), cleared in exit()
        self.wait_timer = None

    def enter(self):
        """
        Start the hold-off timer and log the delay
        """
        delay = self.CONFIG_DELAY_AFTER_BOOT
        self.wait_timer = StateMachineTimer(delay)
        logger.info(
            'Holding off of eNB configuration for %s seconds. ',
            delay,
        )

    def exit(self):
        """
        Drop the hold-off timer
        """
        self.wait_timer = None

    def _require_timer(self):
        """
        Return the hold-off timer, logging and raising ValueError if it
        is missing
        """
        timer = self.wait_timer
        if not timer:
            logger.error('wait_timer is None.')
            raise ValueError('wait_timer is None.')
        return timer

    def read_msg(self, message: Any) -> AcsReadMsgResult:
        """
        Process Inform messages without requesting a transition; report
        any other message type as unhandled

        Args:
            message (Any): TR069 message

        Returns:
            AcsReadMsgResult
        """
        if isinstance(message, models.Inform):
            process_inform_message(
                message, self.acs.data_model, self.acs.device_cfg,
            )
            return AcsReadMsgResult(msg_handled=True, next_state=None)
        return AcsReadMsgResult(msg_handled=False, next_state=None)

    def get_msg(self, message: Any) -> AcsMsgAndTransition:
        """
        Reply with a DummyInput; request the done transition once the
        hold-off timer has finished, otherwise log the remaining time

        Args:
            message (Any): TR069 message

        Returns:
            AcsMsgAndTransition
        """
        timer = self._require_timer()

        if not timer.is_done():
            logger.info(
                'Waiting with eNB configuration for %s more seconds. ',
                timer.seconds_remaining(),
            )
            return AcsMsgAndTransition(
                msg=models.DummyInput(), next_state=None,
            )

        return AcsMsgAndTransition(
            msg=models.DummyInput(),
            next_state=self.done_transition,
        )

    def state_description(self) -> str:
        """
        Describe the state, including the seconds left on the timer

        Returns:
            str
        """
        timer = self._require_timer()
        template = (
            'Waiting for eNB REM to run for %d more seconds before '
            'resuming with configuration.'
        )
        return template % timer.seconds_remaining()
class BasicEnodebAcsStateMachine(EnodebAcsStateMachine):
    """
    Most of the EnodebAcsStateMachine classes for each device work about the
    same way. Differences lie mainly in the data model, desired configuration,
    and the state transition map.

    This class specifies the shared implementation between them.
    """

    # eNodeB connection timeout is used to determine whether or not eNodeB is
    # connected to enodebd based on time of last Inform message. By default,
    # periodic inform interval is 30secs, so timeout should be larger than
    # this.
    # Also set timer longer than reboot time, so that an eNodeB reboot does not
    # trigger a connection-timeout alarm.
    ENB_CONNECTION_TIMEOUT = 600  # In seconds

    # If eNodeB is disconnected from MME for an unknown reason for this time,
    # then reboot it. Set to a long time to ensure this doesn't interfere with
    # other enodebd configuration processes - it is just a measure of last
    # resort for an unlikely error case
    MME_DISCONNECT_ENODEB_REBOOT_TIMER = 15 * 60

    # Check the MME connection status every 15 seconds
    MME_CHECK_TIMER = 15

    def __init__(
        self,
        service: MagmaService,
        stats_mgr: StatsManager,
    ) -> None:
        super().__init__()
        self.service = service
        self.stats_manager = stats_mgr
        self.data_model = self.data_model_class()
        # The current known device config has few known parameters
        self.device_cfg = EnodebConfiguration(self.data_model)
        # The desired configuration depends on what the current configuration
        # is. This we don't know fully, yet.
        self.desired_cfg = None
        # Handles returned by event_loop.call_later for the two timers
        self.timeout_handler = None
        self.mme_timeout_handler = None
        # Armed while the MME connection is unexpectedly missing
        self.mme_timer = None
        self._init_state_map()
        self.state = self.state_map[self.disconnected_state_name]
        self.state.enter()
        self._reset_timeout()
        self._periodic_check_mme_connection()

    def get_state(self) -> str:
        """ Describe the current state """
        return self.state.state_description()

    def handle_tr069_message(
        self,
        message: Tr069ComplexModel,
    ) -> Tr069ComplexModel:
        """
        Accept the tr069 message from the eNB and produce a reply.

        States may transition after reading a message but BEFORE producing
        a reply. Most steps in the provisioning process are represented as
        beginning with enodebd sending a request to the eNB, and waiting for
        the reply from the eNB.
        """
        # TransferComplete messages come at random times, and we ignore them
        if isinstance(message, models.TransferComplete):
            return models.TransferCompleteResponse()
        self._read_tr069_msg(message)
        return self._get_tr069_msg()

    def transition(self, next_state: str) -> Any:
        """ Exit the current state and enter the one named next_state """
        logging.debug('State transition to <%s>', next_state)
        self.state.exit()
        self.state = self.state_map[next_state]
        self.state.enter()

    def stop_state_machine(self) -> None:
        """
        Exit the current state and cancel both scheduled callbacks.

        Each handle is checked on its own: one of them being unset must
        neither crash the shutdown nor leave the other callback scheduled.
        """
        self.state.exit()
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()
            self.timeout_handler = None
        if self.mme_timeout_handler is not None:
            self.mme_timeout_handler.cancel()
            self.mme_timeout_handler = None

    def _read_tr069_msg(self, message: Any) -> None:
        """ Process incoming message and maybe transition state """
        self._reset_timeout()
        self._handle_unexpected_inform_msg(message)
        next_state = self.state.read_msg(message)
        if next_state is not None:
            self.transition(next_state)

    def _get_tr069_msg(self) -> Any:
        """ Get a new message to send, and maybe transition state """
        msg_and_transition = self.state.get_msg()
        if msg_and_transition.next_state:
            self.transition(msg_and_transition.next_state)
        return msg_and_transition.msg

    def _handle_unexpected_inform_msg(self, message: Any) -> None:
        """
        eNB devices may send an Inform message in the middle of a
        provisioning session. To deal with this, transition to a state
        that expects an Inform message, but also track the status of the
        eNB as not having been disconnected.
        """
        if isinstance(message, models.Inform) and self.is_enodeb_connected():
            self.transition(self.wait_inform_state_name)

    def _reset_timeout(self) -> None:
        """ (Re)arm the eNodeB connection timeout """
        if self.timeout_handler is not None:
            self.timeout_handler.cancel()

        def timed_out():
            self.transition(self.disconnected_state_name)

        self.timeout_handler = self.event_loop.call_later(
            self.ENB_CONNECTION_TIMEOUT,
            timed_out,
        )

    def _periodic_check_mme_connection(self) -> None:
        """ Check the MME connection now and schedule the next check """
        self._check_mme_connection()
        self.mme_timeout_handler = self.event_loop.call_later(
            self.MME_CHECK_TIMER,
            self._periodic_check_mme_connection,
        )

    def _check_mme_connection(self) -> None:
        """
        Check if eNodeB should be connected to MME but isn't, and maybe
        reboot.

        If the eNB doesn't report connection to MME within a timeout
        period, get it to reboot in the hope that it will fix things.
        """
        logging.info('Checking mme connection')
        status = get_enodeb_status(self)

        # No reboot is wanted when the eNB is not connected or configured,
        # when MME is already connected, or when transmit is not allowed
        reboot_disabled = (
            not self.is_enodeb_connected()
            or not self.is_enodeb_configured()
            or status['mme_connected'] == '1'
            or not self.mconfig.allow_enodeb_transmit
        )

        if reboot_disabled:
            if self.mme_timer is not None:
                logging.info('Clearing eNodeB reboot timer')
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
            self.mme_timer = None
            return

        if self.mme_timer is None:
            logging.info(
                'Set eNodeB reboot timer: %s',
                self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
            )
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(1)
            self.mme_timer = StateMachineTimer(
                self.MME_DISCONNECT_ENODEB_REBOOT_TIMER,
            )
        elif self.mme_timer.is_done():
            logging.warning('eNodeB reboot timer expired - rebooting!')
            metrics.STAT_ENODEB_REBOOTS.labels(cause='MME disconnect').inc()
            metrics.STAT_ENODEB_REBOOT_TIMER_ACTIVE.set(0)
            self.mme_timer = None
            self.reboot_asap()
        else:
            # eNB is not connected to MME, but we're still waiting to see if
            # it will connect within the timeout period.
            # Take no action for now.
            pass

    @abstractmethod
    def _init_state_map(self) -> None:
        pass

    @property
    @abstractmethod
    def state_map(self) -> Dict[str, EnodebAcsState]:
        pass

    @property
    @abstractmethod
    def disconnected_state_name(self) -> str:
        pass

    @property
    @abstractmethod
    def wait_inform_state_name(self) -> str:
        """ State to handle unexpected Inform messages """
        pass