def test_initialize_of_coordinator(self):
    """
    Feed one configuration message to the coordinator and verify that it
    opens an SSH connection to both servers and restarts the agent on both.
    """
    coordinator, os_operator_mock, _consumer_mock = self.__get_mock_objects()

    # replace the heartbeat processing with a mock that does nothing
    coordinator._MonitorCoordinator__process_heartbeat = MagicMock(
        return_value=None)

    # run the coordinator against the single configuration message
    coordinator._MonitorCoordinator__coordinating_monitors(
        self.__get_mock_msg_list([self.config_msg]))

    # an SSH connection is expected for each of the two servers
    expected_ssh_calls = [
        call(ANY, ANY, server,
             Mc.get_ssh_default_user(),
             Mc.get_ssh_default_password())
        for server in (self.server1, self.server2)
    ]
    Mu.open_ssh_connection.assert_has_calls(expected_ssh_calls)

    # an agent restart is expected for server ids 1 and 2, using the
    # intervals carried by the configuration message
    expected_restart_calls = [
        call(ANY, server_id, "/usr/sap", Mc.get_agent_path(),
             self.config_msg[self.mem_interval],
             self.config_msg[self.cpu_interval],
             self.config_msg[self.disk_interval],
             self.config_msg[self.instance_interval])
        for server_id in (1, 2)
    ]
    os_operator_mock.restart_agent.assert_has_calls(expected_restart_calls)
def __init__(self):
    """
    Set up the OS operator, load the server list through a HANA monitor DAO
    and record the agent directory together with the agent file names.
    """
    self.__os_operator = LinuxOperator()

    # DAO built from the HANA connection settings provided by Mc
    hana_dao = HANAMonitorDAO(
        Mc.get_hana_server(),
        Mc.get_hana_port(),
        Mc.get_hana_user(),
        Mc.get_hana_password())
    self.servers = self.__get_servers(hana_dao)

    # TODO: consider moving the values below to configuration
    agent_script = 'agent.py'
    # agent path with the trailing script name cut off
    self.path = Mc.get_agent_path()[:-len(agent_script)]
    self.files = [
        agent_script,
        'util.py',
        'errors.py',
        'config/configuration.ini',
        'config/logging.ini',
    ]
def __restart_agent_via_server_id(self, server_id):
    """
    Restart the agent of the server with the given id, unless the previous
    restart attempt for that id is more recent than the configured interval.

    :param server_id: id compared with Mc.FIELD_SERVER_ID of each configured server
    """
    last_restart = self.__heartbeat_agent_restart_info.get(
        server_id, datetime.min)
    now = datetime.now()

    if (now - last_restart).total_seconds() < self.__heartbeat_restart_agent_interval:
        # still inside the restart interval: only log, do not restart
        skip_template = (
            "heartbeat failed for {0}, but did not try to restart agent due to the "
            "configured operation interval time ({1}). (pre: {2}, cur: {3})")
        Mu.log_info(
            self.__logger,
            skip_template.format(
                server_id,
                self.__heartbeat_restart_agent_interval,
                last_restart.strftime("%Y-%m-%d %H:%M:%S"),
                now.strftime("%Y-%m-%d %H:%M:%S")))
        return

    # collect every configured server carrying this id before restarting any
    matching_servers = [
        candidate
        for candidate in self.__configs.get(Mc.DB_CONFIGURATION_SERVER, [])
        if candidate[Mc.FIELD_SERVER_ID] == server_id
    ]
    for server in matching_servers:
        # record the restart time first: restarting the agent can take more
        # than 2 minutes when the server is not responding
        self.__heartbeat_agent_restart_info[server_id] = datetime.now()

        full_name = server[Mc.FIELD_SERVER_FULL_NAME]
        Mu.log_info(self.__logger,
                    "Restarting agent on {0}.".format(full_name))
        self.__restart_agent(
            full_name,
            server[Mc.FIELD_SERVER_ID],
            server[Mc.FIELD_MOUNT_POINT],
            Mc.get_agent_path(),
            self.__configs.get("CHECK_INTERVAL_MEM_INT", 60),
            self.__configs.get("CHECK_INTERVAL_CPU_INT", 300),
            self.__configs.get("CHECK_INTERVAL_DISK_INT", 3600),
            self.__configs.get("CHECK_INTERVAL_INSTANCE_INT", 300))
        Mu.log_info(
            self.__logger,
            "Restarting agent on {0} is finished.".format(
                server[Mc.FIELD_SERVER_FULL_NAME]))
def __coordinating_monitors(self, consumer):
    """
    Consume configuration messages and (re)start the agents accordingly.

    For every message whose value changes the stored configuration, all
    configured agents are restarted; afterwards the heartbeat thread is
    started once, as soon as the configuration check passes.

    :param consumer: kafka consumer (iterated for messages exposing ``value``)
    """
    Mu.log_debug(self.__logger,
                 "Coordinator is listening on topic for configurations.")
    for msg in consumer:
        try:
            Mu.log_debug(self.__logger, "New configs are coming...")
            if not self.__update_configs(msg.value):
                continue

            # current design: any configuration change restarts all agents
            for server in self.__configs.get(Mc.DB_CONFIGURATION_SERVER, []):
                full_name = server[Mc.FIELD_SERVER_FULL_NAME]
                server_id = server[Mc.FIELD_SERVER_ID]
                mount_point = server[Mc.FIELD_MOUNT_POINT]
                self.__restart_agent(
                    full_name,
                    server_id,
                    mount_point,
                    Mc.get_agent_path(),
                    self.__configs.get("CHECK_INTERVAL_MEM_INT", 60),
                    self.__configs.get("CHECK_INTERVAL_CPU_INT", 300),
                    self.__configs.get("CHECK_INTERVAL_DISK_INT", 3600),
                    self.__configs.get("CHECK_INTERVAL_INSTANCE_INT", 300))

            if self.__check_configuration() and not self.__heartbeat_flag:
                # flag is raised before starting so the thread is launched only once
                self.__heartbeat_flag = True
                threading.Thread(target=self.__process_heartbeat).start()
        except Exception as ex:
            # keep listening: a failure on one message must not stop the loop
            Mu.log_warning_exc(
                self.__logger,
                "Error occurred when coordinating the monitors, Err: {0}".format(ex))